diff --git a/src/core/annotation.js b/src/core/annotation.js index 5fe3d86547620..8c1b8eaa3641e 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -51,7 +51,7 @@ class AnnotationFactory { * instance. */ static create(xref, ref, pdfManager, idFactory) { - return pdfManager.ensureDoc("acroForm").then(acroForm => { + return pdfManager.ensureCatalog("acroForm").then(acroForm => { return pdfManager.ensure(this, "_create", [ xref, ref, diff --git a/src/core/document.js b/src/core/document.js index 404b046da76f8..cd459f6f7f62f 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -552,6 +552,7 @@ class PDFDocument { this.stream = stream; this.xref = new XRef(stream, pdfManager); this._pagePromises = []; + this._version = null; const idCounters = { font: 0, @@ -572,42 +573,15 @@ class PDFDocument { } parse(recoveryMode) { - this.setup(recoveryMode); - - const version = this.catalog.catDict.get("Version"); - if (isName(version)) { - this.pdfFormatVersion = version.name; - } - - // Check if AcroForms are present in the document. - try { - this.acroForm = this.catalog.catDict.get("AcroForm"); - if (this.acroForm) { - this.xfa = this.acroForm.get("XFA"); - const fields = this.acroForm.get("Fields"); - if ((!Array.isArray(fields) || fields.length === 0) && !this.xfa) { - this.acroForm = null; // No fields and no XFA, so it's not a form. - } - } - } catch (ex) { - if (ex instanceof MissingDataException) { - throw ex; - } - info("Cannot fetch AcroForm entry; assuming no AcroForms are present"); - this.acroForm = null; - } + this.xref.parse(recoveryMode); + this.catalog = new Catalog(this.pdfManager, this.xref); - // Check if a Collection dictionary is present in the document. - try { - const collection = this.catalog.catDict.get("Collection"); - if (isDict(collection) && collection.size > 0) { - this.collection = collection; - } - } catch (ex) { - if (ex instanceof MissingDataException) { - throw ex; - } - info("Cannot fetch Collection dictionary."); + // The `checkHeader` method is called before this method and parses the + // version from the header. The specification states in section 7.5.2 + // that the version from the catalog, if present, should overwrite the + // version from the header. + if (this.catalog.version) { + this._version = this.catalog.version; } } @@ -693,9 +667,9 @@ class PDFDocument { } version += String.fromCharCode(ch); } - if (!this.pdfFormatVersion) { + if (!this._version) { // Remove the "%PDF-" prefix. - this.pdfFormatVersion = version.substring(5); + this._version = version.substring(5); } } @@ -703,17 +677,75 @@ class PDFDocument { this.xref.setStartXRef(this.startXRef); } - setup(recoveryMode) { - this.xref.parse(recoveryMode); - this.catalog = new Catalog(this.pdfManager, this.xref); - } - get numPages() { const linearization = this.linearization; const num = linearization ? linearization.numPages : this.catalog.numPages; return shadow(this, "numPages", num); } + /** + * @private + */ + _hasOnlyDocumentSignatures(fields, recursionDepth = 0) { + const RECURSION_LIMIT = 10; + return fields.every(field => { + field = this.xref.fetchIfRef(field); + if (field.has("Kids")) { + if (++recursionDepth > RECURSION_LIMIT) { + warn("_hasOnlyDocumentSignatures: maximum recursion depth reached"); + return false; + } + return this._hasOnlyDocumentSignatures( + field.get("Kids"), + recursionDepth + ); + } + const isSignature = isName(field.get("FT"), "Sig"); + const rectangle = field.get("Rect"); + const isInvisible = + Array.isArray(rectangle) && rectangle.every(value => value === 0); + return isSignature && isInvisible; + }); + } + + get formInfo() { + const formInfo = { hasAcroForm: false, hasXfa: false }; + const acroForm = this.catalog.acroForm; + if (!acroForm) { + return shadow(this, "formInfo", formInfo); + } + + try { + // The document contains XFA data if the `XFA` entry is a non-empty + // array or stream. + const xfa = acroForm.get("XFA"); + const hasXfa = + (Array.isArray(xfa) && xfa.length > 0) || + (isStream(xfa) && !xfa.isEmpty); + formInfo.hasXfa = hasXfa; + + // The document contains AcroForm data if the `Fields` entry is a + // non-empty array and it doesn't consist of only document signatures. + // This second check is required for files that don't actually contain + // AcroForm data (only XFA data), but that use the `Fields` entry to + // store (invisible) document signatures. This can be detected using + // the first bit of the `SigFlags` integer (see Table 219 in the + // specification). + const fields = acroForm.get("Fields"); + const hasFields = Array.isArray(fields) && fields.length > 0; + const sigFlags = acroForm.get("SigFlags"); + const hasOnlyDocumentSignatures = + !!(sigFlags & 0x1) && this._hasOnlyDocumentSignatures(fields); + formInfo.hasAcroForm = hasFields && !hasOnlyDocumentSignatures; + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + info("Cannot fetch form information."); + } + return shadow(this, "formInfo", formInfo); + } + get documentInfo() { const DocumentInfoValidators = { Title: isString, @@ -727,7 +759,7 @@ class PDFDocument { Trapped: isName, }; - let version = this.pdfFormatVersion; + let version = this._version; if ( typeof version !== "string" || !PDF_HEADER_VERSION_REGEXP.test(version) @@ -739,9 +771,9 @@ class PDFDocument { const docInfo = { PDFFormatVersion: version, IsLinearized: !!this.linearization, - IsAcroFormPresent: !!this.acroForm, - IsXFAPresent: !!this.xfa, - IsCollectionPresent: !!this.collection, + IsAcroFormPresent: this.formInfo.hasAcroForm, + IsXFAPresent: this.formInfo.hasXfa, + IsCollectionPresent: !!this.catalog.collection, }; let infoDict; diff --git a/src/core/obj.js b/src/core/obj.js index 3eb437fbeb188..f4b9d2e85a1bc 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -65,8 +65,8 @@ class Catalog { this.pdfManager = pdfManager; this.xref = xref; - this.catDict = xref.getCatalogObj(); - if (!isDict(this.catDict)) { + this._catDict = xref.getCatalogObj(); + if (!isDict(this._catDict)) { throw new FormatError("Catalog object is not a dictionary."); } @@ -76,8 +76,48 @@ class Catalog { this.pageKidsCountCache = new RefSetCache(); } + get version() { + const version = this._catDict.get("Version"); + if (!isName(version)) { + return shadow(this, "version", null); + } + return shadow(this, "version", version.name); + } + + get collection() { + let collection = null; + try { + const obj = this._catDict.get("Collection"); + if (isDict(obj) && obj.size > 0) { + collection = obj; + } + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + info("Cannot fetch Collection entry; assuming no collection is present."); + } + return shadow(this, "collection", collection); + } + + get acroForm() { + let acroForm = null; + try { + const obj = this._catDict.get("AcroForm"); + if (isDict(obj) && obj.size > 0) { + acroForm = obj; + } + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + info("Cannot fetch AcroForm entry; assuming no forms are present."); + } + return shadow(this, "acroForm", acroForm); + } + get metadata() { - const streamRef = this.catDict.getRaw("Metadata"); + const streamRef = this._catDict.getRaw("Metadata"); if (!isRef(streamRef)) { return shadow(this, "metadata", null); } @@ -112,7 +152,7 @@ class Catalog { } get toplevelPagesDict() { - const pagesObj = this.catDict.get("Pages"); + const pagesObj = this._catDict.get("Pages"); if (!isDict(pagesObj)) { throw new FormatError("Invalid top-level pages dictionary."); } @@ -136,7 +176,7 @@ class Catalog { * @private */ _readDocumentOutline() { - let obj = this.catDict.get("Outlines"); + let obj = this._catDict.get("Outlines"); if (!isDict(obj)) { return null; } @@ -257,7 +297,7 @@ class Catalog { get optionalContentConfig() { let config = null; try { - const properties = this.catDict.get("OCProperties"); + const properties = this._catDict.get("OCProperties"); if (!properties) { return shadow(this, "optionalContentConfig", null); } @@ -370,12 +410,12 @@ class Catalog { * @private */ _readDests() { - const obj = this.catDict.get("Names"); + const obj = this._catDict.get("Names"); if (obj && obj.has("Dests")) { return new NameTree(obj.getRaw("Dests"), this.xref); - } else if (this.catDict.has("Dests")) { + } else if (this._catDict.has("Dests")) { // Simple destination dictionary. - return this.catDict.get("Dests"); + return this._catDict.get("Dests"); } return undefined; } @@ -397,7 +437,7 @@ class Catalog { * @private */ _readPageLabels() { - const obj = this.catDict.getRaw("PageLabels"); + const obj = this._catDict.getRaw("PageLabels"); if (!obj) { return null; } @@ -497,7 +537,7 @@ class Catalog { } get pageLayout() { - const obj = this.catDict.get("PageLayout"); + const obj = this._catDict.get("PageLayout"); // Purposely use a non-standard default value, rather than 'SinglePage', to // allow differentiating between `undefined` and /SinglePage since that does // affect the Scroll mode (continuous/non-continuous) used in Adobe Reader. @@ -518,7 +558,7 @@ class Catalog { } get pageMode() { - const obj = this.catDict.get("PageMode"); + const obj = this._catDict.get("PageMode"); let pageMode = "UseNone"; // Default value. if (isName(obj)) { @@ -556,7 +596,7 @@ class Catalog { NumCopies: Number.isInteger, }; - const obj = this.catDict.get("ViewerPreferences"); + const obj = this._catDict.get("ViewerPreferences"); let prefs = null; if (isDict(obj)) { @@ -681,7 +721,7 @@ class Catalog { * NOTE: "JavaScript" actions are, for now, handled by `get javaScript` below. */ get openAction() { - const obj = this.catDict.get("OpenAction"); + const obj = this._catDict.get("OpenAction"); let openAction = null; if (isDict(obj)) { @@ -714,7 +754,7 @@ class Catalog { } get attachments() { - const obj = this.catDict.get("Names"); + const obj = this._catDict.get("Names"); let attachments = null; if (obj && obj.has("EmbeddedFiles")) { @@ -732,7 +772,7 @@ class Catalog { } get javaScript() { - const obj = this.catDict.get("Names"); + const obj = this._catDict.get("Names"); let javaScript = null; function appendIfJavaScriptDict(jsDict) { @@ -768,7 +808,7 @@ class Catalog { } // Append OpenAction "JavaScript" actions to the JavaScript array. - const openAction = this.catDict.get("OpenAction"); + const openAction = this._catDict.get("OpenAction"); if (isDict(openAction) && isName(openAction.get("S"), "JavaScript")) { appendIfJavaScriptDict(openAction); } @@ -813,7 +853,7 @@ class Catalog { getPageDict(pageIndex) { const capability = createPromiseCapability(); - const nodesToVisit = [this.catDict.getRaw("Pages")]; + const nodesToVisit = [this._catDict.getRaw("Pages")]; const visitedNodes = new RefSet(); const xref = this.xref, pageKidsCountCache = this.pageKidsCountCache; diff --git a/test/unit/annotation_spec.js b/test/unit/annotation_spec.js index f948a926ddcf4..d7fef39bf8c2d 100644 --- a/test/unit/annotation_spec.js +++ b/test/unit/annotation_spec.js @@ -41,7 +41,9 @@ describe("annotation", function () { constructor(params) { this.docBaseUrl = params.docBaseUrl || null; this.pdfDocument = { - acroForm: new Dict(), + catalog: { + acroForm: new Dict(), + }, }; } @@ -56,8 +58,8 @@ describe("annotation", function () { }); } - ensureDoc(prop, args) { - return this.ensure(this.pdfDocument, prop, args); + ensureCatalog(prop, args) { + return this.ensure(this.pdfDocument.catalog, prop, args); } } diff --git a/test/unit/document_spec.js b/test/unit/document_spec.js index 503a3ce95ca04..0586898d72bd8 100644 --- a/test/unit/document_spec.js +++ b/test/unit/document_spec.js @@ -13,7 +13,10 @@ * limitations under the License. */ -import { createIdFactory } from "./test_utils.js"; +import { createIdFactory, XRefMock } from "./test_utils.js"; +import { Dict, Name, Ref } from "../../src/core/primitives.js"; +import { PDFDocument } from "../../src/core/document.js"; +import { StringStream } from "../../src/core/stream.js"; describe("document", function () { describe("Page", function () { @@ -40,4 +43,111 @@ describe("document", function () { expect(idFactory1.getDocId()).toEqual("g_d0"); }); }); + + describe("PDFDocument", function () { + const pdfManager = { + get docId() { + return "d0"; + }, + }; + const stream = new StringStream("Dummy_PDF_data"); + + function getDocument(acroForm) { + const pdfDocument = new PDFDocument(pdfManager, stream); + pdfDocument.catalog = { acroForm }; + return pdfDocument; + } + + it("should get form info when no form data is present", function () { + const pdfDocument = getDocument(null); + expect(pdfDocument.formInfo).toEqual({ + hasAcroForm: false, + hasXfa: false, + }); + }); + + it("should get form info when XFA is present", function () { + const acroForm = new Dict(); + + // The `XFA` entry can only be a non-empty array or stream. + acroForm.set("XFA", []); + let pdfDocument = getDocument(acroForm); + expect(pdfDocument.formInfo).toEqual({ + hasAcroForm: false, + hasXfa: false, + }); + + acroForm.set("XFA", ["foo", "bar"]); + pdfDocument = getDocument(acroForm); + expect(pdfDocument.formInfo).toEqual({ + hasAcroForm: false, + hasXfa: true, + }); + + acroForm.set("XFA", new StringStream("")); + pdfDocument = getDocument(acroForm); + expect(pdfDocument.formInfo).toEqual({ + hasAcroForm: false, + hasXfa: false, + }); + + acroForm.set("XFA", new StringStream("non-empty")); + pdfDocument = getDocument(acroForm); + expect(pdfDocument.formInfo).toEqual({ + hasAcroForm: false, + hasXfa: true, + }); + }); + + it("should get form info when AcroForm is present", function () { + const acroForm = new Dict(); + + // The `Fields` entry can only be a non-empty array. + acroForm.set("Fields", []); + let pdfDocument = getDocument(acroForm); + expect(pdfDocument.formInfo).toEqual({ + hasAcroForm: false, + hasXfa: false, + }); + + acroForm.set("Fields", ["foo", "bar"]); + pdfDocument = getDocument(acroForm); + expect(pdfDocument.formInfo).toEqual({ + hasAcroForm: true, + hasXfa: false, + }); + + // If the first bit of the `SigFlags` entry is set and the `Fields` array + // only contains document signatures, then there is no AcroForm data. + acroForm.set("Fields", ["foo", "bar"]); + acroForm.set("SigFlags", 2); + pdfDocument = getDocument(acroForm); + expect(pdfDocument.formInfo).toEqual({ + hasAcroForm: true, + hasXfa: false, + }); + + const annotationDict = new Dict(); + annotationDict.set("FT", Name.get("Sig")); + annotationDict.set("Rect", [0, 0, 0, 0]); + const annotationRef = Ref.get(11, 0); + + const kidsDict = new Dict(); + kidsDict.set("Kids", [annotationRef]); + const kidsRef = Ref.get(10, 0); + + pdfDocument.xref = new XRefMock([ + { ref: annotationRef, data: annotationDict }, + { ref: kidsRef, data: kidsDict }, + ]); + + acroForm.set("Fields", [kidsRef]); + acroForm.set("SigFlags", 3); + pdfDocument = getDocument(acroForm); + expect(pdfDocument.formInfo).toEqual({ + hasAcroForm: false, + hasXfa: false, + }); + }); + }); }); diff --git a/web/app.js b/web/app.js index 9bbe26deb84b8..1ba89133d7317 100644 --- a/web/app.js +++ b/web/app.js @@ -1426,14 +1426,14 @@ const PDFViewerApplication = { this.setTitle(contentDispositionFilename); } - if (info.IsXFAPresent) { + if (info.IsXFAPresent && !info.IsAcroFormPresent) { console.warn("Warning: XFA is not supported"); this._delayedFallback(UNSUPPORTED_FEATURES.forms); } else if ( - info.IsAcroFormPresent && + (info.IsAcroFormPresent || info.IsXFAPresent) && !this.pdfViewer.renderInteractiveForms ) { - console.warn("Warning: AcroForm support is not enabled"); + console.warn("Warning: Interactive form support is not enabled"); this._delayedFallback(UNSUPPORTED_FEATURES.forms); } @@ -1454,8 +1454,10 @@ const PDFViewerApplication = { }); } let formType = null; - if (info.IsAcroFormPresent) { - formType = info.IsXFAPresent ? "xfa" : "acroform"; + if (info.IsXFAPresent) { + formType = "xfa"; + } else if (info.IsAcroFormPresent) { + formType = "acroform"; } this.externalServices.reportTelemetry({ type: "documentInfo",