Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve AcroForm/XFA form type detection #12271

Merged
merged 6 commits into from
Aug 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/core/annotation.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class AnnotationFactory {
* instance.
*/
static create(xref, ref, pdfManager, idFactory) {
return pdfManager.ensureDoc("acroForm").then(acroForm => {
return pdfManager.ensureCatalog("acroForm").then(acroForm => {
return pdfManager.ensure(this, "_create", [
xref,
ref,
Expand Down
124 changes: 78 additions & 46 deletions src/core/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,7 @@ class PDFDocument {
this.stream = stream;
this.xref = new XRef(stream, pdfManager);
this._pagePromises = [];
this._version = null;

const idCounters = {
font: 0,
Expand All @@ -572,42 +573,15 @@ class PDFDocument {
}

parse(recoveryMode) {
this.setup(recoveryMode);

const version = this.catalog.catDict.get("Version");
if (isName(version)) {
this.pdfFormatVersion = version.name;
}

// Check if AcroForms are present in the document.
try {
this.acroForm = this.catalog.catDict.get("AcroForm");
if (this.acroForm) {
this.xfa = this.acroForm.get("XFA");
const fields = this.acroForm.get("Fields");
if ((!Array.isArray(fields) || fields.length === 0) && !this.xfa) {
this.acroForm = null; // No fields and no XFA, so it's not a form.
}
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch AcroForm entry; assuming no AcroForms are present");
this.acroForm = null;
}
this.xref.parse(recoveryMode);
this.catalog = new Catalog(this.pdfManager, this.xref);

// Check if a Collection dictionary is present in the document.
try {
const collection = this.catalog.catDict.get("Collection");
if (isDict(collection) && collection.size > 0) {
this.collection = collection;
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch Collection dictionary.");
// The `checkHeader` method is called before this method and parses the
// version from the header. The specification states in section 7.5.2
// that the version from the catalog, if present, should overwrite the
// version from the header.
if (this.catalog.version) {
this._version = this.catalog.version;
}
}

Expand Down Expand Up @@ -693,27 +667,85 @@ class PDFDocument {
}
version += String.fromCharCode(ch);
}
if (!this.pdfFormatVersion) {
if (!this._version) {
// Remove the "%PDF-" prefix.
this.pdfFormatVersion = version.substring(5);
this._version = version.substring(5);
}
}

parseStartXRef() {
this.xref.setStartXRef(this.startXRef);
}

setup(recoveryMode) {
this.xref.parse(recoveryMode);
this.catalog = new Catalog(this.pdfManager, this.xref);
}

get numPages() {
const linearization = this.linearization;
const num = linearization ? linearization.numPages : this.catalog.numPages;
return shadow(this, "numPages", num);
}

/**
* @private
*/
_hasOnlyDocumentSignatures(fields, recursionDepth = 0) {
const RECURSION_LIMIT = 10;
return fields.every(field => {
field = this.xref.fetchIfRef(field);
if (field.has("Kids")) {
if (++recursionDepth > RECURSION_LIMIT) {
warn("_hasOnlyDocumentSignatures: maximum recursion depth reached");
return false;
}
return this._hasOnlyDocumentSignatures(
field.get("Kids"),
recursionDepth
);
}
const isSignature = isName(field.get("FT"), "Sig");
const rectangle = field.get("Rect");
const isInvisible =
Array.isArray(rectangle) && rectangle.every(value => value === 0);
return isSignature && isInvisible;
});
}

get formInfo() {
const formInfo = { hasAcroForm: false, hasXfa: false };
const acroForm = this.catalog.acroForm;
if (!acroForm) {
return shadow(this, "formInfo", formInfo);
}

try {
// The document contains XFA data if the `XFA` entry is a non-empty
// array or stream.
const xfa = acroForm.get("XFA");
const hasXfa =
(Array.isArray(xfa) && xfa.length > 0) ||
(isStream(xfa) && !xfa.isEmpty);
formInfo.hasXfa = hasXfa;

// The document contains AcroForm data if the `Fields` entry is a
// non-empty array and it doesn't consist of only document signatures.
// This second check is required for files that don't actually contain
// AcroForm data (only XFA data), but that use the `Fields` entry to
// store (invisible) document signatures. This can be detected using
// the first bit of the `SigFlags` integer (see Table 219 in the
// specification).
const fields = acroForm.get("Fields");
const hasFields = Array.isArray(fields) && fields.length > 0;
const sigFlags = acroForm.get("SigFlags");
const hasOnlyDocumentSignatures =
!!(sigFlags & 0x1) && this._hasOnlyDocumentSignatures(fields);
formInfo.hasAcroForm = hasFields && !hasOnlyDocumentSignatures;
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch form information.");
}
return shadow(this, "formInfo", formInfo);
}

get documentInfo() {
const DocumentInfoValidators = {
Title: isString,
Expand All @@ -727,7 +759,7 @@ class PDFDocument {
Trapped: isName,
};

let version = this.pdfFormatVersion;
let version = this._version;
if (
typeof version !== "string" ||
!PDF_HEADER_VERSION_REGEXP.test(version)
Expand All @@ -739,9 +771,9 @@ class PDFDocument {
const docInfo = {
PDFFormatVersion: version,
IsLinearized: !!this.linearization,
IsAcroFormPresent: !!this.acroForm,
IsXFAPresent: !!this.xfa,
IsCollectionPresent: !!this.collection,
IsAcroFormPresent: this.formInfo.hasAcroForm,
IsXFAPresent: this.formInfo.hasXfa,
IsCollectionPresent: !!this.catalog.collection,
};

let infoDict;
Expand Down
76 changes: 58 additions & 18 deletions src/core/obj.js
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ class Catalog {
this.pdfManager = pdfManager;
this.xref = xref;

this.catDict = xref.getCatalogObj();
if (!isDict(this.catDict)) {
this._catDict = xref.getCatalogObj();
if (!isDict(this._catDict)) {
throw new FormatError("Catalog object is not a dictionary.");
}

Expand All @@ -76,8 +76,48 @@ class Catalog {
this.pageKidsCountCache = new RefSetCache();
}

get version() {
const version = this._catDict.get("Version");
if (!isName(version)) {
return shadow(this, "version", null);
}
return shadow(this, "version", version.name);
}

get collection() {
let collection = null;
try {
const obj = this._catDict.get("Collection");
if (isDict(obj) && obj.size > 0) {
collection = obj;
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch Collection entry; assuming no collection is present.");
}
return shadow(this, "collection", collection);
}

get acroForm() {
let acroForm = null;
try {
const obj = this._catDict.get("AcroForm");
if (isDict(obj) && obj.size > 0) {
acroForm = obj;
}
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
info("Cannot fetch AcroForm entry; assuming no forms are present.");
}
return shadow(this, "acroForm", acroForm);
}

get metadata() {
const streamRef = this.catDict.getRaw("Metadata");
const streamRef = this._catDict.getRaw("Metadata");
if (!isRef(streamRef)) {
return shadow(this, "metadata", null);
}
Expand Down Expand Up @@ -112,7 +152,7 @@ class Catalog {
}

get toplevelPagesDict() {
const pagesObj = this.catDict.get("Pages");
const pagesObj = this._catDict.get("Pages");
if (!isDict(pagesObj)) {
throw new FormatError("Invalid top-level pages dictionary.");
}
Expand All @@ -136,7 +176,7 @@ class Catalog {
* @private
*/
_readDocumentOutline() {
let obj = this.catDict.get("Outlines");
let obj = this._catDict.get("Outlines");
if (!isDict(obj)) {
return null;
}
Expand Down Expand Up @@ -257,7 +297,7 @@ class Catalog {
get optionalContentConfig() {
let config = null;
try {
const properties = this.catDict.get("OCProperties");
const properties = this._catDict.get("OCProperties");
if (!properties) {
return shadow(this, "optionalContentConfig", null);
}
Expand Down Expand Up @@ -370,12 +410,12 @@ class Catalog {
* @private
*/
_readDests() {
const obj = this.catDict.get("Names");
const obj = this._catDict.get("Names");
if (obj && obj.has("Dests")) {
return new NameTree(obj.getRaw("Dests"), this.xref);
} else if (this.catDict.has("Dests")) {
} else if (this._catDict.has("Dests")) {
// Simple destination dictionary.
return this.catDict.get("Dests");
return this._catDict.get("Dests");
}
return undefined;
}
Expand All @@ -397,7 +437,7 @@ class Catalog {
* @private
*/
_readPageLabels() {
const obj = this.catDict.getRaw("PageLabels");
const obj = this._catDict.getRaw("PageLabels");
if (!obj) {
return null;
}
Expand Down Expand Up @@ -497,7 +537,7 @@ class Catalog {
}

get pageLayout() {
const obj = this.catDict.get("PageLayout");
const obj = this._catDict.get("PageLayout");
// Purposely use a non-standard default value, rather than 'SinglePage', to
// allow differentiating between `undefined` and /SinglePage since that does
// affect the Scroll mode (continuous/non-continuous) used in Adobe Reader.
Expand All @@ -518,7 +558,7 @@ class Catalog {
}

get pageMode() {
const obj = this.catDict.get("PageMode");
const obj = this._catDict.get("PageMode");
let pageMode = "UseNone"; // Default value.

if (isName(obj)) {
Expand Down Expand Up @@ -556,7 +596,7 @@ class Catalog {
NumCopies: Number.isInteger,
};

const obj = this.catDict.get("ViewerPreferences");
const obj = this._catDict.get("ViewerPreferences");
let prefs = null;

if (isDict(obj)) {
Expand Down Expand Up @@ -681,7 +721,7 @@ class Catalog {
* NOTE: "JavaScript" actions are, for now, handled by `get javaScript` below.
*/
get openAction() {
const obj = this.catDict.get("OpenAction");
const obj = this._catDict.get("OpenAction");
let openAction = null;

if (isDict(obj)) {
Expand Down Expand Up @@ -714,7 +754,7 @@ class Catalog {
}

get attachments() {
const obj = this.catDict.get("Names");
const obj = this._catDict.get("Names");
let attachments = null;

if (obj && obj.has("EmbeddedFiles")) {
Expand All @@ -732,7 +772,7 @@ class Catalog {
}

get javaScript() {
const obj = this.catDict.get("Names");
const obj = this._catDict.get("Names");

let javaScript = null;
function appendIfJavaScriptDict(jsDict) {
Expand Down Expand Up @@ -768,7 +808,7 @@ class Catalog {
}

// Append OpenAction "JavaScript" actions to the JavaScript array.
const openAction = this.catDict.get("OpenAction");
const openAction = this._catDict.get("OpenAction");
if (isDict(openAction) && isName(openAction.get("S"), "JavaScript")) {
appendIfJavaScriptDict(openAction);
}
Expand Down Expand Up @@ -813,7 +853,7 @@ class Catalog {

getPageDict(pageIndex) {
const capability = createPromiseCapability();
const nodesToVisit = [this.catDict.getRaw("Pages")];
const nodesToVisit = [this._catDict.getRaw("Pages")];
const visitedNodes = new RefSet();
const xref = this.xref,
pageKidsCountCache = this.pageKidsCountCache;
Expand Down
8 changes: 5 additions & 3 deletions test/unit/annotation_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ describe("annotation", function () {
constructor(params) {
this.docBaseUrl = params.docBaseUrl || null;
this.pdfDocument = {
acroForm: new Dict(),
catalog: {
acroForm: new Dict(),
},
};
}

Expand All @@ -56,8 +58,8 @@ describe("annotation", function () {
});
}

ensureDoc(prop, args) {
return this.ensure(this.pdfDocument, prop, args);
ensureCatalog(prop, args) {
return this.ensure(this.pdfDocument.catalog, prop, args);
}
}

Expand Down
Loading