summaryrefslogtreecommitdiffstats
path: root/filters/kword/pdf/xpdf/xpdf/Catalog.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'filters/kword/pdf/xpdf/xpdf/Catalog.cpp')
-rw-r--r--filters/kword/pdf/xpdf/xpdf/Catalog.cpp371
1 files changed, 371 insertions, 0 deletions
diff --git a/filters/kword/pdf/xpdf/xpdf/Catalog.cpp b/filters/kword/pdf/xpdf/xpdf/Catalog.cpp
new file mode 100644
index 000000000..583666b22
--- /dev/null
+++ b/filters/kword/pdf/xpdf/xpdf/Catalog.cpp
@@ -0,0 +1,371 @@
+//========================================================================
+//
+// Catalog.cpp
+//
+// Copyright 1996-2002 Glyph & Cog, LLC
+//
+//========================================================================
+
+#include <aconf.h>
+
+#ifdef USE_GCC_PRAGMAS
+#pragma implementation
+#endif
+
+#include <limits.h>
+#include <stddef.h>
+#include "gmem.h"
+#include "Object.h"
+#include "XRef.h"
+#include "Array.h"
+#include "Dict.h"
+#include "Page.h"
+#include "Error.h"
+#include "Link.h"
+#include "Catalog.h"
+
+// This define is used to limit the depth of recursive readPageTree calls
+// This is needed because the page tree nodes can reference their parents
+// leaving us in an infinite loop
+// Most sane pdf documents don't have a call depth higher than 10
+#define MAX_CALL_DEPTH 1000
+
+//------------------------------------------------------------------------
+// Catalog
+//------------------------------------------------------------------------
+
+Catalog::Catalog(XRef *xrefA) {
+ Object catDict, pagesDict;
+ Object obj, obj2;
+ int numPages0;
+ int i;
+
+ ok = gTrue;
+ xref = xrefA;
+ pages = NULL;
+ pageRefs = NULL;
+ numPages = pagesSize = 0;
+ baseURI = NULL;
+
+ xref->getCatalog(&catDict);
+ if (!catDict.isDict()) {
+ error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+ goto err1;
+ }
+
+ // read page tree
+ catDict.dictLookup("Pages", &pagesDict);
+ // This should really be isDict("Pages"), but I've seen at least one
+ // PDF file where the /Type entry is missing.
+ if (!pagesDict.isDict()) {
+ error(-1, "Top-level pages object is wrong type (%s)",
+ pagesDict.getTypeName());
+ goto err2;
+ }
+ pagesDict.dictLookup("Count", &obj);
+ if (!obj.isInt()) {
+ error(-1, "Page count in top-level pages object is wrong type (%s)",
+ obj.getTypeName());
+ goto err3;
+ }
+ pagesSize = numPages0 = obj.getInt();
+ obj.free();
+ if ((unsigned) pagesSize >= INT_MAX / sizeof(Page *) ||
+ (unsigned) pagesSize >= INT_MAX / sizeof(Ref)) {
+ error(-1, "Invalid 'pagesSize'");
+ ok = gFalse;
+ return;
+ }
+ pages = (Page **)gmalloc(pagesSize * sizeof(Page *));
+ pageRefs = (Ref *)gmalloc(pagesSize * sizeof(Ref));
+ for (i = 0; i < pagesSize; ++i) {
+ pages[i] = NULL;
+ pageRefs[i].num = -1;
+ pageRefs[i].gen = -1;
+ }
+ numPages = readPageTree(pagesDict.getDict(), NULL, 0, 0);
+ if (numPages != numPages0) {
+ error(-1, "Page count in top-level pages object is incorrect");
+ }
+ pagesDict.free();
+
+ // read named destination dictionary
+ catDict.dictLookup("Dests", &dests);
+
+ // read root of named destination tree
+ if (catDict.dictLookup("Names", &obj)->isDict())
+ obj.dictLookup("Dests", &nameTree);
+ else
+ nameTree.initNull();
+ obj.free();
+
+ // read base URI
+ if (catDict.dictLookup("URI", &obj)->isDict()) {
+ if (obj.dictLookup("Base", &obj2)->isString()) {
+ baseURI = obj2.getString()->copy();
+ }
+ obj2.free();
+ }
+ obj.free();
+
+ // get the metadata stream
+ catDict.dictLookup("Metadata", &metadata);
+
+ // get the structure tree root
+ catDict.dictLookup("StructTreeRoot", &structTreeRoot);
+
+ // get the outline dictionary
+ catDict.dictLookup("Outlines", &outline);
+
+ catDict.free();
+ return;
+
+ err3:
+ obj.free();
+ err2:
+ pagesDict.free();
+ err1:
+ catDict.free();
+ dests.initNull();
+ nameTree.initNull();
+ ok = gFalse;
+}
+
+Catalog::~Catalog() {
+ int i;
+
+ if (pages) {
+ for (i = 0; i < pagesSize; ++i) {
+ if (pages[i]) {
+ delete pages[i];
+ }
+ }
+ gfree(pages);
+ gfree(pageRefs);
+ }
+ dests.free();
+ nameTree.free();
+ if (baseURI) {
+ delete baseURI;
+ }
+ metadata.free();
+ structTreeRoot.free();
+ outline.free();
+}
+
+GString *Catalog::readMetadata() {
+ GString *s;
+ Dict *dict;
+ Object obj;
+ int c;
+
+ if (!metadata.isStream()) {
+ return NULL;
+ }
+ dict = metadata.streamGetDict();
+ if (!dict->lookup("Subtype", &obj)->isName("XML")) {
+ error(-1, "Unknown Metadata type: '%s'",
+ obj.isName() ? obj.getName() : "???");
+ }
+ obj.free();
+ s = new GString();
+ metadata.streamReset();
+ while ((c = metadata.streamGetChar()) != EOF) {
+ s->append(c);
+ }
+ metadata.streamClose();
+ return s;
+}
+
+int Catalog::readPageTree(Dict *pagesDict, PageAttrs *attrs, int start, int callDepth) {
+ Object kids;
+ Object kid;
+ Object kidRef;
+ PageAttrs *attrs1, *attrs2;
+ Page *page;
+ int i, j;
+
+ attrs1 = new PageAttrs(attrs, pagesDict);
+ pagesDict->lookup("Kids", &kids);
+ if (!kids.isArray()) {
+ error(-1, "Kids object (page %d) is wrong type (%s)",
+ start+1, kids.getTypeName());
+ goto err1;
+ }
+ for (i = 0; i < kids.arrayGetLength(); ++i) {
+ kids.arrayGet(i, &kid);
+ if (kid.isDict("Page")) {
+ attrs2 = new PageAttrs(attrs1, kid.getDict());
+ page = new Page(xref, start+1, kid.getDict(), attrs2);
+ if (!page->isOk()) {
+ ++start;
+ goto err3;
+ }
+ if (start >= pagesSize) {
+ pagesSize += 32;
+ if ((unsigned) pagesSize >= INT_MAX / sizeof(Page *) ||
+ (unsigned) pagesSize >= INT_MAX / sizeof(Ref)) {
+ error(-1, "Invalid 'pagesSize' parameter.");
+ goto err3;
+ }
+ pages = (Page **)grealloc(pages, pagesSize * sizeof(Page *));
+ pageRefs = (Ref *)grealloc(pageRefs, pagesSize * sizeof(Ref));
+ for (j = pagesSize - 32; j < pagesSize; ++j) {
+ pages[j] = NULL;
+ pageRefs[j].num = -1;
+ pageRefs[j].gen = -1;
+ }
+ }
+ pages[start] = page;
+ kids.arrayGetNF(i, &kidRef);
+ if (kidRef.isRef()) {
+ pageRefs[start].num = kidRef.getRefNum();
+ pageRefs[start].gen = kidRef.getRefGen();
+ }
+ kidRef.free();
+ ++start;
+ // This should really be isDict("Pages"), but I've seen at least one
+ // PDF file where the /Type entry is missing.
+ } else if (kid.isDict()) {
+ if (callDepth > MAX_CALL_DEPTH) {
+ error(-1, "Limit of %d recursive calls reached while reading the page tree. If your document is correct and not a test to try to force a crash, please report a bug.", MAX_CALL_DEPTH);
+ } else {
+ if ((start = readPageTree(kid.getDict(), attrs1, start, callDepth + 1))
+ < 0)
+ goto err2;
+ }
+ } else {
+ error(-1, "Kid object (page %d) is wrong type (%s)",
+ start+1, kid.getTypeName());
+ goto err2;
+ }
+ kid.free();
+ }
+ delete attrs1;
+ kids.free();
+ return start;
+
+ err3:
+ delete page;
+ err2:
+ kid.free();
+ err1:
+ kids.free();
+ delete attrs1;
+ ok = gFalse;
+ return -1;
+}
+
+int Catalog::findPage(int num, int gen) {
+ int i;
+
+ for (i = 0; i < numPages; ++i) {
+ if (pageRefs[i].num == num && pageRefs[i].gen == gen)
+ return i + 1;
+ }
+ return 0;
+}
+
+LinkDest *Catalog::findDest(GString *name) {
+ LinkDest *dest;
+ Object obj1, obj2;
+ GBool found;
+
+ // try named destination dictionary then name tree
+ found = gFalse;
+ if (dests.isDict()) {
+ if (!dests.dictLookup(name->getCString(), &obj1)->isNull())
+ found = gTrue;
+ else
+ obj1.free();
+ }
+ if (!found && nameTree.isDict()) {
+ if (!findDestInTree(&nameTree, name, &obj1)->isNull())
+ found = gTrue;
+ else
+ obj1.free();
+ }
+ if (!found)
+ return NULL;
+
+ // construct LinkDest
+ dest = NULL;
+ if (obj1.isArray()) {
+ dest = new LinkDest(obj1.getArray());
+ } else if (obj1.isDict()) {
+ if (obj1.dictLookup("D", &obj2)->isArray())
+ dest = new LinkDest(obj2.getArray());
+ else
+ error(-1, "Bad named destination value");
+ obj2.free();
+ } else {
+ error(-1, "Bad named destination value");
+ }
+ obj1.free();
+ if (dest && !dest->isOk()) {
+ delete dest;
+ dest = NULL;
+ }
+
+ return dest;
+}
+
+Object *Catalog::findDestInTree(Object *tree, GString *name, Object *obj) {
+ Object names, name1;
+ Object kids, kid, limits, low, high;
+ GBool done, found;
+ int cmp, i;
+
+ // leaf node
+ if (tree->dictLookup("Names", &names)->isArray()) {
+ done = found = gFalse;
+ for (i = 0; !done && i < names.arrayGetLength(); i += 2) {
+ if (names.arrayGet(i, &name1)->isString()) {
+ cmp = name->cmp(name1.getString());
+ if (cmp == 0) {
+ names.arrayGet(i+1, obj);
+ found = gTrue;
+ done = gTrue;
+ } else if (cmp < 0) {
+ done = gTrue;
+ }
+ name1.free();
+ }
+ }
+ names.free();
+ if (!found)
+ obj->initNull();
+ return obj;
+ }
+ names.free();
+
+ // root or intermediate node
+ done = gFalse;
+ if (tree->dictLookup("Kids", &kids)->isArray()) {
+ for (i = 0; !done && i < kids.arrayGetLength(); ++i) {
+ if (kids.arrayGet(i, &kid)->isDict()) {
+ if (kid.dictLookup("Limits", &limits)->isArray()) {
+ if (limits.arrayGet(0, &low)->isString() &&
+ name->cmp(low.getString()) >= 0) {
+ if (limits.arrayGet(1, &high)->isString() &&
+ name->cmp(high.getString()) <= 0) {
+ findDestInTree(&kid, name, obj);
+ done = gTrue;
+ }
+ high.free();
+ }
+ low.free();
+ }
+ limits.free();
+ }
+ kid.free();
+ }
+ }
+ kids.free();
+
+ // name was outside of ranges of all kids
+ if (!done)
+ obj->initNull();
+
+ return obj;
+}