summaryrefslogtreecommitdiffstats
path: root/filters/kword/pdf/xpdf/xpdf/pdftotext.cc
diff options
context:
space:
mode:
Diffstat (limited to 'filters/kword/pdf/xpdf/xpdf/pdftotext.cc')
-rw-r--r--filters/kword/pdf/xpdf/xpdf/pdftotext.cc322
1 files changed, 0 insertions, 322 deletions
diff --git a/filters/kword/pdf/xpdf/xpdf/pdftotext.cc b/filters/kword/pdf/xpdf/xpdf/pdftotext.cc
deleted file mode 100644
index 150954fc9..000000000
--- a/filters/kword/pdf/xpdf/xpdf/pdftotext.cc
+++ /dev/null
@@ -1,322 +0,0 @@
-//========================================================================
-//
-// pdftotext.cc
-//
-// Copyright 1997-2002 Glyph & Cog, LLC
-//
-//========================================================================
-
-#include <aconf.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <string.h>
-#include "parseargs.h"
-#include "GString.h"
-#include "gmem.h"
-#include "GlobalParams.h"
-#include "Object.h"
-#include "Stream.h"
-#include "Array.h"
-#include "Dict.h"
-#include "XRef.h"
-#include "Catalog.h"
-#include "Page.h"
-#include "PDFDoc.h"
-#include "TextOutputDev.h"
-#include "CharTypes.h"
-#include "UnicodeMap.h"
-#include "Error.h"
-#include "config.h"
-
-static void printInfoString(FILE *f, Dict *infoDict, char *key,
- char *text1, char *text2, UnicodeMap *uMap);
-static void printInfoDate(FILE *f, Dict *infoDict, char *key, char *fmt);
-
-static int firstPage = 1;
-static int lastPage = 0;
-static GBool rawOrder = gFalse;
-static GBool htmlMeta = gFalse;
-static char textEncName[128] = "";
-static char textEOL[16] = "";
-static char ownerPassword[33] = "";
-static char userPassword[33] = "";
-static GBool quiet = gFalse;
-static char cfgFileName[256] = "";
-static GBool printVersion = gFalse;
-static GBool printHelp = gFalse;
-
-static ArgDesc argDesc[] = {
- {"-f", argInt, &firstPage, 0,
- "first page to convert"},
- {"-l", argInt, &lastPage, 0,
- "last page to convert"},
- {"-raw", argFlag, &rawOrder, 0,
- "keep strings in content stream order"},
- {"-htmlmeta", argFlag, &htmlMeta, 0,
- "generate a simple HTML file, including the meta information"},
- {"-enc", argString, textEncName, sizeof(textEncName),
- "output text encoding name"},
- {"-eol", argString, textEOL, sizeof(textEOL),
- "output end-of-line convention (unix, dos, or mac)"},
- {"-opw", argString, ownerPassword, sizeof(ownerPassword),
- "owner password (for encrypted files)"},
- {"-upw", argString, userPassword, sizeof(userPassword),
- "user password (for encrypted files)"},
- {"-q", argFlag, &quiet, 0,
- "don't print any messages or errors"},
- {"-cfg", argString, cfgFileName, sizeof(cfgFileName),
- "configuration file to use in place of .xpdfrc"},
- {"-v", argFlag, &printVersion, 0,
- "print copyright and version info"},
- {"-h", argFlag, &printHelp, 0,
- "print usage information"},
- {"-help", argFlag, &printHelp, 0,
- "print usage information"},
- {"--help", argFlag, &printHelp, 0,
- "print usage information"},
- {"-?", argFlag, &printHelp, 0,
- "print usage information"},
- {NULL}
-};
-
-int main(int argc, char *argv[]) {
- PDFDoc *doc;
- GString *fileName;
- GString *textFileName;
- GString *ownerPW, *userPW;
- TextOutputDev *textOut;
- FILE *f;
- UnicodeMap *uMap;
- Object info;
- GBool ok;
- char *p;
- int exitCode;
-
- exitCode = 99;
-
- // parse args
- ok = parseArgs(argDesc, &argc, argv);
- if (!ok || argc < 2 || argc > 3 || printVersion || printHelp) {
- fprintf(stderr, "pdftotext version %s\n", xpdfVersion);
- fprintf(stderr, "%s\n", xpdfCopyright);
- if (!printVersion) {
- printUsage("pdftotext", "<PDF-file> [<text-file>]", argDesc);
- }
- goto err0;
- }
- fileName = new GString(argv[1]);
-
- // read config file
- globalParams = new GlobalParams(cfgFileName);
- if (textEncName[0]) {
- globalParams->setTextEncoding(textEncName);
- }
- if (textEOL[0]) {
- if (!globalParams->setTextEOL(textEOL)) {
- fprintf(stderr, "Bad '-eol' value on command line\n");
- }
- }
- if (quiet) {
- globalParams->setErrQuiet(quiet);
- }
-
- // get mapping to output encoding
- if (!(uMap = globalParams->getTextEncoding())) {
- error(-1, "Couldn't get text encoding");
- delete fileName;
- goto err1;
- }
-
- // open PDF file
- if (ownerPassword[0]) {
- ownerPW = new GString(ownerPassword);
- } else {
- ownerPW = NULL;
- }
- if (userPassword[0]) {
- userPW = new GString(userPassword);
- } else {
- userPW = NULL;
- }
- doc = new PDFDoc(fileName, ownerPW, userPW);
- if (userPW) {
- delete userPW;
- }
- if (ownerPW) {
- delete ownerPW;
- }
- if (!doc->isOk()) {
- exitCode = 1;
- goto err2;
- }
-
- // check for copy permission
- if (!doc->okToCopy()) {
- error(-1, "Copying of text from this document is not allowed.");
- exitCode = 3;
- goto err2;
- }
-
- // construct text file name
- if (argc == 3) {
- textFileName = new GString(argv[2]);
- } else {
- p = fileName->getCString() + fileName->getLength() - 4;
- if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) {
- textFileName = new GString(fileName->getCString(),
- fileName->getLength() - 4);
- } else {
- textFileName = fileName->copy();
- }
- textFileName->append(htmlMeta ? ".html" : ".txt");
- }
-
- // get page range
- if (firstPage < 1) {
- firstPage = 1;
- }
- if (lastPage < 1 || lastPage > doc->getNumPages()) {
- lastPage = doc->getNumPages();
- }
-
- // write HTML header
- if (htmlMeta) {
- if (!textFileName->cmp("-")) {
- f = stdout;
- } else {
- if (!(f = fopen(textFileName->getCString(), "wb"))) {
- error(-1, "Couldn't open text file '%s'", textFileName->getCString());
- exitCode = 2;
- goto err3;
- }
- }
- fputs("<html>\n", f);
- fputs("<head>\n", f);
- doc->getDocInfo(&info);
- if (info.isDict()) {
- printInfoString(f, info.getDict(), "Title", "<title>", "</title>\n",
- uMap);
- printInfoString(f, info.getDict(), "Subject",
- "<meta name=\"Subject\" content=\"", "\">\n", uMap);
- printInfoString(f, info.getDict(), "Keywords",
- "<meta name=\"Keywords\" content=\"", "\">\n", uMap);
- printInfoString(f, info.getDict(), "Author",
- "<meta name=\"Author\" content=\"", "\">\n", uMap);
- printInfoString(f, info.getDict(), "Creator",
- "<meta name=\"Creator\" content=\"", "\">\n", uMap);
- printInfoString(f, info.getDict(), "Producer",
- "<meta name=\"Producer\" content=\"", "\">\n", uMap);
- printInfoDate(f, info.getDict(), "CreationDate",
- "<meta name=\"CreationDate\" content=\"\">\n");
- printInfoDate(f, info.getDict(), "LastModifiedDate",
- "<meta name=\"ModDate\" content=\"\">\n");
- }
- info.free();
- fputs("</head>\n", f);
- fputs("<body>\n", f);
- fputs("<pre>\n", f);
- if (f != stdout) {
- fclose(f);
- }
- }
-
- // write text file
- textOut = new TextOutputDev(textFileName->getCString(), rawOrder, htmlMeta);
- if (textOut->isOk()) {
- doc->displayPages(textOut, firstPage, lastPage, 72, 0, gFalse);
- } else {
- delete textOut;
- exitCode = 2;
- goto err3;
- }
- delete textOut;
-
- // write end of HTML file
- if (htmlMeta) {
- if (!textFileName->cmp("-")) {
- f = stdout;
- } else {
- if (!(f = fopen(textFileName->getCString(), "ab"))) {
- error(-1, "Couldn't open text file '%s'", textFileName->getCString());
- exitCode = 2;
- goto err3;
- }
- }
- fputs("</pre>\n", f);
- fputs("</body>\n", f);
- fputs("</html>\n", f);
- if (f != stdout) {
- fclose(f);
- }
- }
-
- exitCode = 0;
-
- // clean up
- err3:
- delete textFileName;
- err2:
- delete doc;
- uMap->decRefCnt();
- err1:
- delete globalParams;
- err0:
-
- // check for memory leaks
- Object::memCheck(stderr);
- gMemReport(stderr);
-
- return exitCode;
-}
-
-static void printInfoString(FILE *f, Dict *infoDict, char *key,
- char *text1, char *text2, UnicodeMap *uMap) {
- Object obj;
- GString *s1;
- GBool isUnicode;
- Unicode u;
- char buf[8];
- int i, n;
-
- if (infoDict->lookup(key, &obj)->isString()) {
- fputs(text1, f);
- s1 = obj.getString();
- if ((s1->getChar(0) & 0xff) == 0xfe &&
- (s1->getChar(1) & 0xff) == 0xff) {
- isUnicode = gTrue;
- i = 2;
- } else {
- isUnicode = gFalse;
- i = 0;
- }
- while (i < obj.getString()->getLength()) {
- if (isUnicode) {
- u = ((s1->getChar(i) & 0xff) << 8) |
- (s1->getChar(i+1) & 0xff);
- i += 2;
- } else {
- u = s1->getChar(i) & 0xff;
- ++i;
- }
- n = uMap->mapUnicode(u, buf, sizeof(buf));
- fwrite(buf, 1, n, f);
- }
- fputs(text2, f);
- }
- obj.free();
-}
-
-static void printInfoDate(FILE *f, Dict *infoDict, char *key, char *fmt) {
- Object obj;
- char *s;
-
- if (infoDict->lookup(key, &obj)->isString()) {
- s = obj.getString()->getCString();
- if (s[0] == 'D' && s[1] == ':') {
- s += 2;
- }
- fprintf(f, fmt, s);
- }
- obj.free();
-}