diff options
Diffstat (limited to 'filters/kword/pdf/xpdf/xpdf/TextOutputDev.cc')
-rw-r--r-- | filters/kword/pdf/xpdf/xpdf/TextOutputDev.cc | 1243 |
1 files changed, 0 insertions, 1243 deletions
diff --git a/filters/kword/pdf/xpdf/xpdf/TextOutputDev.cc b/filters/kword/pdf/xpdf/xpdf/TextOutputDev.cc deleted file mode 100644 index 8eb789032..000000000 --- a/filters/kword/pdf/xpdf/xpdf/TextOutputDev.cc +++ /dev/null @@ -1,1243 +0,0 @@ -//======================================================================== -// -// TextOutputDev.cc -// -// Copyright 1997-2002 Glyph & Cog, LLC -// -//======================================================================== - -#include <aconf.h> - -#ifdef USE_GCC_PRAGMAS -#pragma implementation -#endif - -#include <stdio.h> -#include <stdlib.h> -#include <stddef.h> -#include <math.h> -#include <ctype.h> -#include "GString.h" -#include "gmem.h" -#include "config.h" -#include "Error.h" -#include "GlobalParams.h" -#include "UnicodeMap.h" -#include "GfxState.h" -#include "TextOutputDev.h" - -#ifdef MACOS -// needed for setting type/creator of MacOS files -#include "ICSupport.h" -#endif - -//------------------------------------------------------------------------ - -#define textOutSpace 0.2 -#define textOutColSpace 0.2 - -//------------------------------------------------------------------------ - -struct TextOutColumnEdge { - double x, y0, y1; -}; - -//------------------------------------------------------------------------ -// TextBlock -//------------------------------------------------------------------------ - -TextBlock::TextBlock() { - strings = NULL; - next = NULL; - xyNext = NULL; - text = NULL; - xRight = NULL; - col = NULL; -} - -TextBlock::~TextBlock() { - TextString *p1, *p2; - - for (p1 = strings; p1; p1 = p2) { - p2 = p1->next; - delete p1; - } - gfree(text); - gfree(xRight); - gfree(col); -} - -//------------------------------------------------------------------------ -// TextLine -//------------------------------------------------------------------------ - -TextLine::TextLine() { - blocks = NULL; - next = NULL; -} - -TextLine::~TextLine() { - TextBlock *p1, *p2; - - for (p1 = blocks; p1; p1 = p2) { - p2 = p1->next; - delete p1; - } -} - -//------------------------------------------------------------------------ -// TextString -//------------------------------------------------------------------------ - -TextString::TextString(GfxState *state, double x0, double y0, - double fontSize) { - GfxFont *font; - double x, y; - - state->transform(x0, y0, &x, &y); - if ((font = state->getFont())) { - yMin = y - font->getAscent() * fontSize; - yMax = y - font->getDescent() * fontSize; - } else { - // this means that the PDF file draws text without a current font, - // which should never happen - yMin = y - 0.95 * fontSize; - yMax = y + 0.35 * fontSize; - } - if (yMin == yMax) { - // this is a sanity check for a case that shouldn't happen -- but - // if it does happen, we want to avoid dividing by zero later - yMin = y; - yMax = y + 1; - } - marked = gFalse; - text = NULL; - xRight = NULL; - len = size = 0; - next = NULL; -} - - -TextString::~TextString() { - gfree(text); - gfree(xRight); -} - -void TextString::addChar(GfxState */*state*/, double x, double /*y*/, - double dx, double /*dy*/, Unicode u) { - if (len == size) { - size += 16; - text = (Unicode *)grealloc(text, size * sizeof(Unicode)); - xRight = (double *)grealloc(xRight, size * sizeof(double)); - } - text[len] = u; - if (len == 0) { - xMin = x; - } - xMax = xRight[len] = x + dx; - ++len; -} - -//------------------------------------------------------------------------ -// TextPage -//------------------------------------------------------------------------ - -TextPage::TextPage(GBool rawOrderA) { - rawOrder = rawOrderA; - curStr = NULL; - fontSize = 0; - xyStrings = NULL; - xyCur1 = xyCur2 = NULL; - lines = NULL; - nest = 0; - nTinyChars = 0; -} - -TextPage::~TextPage() { - clear(); -} - -void TextPage::updateFont(GfxState *state) { - GfxFont *font; - double *fm; - char *name; - int code, mCode, letterCode, anyCode; - double w; - - // adjust the font size - fontSize = state->getTransformedFontSize(); - if ((font = state->getFont()) && font->getType() == fontType3) { - // This is a hack which makes it possible to deal with some Type 3 - // fonts. The problem is that it's impossible to know what the - // base coordinate system used in the font is without actually - // rendering the font. This code tries to guess by looking at the - // width of the character 'm' (which breaks if the font is a - // subset that doesn't contain 'm'). - mCode = letterCode = anyCode = -1; - for (code = 0; code < 256; ++code) { - name = ((Gfx8BitFont *)font)->getCharName(code); - if (name && name[0] == 'm' && name[1] == '\0') { - mCode = code; - } - if (letterCode < 0 && name && name[1] == '\0' && - ((name[0] >= 'A' && name[0] <= 'Z') || - (name[0] >= 'a' && name[0] <= 'z'))) { - letterCode = code; - } - if (anyCode < 0 && name && ((Gfx8BitFont *)font)->getWidth(code) > 0) { - anyCode = code; - } - } - if (mCode >= 0 && - (w = ((Gfx8BitFont *)font)->getWidth(mCode)) > 0) { - // 0.6 is a generic average 'm' width -- yes, this is a hack - fontSize *= w / 0.6; - } else if (letterCode >= 0 && - (w = ((Gfx8BitFont *)font)->getWidth(letterCode)) > 0) { - // even more of a hack: 0.5 is a generic letter width - fontSize *= w / 0.5; - } else if (anyCode >= 0 && - (w = ((Gfx8BitFont *)font)->getWidth(anyCode)) > 0) { - // better than nothing: 0.5 is a generic character width - fontSize *= w / 0.5; - } - fm = font->getFontMatrix(); - if (fm[0] != 0) { - fontSize *= fabs(fm[3] / fm[0]); - } - } -} - -void TextPage::beginString(GfxState *state, double x0, double y0) { - // This check is needed because Type 3 characters can contain - // text-drawing operations. - if (curStr) { - ++nest; - return; - } - - curStr = new TextString(state, x0, y0, fontSize); -} - -void TextPage::addChar(GfxState *state, double x, double y, - double dx, double dy, Unicode *u, int uLen) { - double x1, y1, w1, h1, dx2, dy2; - int n, i; - - state->transform(x, y, &x1, &y1); - if (x1 < 0 || x1 > state->getPageWidth() || - y1 < 0 || y1 > state->getPageHeight()) { - return; - } - state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(), - 0, &dx2, &dy2); - dx -= dx2; - dy -= dy2; - state->transformDelta(dx, dy, &w1, &h1); - if (!globalParams->getTextKeepTinyChars() && - fabs(w1) < 3 && fabs(h1) < 3) { - if (++nTinyChars > 20000) { - return; - } - } - n = curStr->len; - if (n > 0 && x1 - curStr->xRight[n-1] > - 0.1 * (curStr->yMax - curStr->yMin)) { - // large char spacing is sometimes used to move text around - endString(); - beginString(state, x, y); - } - if (uLen == 1 && u[0] == (Unicode)0x20 && - w1 > 0.5 * (curStr->yMax - curStr->yMin)) { - // large word spacing is sometimes used to move text around - return; - } - if (uLen != 0) { - w1 /= uLen; - h1 /= uLen; - } - for (i = 0; i < uLen; ++i) { - curStr->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]); - } -} - -void TextPage::endString() { - // This check is needed because Type 3 characters can contain - // text-drawing operations. - if (nest > 0) { - --nest; - return; - } - - addString(curStr); - curStr = NULL; -} - -void TextPage::addString(TextString *str) { - TextString *p1, *p2; - - // throw away zero-length strings -- they don't have valid xMin/xMax - // values, and they're useless anyway - if (str->len == 0) { - delete str; - return; - } - - // insert string in xy list - if (rawOrder) { - p1 = xyCur1; - p2 = NULL; - } else if ((!xyCur1 || xyBefore(xyCur1, str)) && - (!xyCur2 || xyBefore(str, xyCur2))) { - p1 = xyCur1; - p2 = xyCur2; - } else if (xyCur1 && xyBefore(xyCur1, str)) { - for (p1 = xyCur1, p2 = xyCur2; p2; p1 = p2, p2 = p2->next) { - if (xyBefore(str, p2)) { - break; - } - } - xyCur2 = p2; - } else { - for (p1 = NULL, p2 = xyStrings; p2; p1 = p2, p2 = p2->next) { - if (xyBefore(str, p2)) { - break; - } - } - xyCur2 = p2; - } - xyCur1 = str; - if (p1) { - p1->next = str; - } else { - xyStrings = str; - } - str->next = p2; -} - -void TextPage::coalesce() { - TextLine *line, *line0; - TextBlock *yxBlocks, *xyBlocks, *blk, *blk0, *blk1, *blk2; - TextString *str0, *str1, *str2, *str3, *str4; - TextString *str1prev, *str2prev, *str3prev; - TextOutColumnEdge *edges; - UnicodeMap *uMap; - GBool isUnicode; - char buf[8]; - int edgesLength, edgesSize; - double x, yMin, yMax; - double space, fit1, fit2, h; - int col1, col2, d; - int i, j; - -#if 0 //~ for debugging - for (str1 = xyStrings; str1; str1 = str1->next) { - printf("x=%.2f..%.2f y=%.2f..%.2f size=%.2f '", - str1->xMin, str1->xMax, str1->yMin, str1->yMax, - (str1->yMax - str1->yMin)); - for (i = 0; i < str1->len; ++i) { - fputc(str1->text[i] & 0xff, stdout); - } - printf("'\n"); - } - printf("\n------------------------------------------------------------\n\n"); -#endif - - // build the list of column edges - edges = NULL; - edgesLength = edgesSize = 0; - if (!rawOrder) { - for (str1prev = NULL, str1 = xyStrings; - str1; - str1prev = str1, str1 = str1->next) { - if (str1->marked) { - continue; - } - h = str1->yMax - str1->yMin; - if (str1prev && (str1->xMin - str1prev->xMax) / h < textOutColSpace) { - continue; - } - x = str1->xMin; - yMin = str1->yMin; - yMax = str1->yMax; - for (str2prev = str1, str2 = str1->next; - str2; - str2prev = str2, str2 = str2->next) { - h = str2->yMax - str2->yMin; - if (!str2->marked && - (str2->xMin - str2prev->xMax) / h > textOutColSpace && - fabs(str2->xMin - x) < 0.5 && - str2->yMin - yMax < 0.3 * h && - yMin - str2->yMax < 0.3 * h) { - break; - } - } - if (str2) { - if (str2->yMin < yMin) { - yMin = str2->yMin; - } - if (str2->yMax > yMax) { - yMax = str2->yMax; - } - str2->marked = gTrue; - for (str3prev = str1, str3 = str1->next; - str3; - str3prev = str3, str3 = str3->next) { - h = str3->yMax - str3->yMin; - if (!str3->marked && - (str3->xMin - str3prev->xMax) / h > textOutColSpace && - fabs(str3->xMin - x) < 0.5 && - str3->yMin - yMax < 0.3 * h && - yMin - str3->yMax < 0.3 * h) { - break; - } - } - if (str3) { - if (str3->yMin < yMin) { - yMin = str3->yMin; - } - if (str3->yMax > yMax) { - yMax = str3->yMax; - } - str3->marked = gTrue; - do { - for (str2prev = str1, str2 = str1->next; - str2; - str2prev = str2, str2 = str2->next) { - h = str2->yMax - str2->yMin; - if (!str2->marked && - (str2->xMin - str2prev->xMax) / h > textOutColSpace && - fabs(str2->xMin - x) < 0.5 && - str2->yMin - yMax < 0.3 * h && - yMin - str2->yMax < 0.3 * h) { - if (str2->yMin < yMin) { - yMin = str2->yMin; - } - if (str2->yMax > yMax) { - yMax = str2->yMax; - } - str2->marked = gTrue; - break; - } - } - } while (str2); - if (edgesLength == edgesSize) { - edgesSize = edgesSize ? 2 * edgesSize : 16; - edges = (TextOutColumnEdge *) - grealloc(edges, edgesSize * sizeof(TextOutColumnEdge)); - } - edges[edgesLength].x = x; - edges[edgesLength].y0 = yMin; - edges[edgesLength].y1 = yMax; - ++edgesLength; - } else { - str2->marked = gFalse; - } - } - str1->marked = gTrue; - } - } - -#if 0 //~ for debugging - printf("column edges:\n"); - for (i = 0; i < edgesLength; ++i) { - printf("%d: x=%.2f y0=%.2f y1=%.2f\n", - i, edges[i].x, edges[i].y0, edges[i].y1); - } - printf("\n------------------------------------------------------------\n\n"); -#endif - - // build the blocks - yxBlocks = NULL; - blk1 = blk2 = NULL; - while (xyStrings) { - - // build the block - str0 = xyStrings; - xyStrings = xyStrings->next; - str0->next = NULL; - blk = new TextBlock(); - blk->strings = str0; - blk->xMin = str0->xMin; - blk->xMax = str0->xMax; - blk->yMin = str0->yMin; - blk->yMax = str0->yMax; - while (xyStrings) { - str1 = NULL; - str2 = xyStrings; - fit1 = coalesceFit(str0, str2); - if (!rawOrder) { - // look for best-fitting string - space = str0->yMax - str0->yMin; - for (str3 = xyStrings, str4 = xyStrings->next; - str4 && str4->xMin - str0->xMax <= space; - str3 = str4, str4 = str4->next) { - fit2 = coalesceFit(str0, str4); - if (fit2 < fit1) { - str1 = str3; - str2 = str4; - fit1 = fit2; - } - } - } - if (fit1 > 1) { - // no fit - we're done with this block - break; - } - - // if we've hit a column edge we're done with this block - if (fit1 > 0.2) { - for (i = 0; i < edgesLength; ++i) { - if (str0->xMax < edges[i].x + 0.5 && edges[i].x - 0.5 < str2->xMin && - str0->yMin < edges[i].y1 && str0->yMax > edges[i].y0 && - str2->yMin < edges[i].y1 && str2->yMax > edges[i].y0) { - break; - } - } - if (i < edgesLength) { - break; - } - } - - if (str1) { - str1->next = str2->next; - } else { - xyStrings = str2->next; - } - str0->next = str2; - str2->next = NULL; - if (str2->xMax > blk->xMax) { - blk->xMax = str2->xMax; - } - if (str2->yMin < blk->yMin) { - blk->yMin = str2->yMin; - } - if (str2->yMax > blk->yMax) { - blk->yMax = str2->yMax; - } - str0 = str2; - } - - // insert block on list - if (!rawOrder) { - // insert block on list in yx order - for (blk1 = NULL, blk2 = yxBlocks; - blk2 && !yxBefore(blk, blk2); - blk1 = blk2, blk2 = blk2->next) ; - } - blk->next = blk2; - if (blk1) { - blk1->next = blk; - } else { - yxBlocks = blk; - } - blk1 = blk; - } - - gfree(edges); - - // the strings are now owned by the lines/blocks tree - xyStrings = NULL; - - // build the block text - uMap = globalParams->getTextEncoding(); - isUnicode = uMap ? uMap->isUnicode() : gFalse; - for (blk = yxBlocks; blk; blk = blk->next) { - blk->len = 0; - for (str1 = blk->strings; str1; str1 = str1->next) { - blk->len += str1->len; - if (str1->next && str1->next->xMin - str1->xMax > - textOutSpace * (str1->yMax - str1->yMin)) { - str1->spaceAfter = gTrue; - ++blk->len; - } else { - str1->spaceAfter = gFalse; - } - } - blk->text = (Unicode *)gmalloc(blk->len * sizeof(Unicode)); - blk->xRight = (double *)gmalloc(blk->len * sizeof(double)); - blk->col = (int *)gmalloc(blk->len * sizeof(int)); - i = 0; - for (str1 = blk->strings; str1; str1 = str1->next) { - for (j = 0; j < str1->len; ++j) { - blk->text[i] = str1->text[j]; - blk->xRight[i] = str1->xRight[j]; - ++i; - } - if (str1->spaceAfter) { - blk->text[i] = (Unicode)0x0020; - blk->xRight[i] = str1->next->xMin; - ++i; - } - } - blk->convertedLen = 0; - for (j = 0; j < blk->len; ++j) { - blk->col[j] = blk->convertedLen; - if (isUnicode) { - ++blk->convertedLen; - } else if (uMap) { - blk->convertedLen += uMap->mapUnicode(blk->text[j], buf, sizeof(buf)); - } - } - } - if (uMap) { - uMap->decRefCnt(); - } - -#if 0 //~ for debugging - for (blk = yxBlocks; blk; blk = blk->next) { - printf("[block: x=%.2f..%.2f y=%.2f..%.2f len=%d]\n", - blk->xMin, blk->xMax, blk->yMin, blk->yMax, blk->len); - TextString *str; - for (str = blk->strings; str; str = str->next) { - printf(" x=%.2f..%.2f y=%.2f..%.2f size=%.2f'", - str->xMin, str->xMax, str->yMin, str->yMax, - (str->yMax - str->yMin)); - for (i = 0; i < str->len; ++i) { - fputc(str->text[i] & 0xff, stdout); - } - if (str->spaceAfter) { - fputc(' ', stdout); - } - printf("'\n"); - } - } - printf("\n------------------------------------------------------------\n\n"); -#endif - - // build the lines - lines = NULL; - line0 = NULL; - while (yxBlocks) { - blk0 = yxBlocks; - yxBlocks = yxBlocks->next; - blk0->next = NULL; - line = new TextLine(); - line->blocks = blk0; - line->yMin = blk0->yMin; - line->yMax = blk0->yMax; - while (yxBlocks) { - - // remove duplicated text (fake boldface, shadowed text) - h = blk0->yMax - blk0->yMin; - if (yxBlocks->len == blk0->len && - !memcmp(yxBlocks->text, blk0->text, - yxBlocks->len * sizeof(Unicode)) && - fabs(yxBlocks->yMin - blk0->yMin) / h < 0.2 && - fabs(yxBlocks->yMax - blk0->yMax) / h < 0.2 && - fabs(yxBlocks->xMin - blk0->xMin) / h < 0.2 && - fabs(yxBlocks->xMax - blk0->xMax) / h < 0.2) { - blk1 = yxBlocks; - yxBlocks = yxBlocks->next; - delete blk1; - continue; - } - - if (rawOrder && yxBlocks->yMax < blk0->yMin) { - break; - } - if (yxBlocks->yMin > 0.2*blk0->yMin + 0.8*blk0->yMax || - yxBlocks->xMin < blk0->xMax) { - break; - } - blk1 = yxBlocks; - yxBlocks = yxBlocks->next; - blk0->next = blk1; - blk1->next = NULL; - if (blk1->yMin < line->yMin) { - line->yMin = blk1->yMin; - } - if (blk1->yMax > line->yMax) { - line->yMax = blk1->yMax; - } - blk0 = blk1; - } - if (line0) { - line0->next = line; - } else { - lines = line; - } - line->next = NULL; - line0 = line; - } - - - // sort the blocks into xy order - xyBlocks = NULL; - for (line = lines; line; line = line->next) { - for (blk = line->blocks; blk; blk = blk->next) { - for (blk1 = NULL, blk2 = xyBlocks; - blk2 && !xyBefore(blk, blk2); - blk1 = blk2, blk2 = blk2->xyNext) ; - blk->xyNext = blk2; - if (blk1) { - blk1->xyNext = blk; - } else { - xyBlocks = blk; - } - } - } - -#if 0 //~ for debugging - for (blk = xyBlocks; blk; blk = blk->xyNext) { - printf("[block: x=%.2f..%.2f y=%.2f..%.2f len=%d]\n", - blk->xMin, blk->xMax, blk->yMin, blk->yMax, blk->len); - TextString *str; - for (str = blk->strings; str; str = str->next) { - printf(" x=%.2f..%.2f y=%.2f..%.2f size=%.2f '", - str->xMin, str->xMax, str->yMin, str->yMax, - (str->yMax - str->yMin)); - for (i = 0; i < str->len; ++i) { - fputc(str->text[i] & 0xff, stdout); - } - printf("'\n"); - } - } - printf("\n------------------------------------------------------------\n\n"); -#endif - - // do column assignment - for (blk1 = xyBlocks; blk1; blk1 = blk1->xyNext) { - col1 = 0; - for (blk2 = xyBlocks; blk2 != blk1; blk2 = blk2->xyNext) { - if (blk1->xMin >= blk2->xMax) { - d = (int)((blk1->xMin - blk2->xMax) / - (0.4 * (blk1->yMax - blk1->yMin))); - if (d > 4) { - d = 4; - } - col2 = blk2->col[0] + blk2->convertedLen + d; - if (col2 > col1) { - col1 = col2; - } - } else if (blk1->xMin > blk2->xMin) { - for (i = 0; i < blk2->len && blk1->xMin >= blk2->xRight[i]; ++i) ; - col2 = blk2->col[i]; - if (col2 > col1) { - col1 = col2; - } - } - } - for (j = 0; j < blk1->len; ++j) { - blk1->col[j] += col1; - } - } - -#if 0 //~ for debugging - for (line = lines; line; line = line->next) { - printf("[line]\n"); - for (blk = line->blocks; blk; blk = blk->next) { - printf("[block: col=%d, len=%d]\n", blk->col[0], blk->len); - TextString *str; - for (str = blk->strings; str; str = str->next) { - printf(" x=%.2f..%.2f y=%.2f..%.2f size=%.2f '", - str->xMin, str->xMax, str->yMin, str->yMax, - (str->yMax - str->yMin)); - for (i = 0; i < str->len; ++i) { - fputc(str->text[i] & 0xff, stdout); - } - if (str->spaceAfter) { - printf(" [space]\n"); - } - printf("'\n"); - } - } - } - printf("\n------------------------------------------------------------\n\n"); -#endif -} - - -GBool TextPage::findText(Unicode *s, int len, - GBool top, GBool bottom, - double *xMin, double *yMin, - double *xMax, double *yMax) { - TextLine *line; - TextBlock *blk; - Unicode *p; - Unicode u1, u2; - int m, i, j; - double x0, x1, x; - - // scan all blocks on page - for (line = lines; line; line = line->next) { - for (blk = line->blocks; blk; blk = blk->next) { - - // check: above top limit? - if (!top && (blk->yMax < *yMin || - (blk->yMin < *yMin && blk->xMax <= *xMin))) { - continue; - } - - // check: below bottom limit? - if (!bottom && (blk->yMin > *yMax || - (blk->yMax > *yMax && blk->xMin >= *xMax))) { - return gFalse; - } - - // search each position in this block - m = blk->len; - for (i = 0, p = blk->text; i <= m - len; ++i, ++p) { - - x0 = (i == 0) ? blk->xMin : blk->xRight[i-1]; - x1 = blk->xRight[i]; - x = 0.5 * (x0 + x1); - - // check: above top limit? - if (!top && blk->yMin < *yMin) { - if (x < *xMin) { - continue; - } - } - - // check: below bottom limit? - if (!bottom && blk->yMax > *yMax) { - if (x > *xMax) { - return gFalse; - } - } - - // compare the strings - for (j = 0; j < len; ++j) { -#if 1 //~ this lowercases Latin A-Z only -- this will eventually be - //~ extended to handle other character sets - if (p[j] >= 0x41 && p[j] <= 0x5a) { - u1 = p[j] + 0x20; - } else { - u1 = p[j]; - } - if (s[j] >= 0x41 && s[j] <= 0x5a) { - u2 = s[j] + 0x20; - } else { - u2 = s[j]; - } -#endif - if (u1 != u2) { - break; - } - } - - // found it - if (j == len) { - *xMin = x0; - *xMax = blk->xRight[i + len - 1]; - *yMin = blk->yMin; - *yMax = blk->yMax; - return gTrue; - } - } - } - } - - return gFalse; -} - -GString *TextPage::getText(double xMin, double yMin, - double xMax, double yMax) { - GString *s; - UnicodeMap *uMap; - GBool isUnicode; - char space[8], eol[16], buf[8]; - int spaceLen, eolLen, len; - TextLine *line; - TextBlock *blk; - double x0, x1, y; - int firstCol, col, i; - GBool multiLine; - - s = new GString(); - - // get the output encoding - if (!(uMap = globalParams->getTextEncoding())) { - return s; - } - isUnicode = uMap->isUnicode(); - spaceLen = uMap->mapUnicode(0x20, space, sizeof(space)); - eolLen = 0; // make gcc happy - switch (globalParams->getTextEOL()) { - case eolUnix: - eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol)); - break; - case eolDOS: - eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol)); - eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen); - break; - case eolMac: - eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol)); - break; - } - - // find the leftmost column - multiLine = gFalse; - firstCol = -1; - for (line = lines; line; line = line->next) { - if (line->yMin > yMax) { - break; - } - if (line->yMax < yMin) { - continue; - } - - for (blk = line->blocks; blk && blk->xMax < xMin; blk = blk->next) ; - if (!blk || blk->xMin > xMax) { - continue; - } - - y = 0.5 * (blk->yMin + blk->yMax); - if (y < yMin || y > yMax) { - continue; - } - - if (firstCol >= 0) { - multiLine = gTrue; - } - - i = 0; - while (1) { - x0 = (i==0) ? blk->xMin : blk->xRight[i-1]; - x1 = blk->xRight[i]; - if (0.5 * (x0 + x1) > xMin) { - break; - } - ++i; - } - col = blk->col[i]; - - if (firstCol < 0 || col < firstCol) { - firstCol = col; - } - } - - // extract the text - for (line = lines; line; line = line->next) { - if (line->yMin > yMax) { - break; - } - if (line->yMax < yMin) { - continue; - } - - for (blk = line->blocks; blk && blk->xMax < xMin; blk = blk->next) ; - if (!blk || blk->xMin > xMax) { - continue; - } - - y = 0.5 * (blk->yMin + blk->yMax); - if (y < yMin || y > yMax) { - continue; - } - - i = 0; - while (1) { - x0 = (i==0) ? blk->xMin : blk->xRight[i-1]; - x1 = blk->xRight[i]; - if (0.5 * (x0 + x1) > xMin) { - break; - } - ++i; - } - - col = firstCol; - - do { - - // line this block up with the correct column - for (; col < blk->col[i]; ++col) { - s->append(space, spaceLen); - } - - // print the block - for (; i < blk->len; ++i) { - - x0 = (i==0) ? blk->xMin : blk->xRight[i-1]; - x1 = blk->xRight[i]; - if (0.5 * (x0 + x1) > xMax) { - break; - } - - len = uMap->mapUnicode(blk->text[i], buf, sizeof(buf)); - s->append(buf, len); - col += isUnicode ? 1 : len; - } - if (i < blk->len) { - break; - } - - // next block - blk = blk->next; - i = 0; - - } while (blk && blk->xMin < xMax); - - if (multiLine) { - s->append(eol, eolLen); - } - } - - uMap->decRefCnt(); - - return s; -} - -void TextPage::dump(void *outputStream, TextOutputFunc outputFunc) { - UnicodeMap *uMap; - char space[8], eol[16], eop[8], buf[8]; - int spaceLen, eolLen, eopLen, len; - TextLine *line; - TextBlock *blk; - int col, d, i; - - // get the output encoding - if (!(uMap = globalParams->getTextEncoding())) { - return; - } - spaceLen = uMap->mapUnicode(0x20, space, sizeof(space)); - eolLen = 0; // make gcc happy - switch (globalParams->getTextEOL()) { - case eolUnix: - eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol)); - break; - case eolDOS: - eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol)); - eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen); - break; - case eolMac: - eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol)); - break; - } - eopLen = uMap->mapUnicode(0x0c, eop, sizeof(eop)); - - // output - for (line = lines; line; line = line->next) { - col = 0; - for (blk = line->blocks; blk; blk = blk->next) { - - // line this block up with the correct column - if (rawOrder && col == 0) { - col = blk->col[0]; - } else { - for (; col < blk->col[0]; ++col) { - (*outputFunc)(outputStream, space, spaceLen); - } - } - - // print the block - for (i = 0; i < blk->len; ++i) { - len = uMap->mapUnicode(blk->text[i], buf, sizeof(buf)); - (*outputFunc)(outputStream, buf, len); - } - col += blk->convertedLen; - } - - // print a return - (*outputFunc)(outputStream, eol, eolLen); - - // print extra vertical space if necessary - if (line->next) { - d = (int)((line->next->yMin - line->yMax) / - (line->blocks->strings->yMax - lines->blocks->strings->yMin) - + 0.5); - // various things (weird font matrices) can result in bogus - // values here, so do a sanity check - if (rawOrder && d > 2) { - d = 2; - } else if (!rawOrder && d > 5) { - d = 5; - } - for (; d > 0; --d) { - (*outputFunc)(outputStream, eol, eolLen); - } - } - } - - // end of page - (*outputFunc)(outputStream, eol, eolLen); - (*outputFunc)(outputStream, eop, eopLen); - (*outputFunc)(outputStream, eol, eolLen); - - uMap->decRefCnt(); -} - -// Returns true if <str1> should be inserted before <str2> in xy -// order. -GBool TextPage::xyBefore(TextString *str1, TextString *str2) { - return str1->xMin < str2->xMin || - (str1->xMin == str2->xMin && str1->yMin < str2->yMin); -} - -// Returns true if <blk1> should be inserted before <blk2> in xy -// order. -GBool TextPage::xyBefore(TextBlock *blk1, TextBlock *blk2) { - return blk1->xMin < blk2->xMin || - (blk1->xMin == blk2->xMin && blk1->yMin < blk2->yMin); -} - -// Returns true if <blk1> should be inserted before <blk2> in yx -// order, allowing a little slack for vertically overlapping text. -GBool TextPage::yxBefore(TextBlock *blk1, TextBlock *blk2) { - double h1, h2, overlap; - - h1 = blk1->yMax - blk1->yMin; - h2 = blk2->yMax - blk2->yMin; - overlap = ((blk1->yMax < blk2->yMax ? blk1->yMax : blk2->yMax) - - (blk1->yMin > blk2->yMin ? blk1->yMin : blk2->yMin)) / - (h1 < h2 ? h1 : h2); - if (overlap > 0.6) { - return blk1->xMin < blk2->xMin; - } - return blk1->yMin < blk2->yMin; -} - -double TextPage::coalesceFit(TextString *str1, TextString *str2) { - double h1, h2, w1, w2, r, overlap, spacing; - - h1 = str1->yMax - str1->yMin; - h2 = str2->yMax - str2->yMin; - w1 = str1->xMax - str1->xMin; - w2 = str2->xMax - str2->xMin; - r = h1 / h2; - if (r < (1.0 / 3.0) || r > 3) { - return 10; - } - overlap = ((str1->yMax < str2->yMax ? str1->yMax : str2->yMax) - - (str1->yMin > str2->yMin ? str1->yMin : str2->yMin)) / - (h1 < h2 ? h1 : h2); - if (overlap < 0.5) { - return 10; - } - spacing = (str2->xMin - str1->xMax) / (h1 > h2 ? h1 : h2); - if (spacing < -0.5) { - return 10; - } - // separate text that overlaps - duplicated text (so that fake - // boldface and shadowed text can be cleanly removed) - if ((str2->xMin - str1->xMax) / (w1 < w2 ? w1 : w2) < -0.7) { - return 10; - } - return spacing; -} - -void TextPage::clear() { - TextLine *p1, *p2; - TextString *s1, *s2; - - if (curStr) { - delete curStr; - curStr = NULL; - } - if (lines) { - for (p1 = lines; p1; p1 = p2) { - p2 = p1->next; - delete p1; - } - } else if (xyStrings) { - for (s1 = xyStrings; s1; s1 = s2) { - s2 = s1->next; - delete s1; - } - } - xyStrings = NULL; - xyCur1 = xyCur2 = NULL; - lines = NULL; - nest = 0; - nTinyChars = 0; -} - -//------------------------------------------------------------------------ -// TextOutputDev -//------------------------------------------------------------------------ - -static void outputToFile(void *stream, char *text, int len) { - fwrite(text, 1, len, (FILE *)stream); -} - -TextOutputDev::TextOutputDev(char *fileName, GBool rawOrderA, GBool append) { - text = NULL; - rawOrder = rawOrderA; - ok = gTrue; - - // open file - needClose = gFalse; - if (fileName) { - if (!strcmp(fileName, "-")) { - outputStream = stdout; - } else if ((outputStream = fopen(fileName, append ? "ab" : "wb"))) { - needClose = gTrue; - } else { - error(-1, "Couldn't open text file '%s'", fileName); - ok = gFalse; - return; - } - outputFunc = &outputToFile; - } else { - outputStream = NULL; - } - - // set up text object - text = new TextPage(rawOrder); -} - -TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream, - GBool rawOrderA) { - outputFunc = func; - outputStream = stream; - needClose = gFalse; - rawOrder = rawOrderA; - text = new TextPage(rawOrder); - ok = gTrue; -} - -TextOutputDev::~TextOutputDev() { - if (needClose) { -#ifdef MACOS - ICS_MapRefNumAndAssign((short)((FILE *)outputStream)->handle); -#endif - fclose((FILE *)outputStream); - } - if (text) { - delete text; - } -} - -void TextOutputDev::startPage(int /*pageNum*/, GfxState */*state*/) { - text->clear(); -} - -void TextOutputDev::endPage() { - text->coalesce(); - if (outputStream) { - text->dump(outputStream, outputFunc); - } -} - -void TextOutputDev::updateFont(GfxState *state) { - text->updateFont(state); -} - -void TextOutputDev::beginString(GfxState *state, GString */*s*/) { - text->beginString(state, state->getCurX(), state->getCurY()); -} - -void TextOutputDev::endString(GfxState */*state*/) { - text->endString(); -} - -void TextOutputDev::drawChar(GfxState *state, double x, double y, - double dx, double dy, - double /*originX*/, double /*originY*/, - CharCode /*c*/, Unicode *u, int uLen) { - text->addChar(state, x, y, dx, dy, u, uLen); -} - -GBool TextOutputDev::findText(Unicode *s, int len, - GBool top, GBool bottom, - double *xMin, double *yMin, - double *xMax, double *yMax) { - return text->findText(s, len, top, bottom, xMin, yMin, xMax, yMax); -} - -GString *TextOutputDev::getText(double xMin, double yMin, - double xMax, double yMax) { - return text->getText(xMin, yMin, xMax, yMax); -} - |