//C- -*- C++ -*- //C- ------------------------------------------------------------------- //C- DjVuLibre-3.5 //C- Copyright (c) 2002 Leon Bottou and Yann Le Cun. //C- Copyright (c) 2001 AT&T //C- //C- This software is subject to, and may be distributed under, the //C- GNU General Public License, Version 2. The license should have //C- accompanied the software or you may obtain a copy of the license //C- from the Free Software Foundation at http://www.fsf.org . //C- //C- This program is distributed in the hope that it will be useful, //C- but WITHOUT ANY WARRANTY; without even the implied warranty of //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //C- GNU General Public License for more details. //C- //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library //C- distributed by Lizardtech Software. On July 19th 2002, Lizardtech //C- Software authorized us to replace the original DjVu(r) Reference //C- Library notice by the following text (see doc/lizard2002.djvu): //C- //C- ------------------------------------------------------------------ //C- | DjVu (r) Reference Library (v. 3.5) //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. //C- | The DjVu Reference Library is protected by U.S. Pat. No. //C- | 6,058,214 and patents pending. //C- | //C- | This software is subject to, and may be distributed under, the //C- | GNU General Public License, Version 2. The license should have //C- | accompanied the software or you may obtain a copy of the license //C- | from the Free Software Foundation at http://www.fsf.org . //C- | //C- | The computer code originally released by LizardTech under this //C- | license and unmodified by other parties is deemed "the LIZARDTECH //C- | ORIGINAL CODE." 
Subject to any third party intellectual property //C- | claims, LizardTech grants recipient a worldwide, royalty-free, //C- | non-exclusive license to make, use, sell, or otherwise dispose of //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU //C- | General Public License. This grant only confers the right to //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to //C- | the extent such infringement is reasonably necessary to enable //C- | recipient to make, have made, practice, sell, or otherwise dispose //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to //C- | any greater extent that may be necessary to utilize further //C- | modifications or combinations. //C- | //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. //C- +------------------------------------------------------------------ // // $Id: XMLParser.cpp,v 1.10 2003/11/07 22:08:22 leonb Exp $ // $Name: release_3_5_15 $ #ifdef HAVE_CONFIG_H # include "config.h" #endif #if NEED_GNUG_PRAGMAS # pragma implementation #endif // From: Leon Bottou, 1/31/2002 // This is purely Lizardtech stuff. 
#include "XMLParser.h"
#include "XMLTags.h"
#include "ByteStream.h"
#include "GOS.h"
#include "DjVuDocument.h"
#include "DjVuText.h"
#include "DjVuAnno.h"
#include "DjVuFile.h"
#include "DjVuImage.h"
#include "debug.h"
// NOTE(review): the three system header names below were missing from the
// text this file was recovered from (angle-bracket spans were stripped).
// They are the conventional C headers for SEEK_SET and friends -- confirm
// against the upstream file.
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>


#ifdef HAVE_NAMESPACES
namespace DJVU {
# ifdef NOT_DEFINED // Just to fool emacs c++ mode
}
#endif
#endif

// Mime type an OBJECT tag must carry (when it has a "type" attribute at all)
// in order to be processed by lt_XMLParser::Impl::parse().
static const char mimetype[]="image/x.djvu";

// Names of the XML tags recognized by this parser.
static const char bodytag[]="BODY";
static const char areatag[]="AREA";
static const char maptag[]="MAP";
static const char objecttag[]="OBJECT";
static const char paramtag[]="PARAM";
static const char wordtag[]="WORD";
static const char linetag[]="LINE";
static const char paragraphtag[]="PARAGRAPH";
static const char regiontag[]="REGION";
static const char pagecolumntag[]="PAGECOLUMN";
static const char hiddentexttag[]="HIDDENTEXT";
static const char metadatatag[]="METADATA";

// Concrete implementation of lt_XMLParser.  It keeps the documents and files
// touched by parse() in two maps so that save() can write them back later.
//
// NOTE(review): all template arguments in this file were missing from the
// recovered text and have been re-derived from how each variable is used.
class lt_XMLParser::Impl : public lt_XMLParser
{
public:
  Impl(void);
  virtual ~Impl();
  /// Parse the specified bytestream.
  virtual void parse(const GP<ByteStream> &bs);
  /// Parse the specified tags - this one does all the work
  virtual void parse(const lt_XMLTags &tags);
  /// write to disk.
  virtual void save(void);
  /// erase.
  virtual void empty(void);
protected:
  GP<DjVuFile> get_file(const GURL &url,GUTF8String page);

  void parse_anno(const int width, const int height,
    const lt_XMLTags &GObject,
    GMap<GUTF8String,GP<lt_XMLTags> > &Maps, DjVuFile &dfile);

  void parse_text(const int width, const int height,
    const lt_XMLTags &GObject, DjVuFile &dfile);

  void parse_meta(const lt_XMLTags &GObject, DjVuFile &dfile);

  void ChangeAnno( const int width, const int height,
    DjVuFile &dfile, const lt_XMLTags &map);

  void ChangeInfo(DjVuFile &dfile,const int dpi,const double gamma);

  void ChangeText( const int width, const int height,
    DjVuFile &dfile, const lt_XMLTags &map);

  void ChangeMeta( DjVuFile &dfile, const lt_XMLTags &map);

  void ChangeTextOCR( const GUTF8String &value,
    const int width, const int height,
    const GP<DjVuFile> &dfile);

  // we may want to make these list of modified file static so
  // they only needed to be loaded and saved once.

  // Files fetched by get_file(), keyed by the file URL string.
  GMap<GUTF8String,GP<DjVuFile> > m_files;
  // Documents opened by get_file(), keyed by the document URL string.
  GMap<GUTF8String,GP<DjVuDocument> > m_docs;

  GURL m_codebase;
  // Guards m_files and m_docs.
  GCriticalSection xmlparser_lock;
};

// Single entry point used both to register the OCR callback (when no image
// is given) and to invoke it (when an image is given).  Defined at the end
// of this file.
static GP<ByteStream>
OCRcallback(
  void * const xarg,
  lt_XMLParser::mapOCRcallback * const xcallback,
  const GUTF8String &value=GUTF8String(),
  const GP<DjVuImage> &image=0 );

// Convenience wrapper: invoke the registered OCR callback.
static inline GP<ByteStream>
OCRcallback(const GUTF8String &value, const GP<DjVuImage> &image)
{
  return OCRcallback(0,0,value,image);
}

lt_XMLParser::lt_XMLParser() {}
lt_XMLParser::~lt_XMLParser() {}
lt_XMLParser::Impl::Impl() {}
lt_XMLParser::Impl::~Impl() {}

// Factory: returns a freshly allocated implementation object.
GP<lt_XMLParser>
lt_XMLParser::create(void)
{
  return new lt_XMLParser::Impl;
}

// helper function for args
//
// Appends to `retval` the base-10 integers found in the comma separated
// string `coords`.  Parsing stops at the first position where no integer
// can be converted, or where an integer is not followed by a comma.
static void
intList(GUTF8String coords, GList<int> &retval)
{
  int pos=0;
  while(coords.length())
  {
    int epos;
    const long value=coords.toLong(pos,epos,10);
    if(epos<0)
    {
      // No conversion happened.  Neither `pos` nor `coords` would change on
      // another iteration, so continuing here would loop forever.
      break;
    }
    retval.append((int)value);
    const int n=coords.nextNonSpace(epos);
    if(coords[n] != ',')
      break;
    pos=n+1;
  }
}

// Forget every cached file and document without saving them.
void
lt_XMLParser::Impl::empty(void)
{
  GCriticalSectionLock lock(&xmlparser_lock);
  m_files.empty();
  m_docs.empty();
}

// Save every cached document back to the URL it was initialized from, then
// drop the caches.  Bundled and single-page documents are saved bundled.
void
lt_XMLParser::Impl::save(void)
{
  GCriticalSectionLock lock(&xmlparser_lock);
  for(GPosition pos=m_docs;pos;++pos)
  {
    const GP<DjVuDocument> doc(m_docs[pos]);
    const GURL url=doc->get_init_url();

    DEBUG_MSG("Saving "<<(const char *)url<<" with new text and annotations\n");
    const bool bundle=doc->is_bundled()||(doc->get_doc_type()==DjVuDocument::SINGLE_PAGE);
    doc->save_as(url,bundle);
  }
  empty();
}

// Build the tag tree from the bytestream and hand it to parse(tags).
void
lt_XMLParser::Impl::parse(const GP<ByteStream> &bs)
{
  const GP<lt_XMLTags> tags(lt_XMLTags::create(bs));
  parse(*tags);
}

// Returns the (lazily filled) map from the "bordertype" attribute value to
// the corresponding GMapArea border type.
static const GMap<GUTF8String,GMapArea::BorderType> &
BorderTypeMap(void)
{
  static GMap<GUTF8String,GMapArea::BorderType> typeMap;
  if (! typeMap.size())
  {
    typeMap["none"]=GMapArea::NO_BORDER;
    typeMap["xor"]=GMapArea::XOR_BORDER;
    typeMap["solid"]=GMapArea::SOLID_BORDER;
    typeMap["default"]=GMapArea::SOLID_BORDER;
    typeMap["shadowout"]=GMapArea::SHADOW_OUT_BORDER;
    typeMap["shadowin"]=GMapArea::SHADOW_IN_BORDER;
    typeMap["etchedin"]=GMapArea::SHADOW_EIN_BORDER;
    typeMap["etchedout"]=GMapArea::SHADOW_EOUT_BORDER;
  }
  return typeMap;
}

// Convert a color attribute of the form "#<hex digits>" to its numeric
// value.  An empty string yields 0.  Any other non-empty string that is not
// a convertible "#..." value throws XMLAnno.bad_color.
static unsigned long
convertToColor(const GUTF8String &s)
{
  unsigned long retval=0;
  if(s.length())
  {
    // Start from "no conversion" so that a string without the leading '#'
    // is rejected below instead of testing an uninitialized value.
    int endpos=-1;
    if(s[0] == '#')
    {
      retval=s.substr(1,-1).toULong(0,endpos,16);
    }
    if(endpos < 0)
    {
      G_THROW( (ERR_MSG("XMLAnno.bad_color") "\t")+s );
    }
  }
  return retval;
}

// Update the dpi and/or gamma stored in the file's info record.
// A value is applied only when it lies in the accepted range
// (dpi 5..4800, gamma 0.1..5.0) and differs from the current one; the info
// record is replaced only if at least one value changed.
void
lt_XMLParser::Impl::ChangeInfo(DjVuFile &dfile,const int dpi,const double gamma)
{
  GP<DjVuInfo> info;
  if(dpi >= 5 && dpi <= 4800)
  {
    dfile.resume_decode(true);
    if(dfile.info && (dpi != dfile.info->dpi) )
    {
      info=new DjVuInfo(*dfile.info);
      info->dpi=dpi;
    }
  }
  if(gamma >= 0.1 && gamma <= 5.0)
  {
    dfile.resume_decode(true);
    if(dfile.info && (gamma != dfile.info->gamma) )
    {
      if(!info)
        info=new DjVuInfo(*dfile.info);
      info->gamma=gamma;
    }
  }
  if(info)
  {
    dfile.change_info(info);
  }
}

// Replace the map areas of `dfile` with the AREA tags found in `map`.
//
// width/height are the dimensions the XML coordinates refer to; when they
// are non-zero and differ from the page size, coordinates are scaled to the
// page.  The y axis is flipped (y -> h-1-y).  Throws XMLAnno.bad_rect,
// bad_circle, bad_oval, unknown_shape, unknown_border or bad_color on
// malformed attributes.  The file is always marked modified and its
// annotation stream re-encoded, even when there is nothing to change.
void
lt_XMLParser::Impl::ChangeAnno(
  const int width, const int height,
  DjVuFile &dfile,
  const lt_XMLTags &map )
{
  dfile.resume_decode(true);
  const GP<DjVuInfo> info(dfile.info);
  const GP<DjVuAnno> ganno(DjVuAnno::create());
  DjVuAnno &anno=*ganno;
  GPosition map_pos;
  map_pos=map.contains(areatag);
  if(dfile.contains_anno())
  {
    // Start from the existing annotations, minus their map areas.
    GP<ByteStream> annobs=dfile.get_merged_anno();
    if(annobs)
    {
      anno.decode(annobs);
      if(anno.ant && info)
      {
        anno.ant->map_areas.empty();
      }
    }
//    dfile.remove_anno();
  }
  if(info && map_pos)
  {
    const int h=info->height;
    const int w=info->width;
    double ws=1.0;
    double hs=1.0;
    if(width && width != w)
    {
      ws=((double)w)/((double)width);
    }
    if(height && height != h)
    {
      hs=((double)h)/((double)height);
    }
    if(!anno.ant)
    {
      anno.ant=DjVuANT::create();
    }
    GPList<GMapArea> &map_areas=anno.ant->map_areas;
    map_areas.empty();
    GPList<lt_XMLTags> gareas=map[map_pos];
    for(GPosition pos=gareas;pos;++pos)
    {
      if(gareas[pos])
      {
        lt_XMLTags &areas=*(gareas[pos]);
        GMap<GUTF8String,GUTF8String> args(areas.get_args());
        GList<int> coords;
        // ******************************************************
        // Parse the coords attribute:  first read the raw data into
        // a list, then scale the x, y data into another list.  For
        // circles, you also get a radius element with (looks like an x
        // with no matching y).
        // ******************************************************
        {
          GPosition coords_pos=args.contains("coords");
          if(coords_pos)
          {
            GList<int> raw_coords;
            intList(args[coords_pos],raw_coords);
            for(GPosition raw_pos=raw_coords;raw_pos;++raw_pos)
            {
              const int r=raw_coords[raw_pos];
              const int x=(int)(ws*(double)r+0.5);
              coords.append(x);
              int y=h-1;
              if(! ++raw_pos)
              {
                // Odd trailing value: reuse it as the y value as well.
                y-=(int)(hs*(double)r+0.5);
              }else
              {
                y-=(int)(hs*(double)raw_coords[raw_pos]+0.5);
              }
              coords.append(y);
//            DjVuPrintMessage("Coords (%d,%d)\n",x,y);
            }
          }
        }
        GUTF8String shape;
        {
          GPosition shape_pos=args.contains("shape");
          if(shape_pos)
          {
            shape=args[shape_pos];
          }
        }
        GP<GMapArea> a;
        if(shape == "default")
        {
          // Whole page.
          GRect rect(0,0,w,h);
          a=GMapRect::create(rect);
        }else if(!shape.length() || shape == "rect")
        {
          int xx[4];
          int i=0;
          for(GPosition rect_pos=coords;(rect_pos)&&(i<4);++rect_pos,++i)
          {
            xx[i]=coords[rect_pos];
          }
          if(i!=4)
          {
            G_THROW( ERR_MSG("XMLAnno.bad_rect") );
          }
          int xmin,xmax;
          if(xx[0]>xx[2])
          {
            xmax=xx[0];
            xmin=xx[2];
          }else
          {
            xmin=xx[0];
            xmax=xx[2];
          }
          int ymin,ymax;
          if(xx[1]>xx[3])
          {
            ymax=xx[1];
            ymin=xx[3];
          }else
          {
            ymin=xx[1];
            ymax=xx[3];
          }
          GRect rect(xmin,ymin,xmax-xmin,ymax-ymin);
          a=GMapRect::create(rect);
        }else if(shape == "circle")
        {
          int xx[4];
          int i=0;
          GPosition rect_pos=coords.lastpos();
          if(rect_pos)
          {
            // Duplicate the last element, then take the first four values.
            coords.append(coords[rect_pos]);
            for(rect_pos=coords;(rect_pos)&&(i<4);++rect_pos)
            {
              xx[i++]=coords[rect_pos];
            }
          }
          if(i!=4)
          {
            G_THROW( ERR_MSG("XMLAnno.bad_circle") );
          }
          // xx[3] went through the y flip above; undo it to get a radius.
          int x=xx[0],y=xx[1],rx=xx[2],ry=(h-xx[3])-1;
          GRect rect(x-rx,y-ry,2*rx,2*ry);
          a=GMapOval::create(rect);
        }else if(shape == "oval")
        {
          int xx[4];
          int i=0;
          for(GPosition rect_pos=coords;(rect_pos)&&(i<4);++rect_pos,++i)
          {
            xx[i]=coords[rect_pos];
          }
          if(i!=4)
          {
            G_THROW( ERR_MSG("XMLAnno.bad_oval") );
          }
          int xmin,xmax;
          if(xx[0]>xx[2])
          {
            xmax=xx[0];
            xmin=xx[2];
          }else
          {
            xmin=xx[0];
            xmax=xx[2];
          }
          int ymin,ymax;
          if(xx[1]>xx[3])
          {
            ymax=xx[1];
            ymin=xx[3];
          }else
          {
            ymin=xx[1];
            ymax=xx[3];
          }
          GRect rect(xmin,ymin,xmax-xmin,ymax-ymin);
          a=GMapOval::create(rect);
        }else if(shape == "poly")
        {
          GP<GMapPoly> p=GMapPoly::create();
          for(GPosition poly_pos=coords;poly_pos;++poly_pos)
          {
            int x=coords[poly_pos];
            if(! ++poly_pos)
              break;
            int y=coords[poly_pos];
            p->add_vertex(x,y);
          }
          p->close_poly();
          a=p;
        }else
        {
          G_THROW( ( ERR_MSG("XMLAnno.unknown_shape") "\t")+shape );
        }
        if(a)
        {
          // Copy the optional presentation attributes onto the new area.
          GPosition pos;
          if((pos=args.contains("href")))
          {
            a->url=args[pos];
          }
          if((pos=args.contains("target")))
          {
            a->target=args[pos];
          }
          if((pos=args.contains("alt")))
          {
            a->comment=args[pos];
          }
          if((pos=args.contains("bordertype")))
          {
            GUTF8String b=args[pos];
            static const GMap<GUTF8String,GMapArea::BorderType> typeMap=BorderTypeMap();
            if((pos=typeMap.contains(b)))
            {
              a->border_type=typeMap[pos];
            }else
            {
              G_THROW( (ERR_MSG("XMLAnno.unknown_border") "\t")+b );
            }
          }
          a->border_always_visible=!!args.contains("visible");
          if((pos=args.contains("bordercolor")))
          {
            a->border_color=convertToColor(args[pos]);
          }
          if((pos=args.contains("highlight")))
          {
            a->hilite_color=convertToColor(args[pos]);
          }
          if((pos=args.contains("border")))
          {
             a->border_width=args[pos].toInt(); //atoi(args[pos]);
          }
          map_areas.append(a);
        }
      }
    }
  }
  dfile.set_modified(true);
  dfile.anno=ByteStream::create();
  anno.encode(dfile.anno);
}

// Return the DjVuFile for page `id` of the document at `url`, opening the
// document on first use and caching both document and file.
//
// `id` may be a 1-based page number, a file id, or empty (first page).
// Throws XMLAnno.fail_init if the document cannot be initialized and
// XMLAnno.bad_page if the id is not part of the document.
GP<DjVuFile>
lt_XMLParser::Impl::get_file(const GURL &url,GUTF8String id)
{
  GP<DjVuFile> dfile;
  GP<DjVuDocument> doc;
  GCriticalSectionLock lock(&xmlparser_lock);
  {
    GPosition pos=m_docs.contains(url.get_string());
    if(pos)
    {
      doc=m_docs[pos];
    }else
    {
      doc=DjVuDocument::create_wait(url);
      if(! doc->wait_for_complete_init())
      {
        G_THROW(( ERR_MSG("XMLAnno.fail_init") "\t")+url.get_string() );
      }
      m_docs[url.get_string()]=doc;
    }
    if(id.is_int())
    {
      const int xpage=id.toInt(); //atoi((char const *)page);
      if(xpage>0)
        id=doc->page_to_id(xpage-1);
    }else if(!id.length())
    {
      id=doc->page_to_id(0);
    }
  }
  const GURL fileurl(doc->id_to_url(id));
  GPosition dpos(m_files.contains(fileurl.get_string()));
  if(!dpos)
  {
    if(!doc->get_id_list().contains(id))
    {
      G_THROW( ERR_MSG("XMLAnno.bad_page") );
    }
    dfile=doc->get_djvu_file(id,false);
    if(!dfile)
    {
      G_THROW( ERR_MSG("XMLAnno.bad_page") );
    }
    m_files[fileurl.get_string()]=dfile;
  }else
  {
    dfile=m_files[dpos];
  }
  return dfile;
}

// Walk the single BODY tag, and for every OBJECT tag that names a DjVu data
// file apply the requested info, annotation, metadata, hidden text and OCR
// changes to that file.  Throws XMLAnno.extra_body unless there is exactly
// one BODY tag, and XMLAnno.no_body if that tag is null.
void
lt_XMLParser::Impl::parse(const lt_XMLTags &tags)
{
  const GPList<lt_XMLTags> Body(tags.get_Tags(bodytag));
  GPosition pos=Body;

  if(!pos || (pos != Body.lastpos()))
  {
    G_THROW( ERR_MSG("XMLAnno.extra_body") );
  }
  const GP<lt_XMLTags> GBody(Body[pos]);
  if(!GBody)
  {
    G_THROW( ERR_MSG("XMLAnno.no_body") );
  }

  // Collect the named MAP tags, both at body level and inside objects.
  GMap<GUTF8String,GP<lt_XMLTags> > Maps;
  lt_XMLTags::get_Maps(maptag,"name",Body,Maps);

  const GPList<lt_XMLTags> Objects(GBody->get_Tags(objecttag));
  lt_XMLTags::get_Maps(maptag,"name",Objects,Maps);

  for(GPosition Objpos=Objects;Objpos;++Objpos)
  {
    lt_XMLTags &GObject=*Objects[Objpos];
    // Map of attributes to value (e.g. "width" --> "500")
    const GMap<GUTF8String,GUTF8String> &args=GObject.get_args();
    GURL codebase;
    {
      DEBUG_MSG("Setting up codebase... m_codebase = " << m_codebase << "\n");
      GPosition codebasePos=args.contains("codebase");
      // If user specified a codebase attribute, assume it is correct (absolute URL):
      //  the GURL constructor will throw an exception if it isn't
      if(codebasePos)
      {
        codebase=GURL::UTF8(args[codebasePos]);
      }else if (m_codebase.is_dir())
      {
        codebase=m_codebase;
      }else
      {
        codebase=GURL::Filename::UTF8(GOS::cwd());
      }
      DEBUG_MSG("codebase = " << codebase << "\n");
    }
    // the data attribute specifies the input file.  This can be
    //  either an absolute URL (starts with file:/) or a relative
    //  URL (for now, just a path and file name).  If it's absolute,
    //  our GURL will adequately wrap it.  If it's relative, we need
    //  to use the codebase attribute to form an absolute URL first.
    GPosition datapos=args.contains("data");
    if(datapos)
    {
      bool isDjVuType=false;
      GPosition typePos(args.contains("type"));
      if(typePos)
      {
        if(args[typePos] != mimetype)
        {
//          DjVuPrintErrorUTF8("Ignoring %s Object tag\n",mimetype);
          continue;
        }
        isDjVuType=true;
      }
      const GURL url=GURL::UTF8(args[datapos],(args[datapos][0] == '/')?codebase.base():codebase);
      int width;
      {
        GPosition widthPos=args.contains("width");
        width=(widthPos)?args[widthPos].toInt():0;
      }
      int height;
      {
        GPosition heightPos=args.contains("height");
        height=(heightPos)?args[heightPos].toInt():0;
      }
      GUTF8String gamma;
      GUTF8String dpi;
      GUTF8String page;
      GUTF8String do_ocr;
      {
        // PARAM tags may set page/dpi/gamma/ocr either individually or
        // packed together in a single "flags" value.
        GPosition paramPos(GObject.contains(paramtag));
        if(paramPos)
        {
          const GPList<lt_XMLTags> Params(GObject[paramPos]);
          for(GPosition loc=Params;loc;++loc)
          {
            const GMap<GUTF8String,GUTF8String> &pargs=Params[loc]->get_args();
            GPosition namepos=pargs.contains("name");
            if(namepos)
            {
              GPosition valuepos=pargs.contains("value");
              if(valuepos)
              {
                const GUTF8String name=pargs[namepos].downcase();
                const GUTF8String &value=pargs[valuepos];
                if(name == "flags")
                {
                  GMap<GUTF8String,GUTF8String> args;
                  lt_XMLTags::ParseValues(value,args,true);
                  if(args.contains("page"))
                  {
                    page=args["page"];
                  }
                  if(args.contains("dpi"))
                  {
                    dpi=args["dpi"];
                  }
                  if(args.contains("gamma"))
                  {
                    gamma=args["gamma"];
                  }
                  if(args.contains("ocr"))
                  {
                    do_ocr=args["ocr"];
                  }
                }else if(name == "page")
                {
                  page=value;
                }else if(name == "dpi")
                {
                  dpi=value;
                }else if(name == "gamma")
                {
                  gamma=value;
                }else if(name == "ocr")
                {
                  do_ocr=value;
                }
              }
            }
          }
        }
      }
      const GP<DjVuFile> dfile(get_file(url,page));
      if(dpi.is_int() || gamma.is_float())
      {
        int pos=0;
        ChangeInfo(*dfile,dpi.toInt(),gamma.toDouble(pos,pos));
      }
      parse_anno(width,height,GObject,Maps,*dfile);
      parse_meta(GObject,*dfile);
      parse_text(width,height,GObject,*dfile);
      ChangeTextOCR(do_ocr,width,height,dfile);
    }
  }
}

// If the OBJECT tag has a "usemap" attribute, look the named map up in
// `Maps` and apply it to `dfile`.  Throws XMLAnno.map_find if the name is
// not in `Maps`.
void
lt_XMLParser::Impl::parse_anno(
  const int width,
  const int height,
  const lt_XMLTags &GObject,
  GMap<GUTF8String,GP<lt_XMLTags> > &Maps,
  DjVuFile &dfile )
{
  GP<lt_XMLTags> map;
  {
    GPosition usemappos=GObject.get_args().contains("usemap");
    if(usemappos)
    {
      const GUTF8String mapname(GObject.get_args()[usemappos]);
      GPosition mappos=Maps.contains(mapname);
      if(!mappos)
      {
        G_THROW((ERR_MSG("XMLAnno.map_find") "\t")+mapname );
      }else
      {
        map=Maps[mappos];
      }
    }
  }
  if(map)
  {
    ChangeAnno(width,height,dfile,*map);
  }
}

#ifdef max
#undef max
#endif
template<class TYPE>
static inline TYPE max(TYPE a,TYPE b) { return (a>b)?a:b; }
#ifdef min
#undef min
#endif
template<class TYPE>
static inline TYPE min(TYPE a,TYPE b) { return (a<b)?a:b; }

// Build the text zone for `tag` (and, recursively, for its children) under
// `parent`, writing the zone text to `bs`.
//
// height is the page height used to flip y coordinates; ws/hs scale the
// XML coordinates to page coordinates.  The return value is assigned false
// when a WORD has no "coords" attribute (its rectangle is then derived from
// the parent) and is otherwise taken from the last child examined.
//
// NOTE(review): the text between the body of min() above and the first
// "ztype" assignment below was missing from the recovered source.  The
// signature, the declarations of retval/self_ptr/sepchar/name and the
// `name == wordtag` test were reconstructed from the two call sites and
// from how the remaining body uses these names -- in particular
// `tag.get_name()` is presumed; verify against the upstream file.
static bool
make_child_layer(
  DjVuTXT::Zone &parent,
  const lt_XMLTags &tag, ByteStream &bs,
  const int height, const double ws, const double hs)
{
  bool retval=true;
  DjVuTXT::Zone *self_ptr;
  char sepchar;
  const GUTF8String name(tag.get_name());
  if(name == wordtag)
  {
    self_ptr=parent.append_child();
    self_ptr->ztype = DjVuTXT::WORD;
    sepchar=' ';
  }else if(name == linetag)
  {
    self_ptr=parent.append_child();
    self_ptr->ztype = DjVuTXT::LINE;
    sepchar=DjVuTXT::end_of_line;
  }else if(name == paragraphtag)
  {
    self_ptr=parent.append_child();
    self_ptr->ztype = DjVuTXT::PARAGRAPH;
    sepchar=DjVuTXT::end_of_paragraph;
  }else if(name == regiontag)
  {
    self_ptr=parent.append_child();
    self_ptr->ztype = DjVuTXT::REGION;
    sepchar=DjVuTXT::end_of_region;
  }else if(name == pagecolumntag)
  {
    self_ptr=parent.append_child();
    self_ptr->ztype = DjVuTXT::COLUMN;
    sepchar=DjVuTXT::end_of_column;
  }else
  {
    // Any other tag is treated as the page itself: reuse the parent zone.
    self_ptr = &parent;
    self_ptr->ztype = DjVuTXT::PAGE;
    sepchar=0;
  }
  DjVuTXT::Zone &self = *self_ptr;
  self.text_start = bs.tell();
  int &xmin=self.rect.xmin, &ymin=self.rect.ymin,
    &xmax=self.rect.xmax, &ymax=self.rect.ymax;
  // Deliberately inverted rectangle (min set to the larger bound, max to
  // the smaller) so that min()/max() merging with real rectangles works.
  GRect default_rect;
  default_rect.xmin=max(parent.rect.xmax,parent.rect.xmin);
  default_rect.xmax=min(parent.rect.xmax,parent.rect.xmin);
  default_rect.ymin=max(parent.rect.ymax,parent.rect.ymin);
  default_rect.ymax=min(parent.rect.ymax,parent.rect.ymin);
  // Now if there are coordinates, use those.
  GPosition pos(tag.get_args().contains("coords"));
  if(pos)
  {
    GList<int> rectArgs;
    intList(tag.get_args()[pos], rectArgs);
    if((pos=rectArgs))
    {
      xmin=(int)(ws*(double)rectArgs[pos]);
      if(++pos)
      {
        ymin=(height-1)-(int)(hs*(double)rectArgs[pos]);
        if(++pos)
        {
          xmax=(int)(ws*(double)rectArgs[pos]);
          if(++pos)
          {
            ymax=(height-1)-(int)(hs*(double)rectArgs[pos]);
            if(xmin>xmax) // Make sure xmin is really minimum
            {
              const int t=xmin;
              xmin=xmax;
              xmax=t;
            }
            if(ymin>ymax) // Make sure ymin is really minimum
            {
              const int t=ymin;
              ymin=ymax;
              ymax=t;
            }
          }
        }
      }
    }
  }
  if(self.ztype == DjVuTXT::WORD)
  {
    if(! pos)
    {
      self.rect=default_rect;
      retval=false;
    }
    const GUTF8String raw(tag.get_raw().fromEscaped());
    const int i=raw.nextNonSpace(0);
    bs.writestring(raw.substr(i,raw.firstEndSpace(i)-i));
    if(sepchar)
      bs.write8(sepchar);
    self.text_length = bs.tell() - self.text_start;
  }else if(pos)
  {
    // Non-word zone with its own coordinates: children cannot change it.
    pos=tag.get_content();
    if(pos)
    {
      for(pos=tag.get_content(); pos; ++pos)
      {
        const GP<lt_XMLTags> t(tag.get_content()[pos].tag);
        make_child_layer(self, *t, bs, height,ws,hs);
      }
      if(sepchar)
        bs.write8(sepchar);
      self.text_length = bs.tell() - self.text_start;
    }else
    {
      const GUTF8String raw(tag.get_raw().fromEscaped());
      const int i=raw.nextNonSpace(0);
      bs.writestring(raw.substr(i,raw.firstEndSpace(i)-i));
      if(sepchar)
        bs.write8(sepchar);
      self.text_length = bs.tell() - self.text_start;
    }
  }else
  {
    // Non-word zone without coordinates: grow the rectangle from children.
    self.rect=default_rect;
    if((pos=tag.get_content()))
    {
      do
      {
        const GP<lt_XMLTags> t(tag.get_content()[pos].tag);
        const GRect save_rect(self.rect);
        self.rect=default_rect;
        if((retval=make_child_layer(self, *t, bs, height,ws,hs)))
        {
          xmin=min(save_rect.xmin,xmin);
          xmax=max(save_rect.xmax,xmax);
          ymin=min(save_rect.ymin,ymin);
          ymax=max(save_rect.ymax,ymax);
        }else
        {
          // If the child doesn't have coordinates, we need to use a box
          // at least as big as the parent's coordinates.
          xmin=min(save_rect.xmin,default_rect.xmax);
          xmax=max(save_rect.xmax,default_rect.xmin);
          ymin=min(save_rect.ymin,default_rect.ymax);
          ymax=max(save_rect.ymax,default_rect.ymin);
          // NOTE(review): this loop starts at the child that was just
          // processed above -- confirm the repeated call is intended.
          for(; pos; ++pos)
          {
            const GP<lt_XMLTags> t(tag.get_content()[pos].tag);
            make_child_layer(self, *t, bs, height,ws,hs);
          }
          break;
        }
      } while(++pos);
      if(sepchar)
        bs.write8(sepchar);
      self.text_length = bs.tell() - self.text_start;
    }else
    {
      const GUTF8String raw(tag.get_raw().fromEscaped());
      const int i=raw.nextNonSpace(0);
      bs.writestring(raw.substr(i,raw.firstEndSpace(i)-i));
      if(sepchar)
        bs.write8(sepchar);
      self.text_length = bs.tell() - self.text_start;
    }
  }
  // Propagate this zone's extent to the parent, then normalize our own.
  parent.rect.xmin=min(xmin,parent.rect.xmin);
  parent.rect.ymin=min(ymin,parent.rect.ymin);
  parent.rect.xmax=max(xmax,parent.rect.xmax);
  parent.rect.ymax=max(ymax,parent.rect.ymax);
  if(xmin>xmax)
  {
    const int t=xmin;
    xmin=xmax;
    xmax=t;
  }
  if(ymin>ymax)
  {
    const int t=ymin;
    ymin=ymax;
    ymax=t;
  }
//  DjVuPrintMessage("(%d,%d)(%d,%d)<<<\\%o>>>\n",
//    xmin,ymin,xmax,ymax, sepchar);
  return retval;
}

// Run the registered OCR callback on the page image and, if it returns a
// non-empty stream, parse that stream as XML and use it as the hidden text.
// Nothing happens when `value` is empty or equals "false" (any case).
void
lt_XMLParser::Impl::ChangeTextOCR(
  const GUTF8String &value,
  const int width,
  const int height,
  const GP<DjVuFile> &dfile)
{
  if(value.length() && value.downcase() != "false")
  {
    const GP<ByteStream> bs=OCRcallback(value,DjVuImage::create(dfile));
    if( bs && bs->size() )
    {
      const GP<lt_XMLTags> tags(lt_XMLTags::create(bs));
      ChangeText(width,height,*dfile,*tags);
    }
  }
}

// Serialize `tags` and store the result as the file's metadata; an empty
// serialization clears the metadata.
void
lt_XMLParser::Impl::ChangeMeta(
  DjVuFile &dfile, const lt_XMLTags &tags )
{
  dfile.resume_decode(true);
  GP<ByteStream> gbs(ByteStream::create());
  tags.write(*gbs,false);
  gbs->seek(0L);
  GUTF8String raw(gbs->getAsUTF8());
  if(raw.length())
  {
    //GUTF8String gs="<"+(metadatatag+(">"+raw))+"\n");
    dfile.change_meta(raw+"\n");
  }else
  {
    dfile.change_meta(GUTF8String());
  }
}

// Rebuild the hidden text layer of `dfile` from the tag tree `tags`.
// width/height are the dimensions the XML coordinates refer to; they are
// scaled to the page size when they differ.  Does nothing if the file has
// no info record.
void
lt_XMLParser::Impl::ChangeText(
  const int width, const int height,
  DjVuFile &dfile, const lt_XMLTags &tags )
{
  dfile.resume_decode(true);

  GP<DjVuText> text = DjVuText::create();
  GP<DjVuTXT> txt = text->txt = DjVuTXT::create();

  // to store the new text
  GP<ByteStream> textbs = ByteStream::create();

  GP<DjVuInfo> info=(dfile.info);
  if(info)
  {
    const int h=info->height;
    const int w=info->width;
    txt->page_zone.text_start = 0;
    DjVuTXT::Zone &parent=txt->page_zone;
    parent.rect.xmin=0;
    parent.rect.ymin=0;
    parent.rect.ymax=h;
    parent.rect.xmax=w;
    double ws=1.0;
    if(width && width != w)
    {
      ws=((double)w)/((double)width);
    }
    double hs=1.0;
    if(height && height != h)
    {
      hs=((double)h)/((double)height);
    }
    make_child_layer(parent, tags, *textbs, h, ws,hs);
    // Terminate the text, then copy it into the TXT object's buffer.
    textbs->write8(0);
    long len = textbs->tell();
    txt->page_zone.text_length = len;
    textbs->seek(0,SEEK_SET);
    textbs->read(txt->textUTF8.getbuf(len), len);

    dfile.change_text(txt,false);
  }
}

// Apply the first HIDDENTEXT child of the OBJECT tag, if any, to `dfile`.
void
lt_XMLParser::Impl::parse_text(
  const int width,
  const int height,
  const lt_XMLTags &GObject,
  DjVuFile &dfile )
{
  GPosition textPos = GObject.contains(hiddentexttag);
  if(textPos)
  {
    // loop through the hidden text - there should only be one
    // if there are more ??only the last one will be saved??
    GPList<lt_XMLTags> textTags = GObject[textPos];
    GPosition pos = textTags;
    ChangeText(width,height,dfile,*textTags[pos]);
  }
}

// Apply the first METADATA child of the OBJECT tag, if any, to `dfile`.
void
lt_XMLParser::Impl::parse_meta(
  const lt_XMLTags &GObject,
  DjVuFile &dfile )
{
  GPosition metaPos = GObject.contains(metadatatag);
  if(metaPos)
  {
    // loop through the hidden text - there should only be one
    // if there are more ??only the last one will be saved??
    GPList<lt_XMLTags> metaTags = GObject[metaPos];
    GPosition pos = metaTags;
    ChangeMeta(dfile,*metaTags[pos]);
  }
}

// With an image: call the stored callback (if any) and return its result.
// Without an image: store `xarg` and `xcallback` for later calls and return
// a null stream.
static GP<ByteStream>
OCRcallback(
  void * const xarg,
  lt_XMLParser::mapOCRcallback * const xcallback,
  const GUTF8String &value,
  const GP<DjVuImage> &image )
{
  GP<ByteStream> retval;
  static void *arg=0;
  static lt_XMLParser::mapOCRcallback *callback=0;
  if(image)
  {
    if(callback)
      retval=callback(arg,value,image);
  }else
  {
    arg=xarg;
    callback=xcallback;
  }
  return retval;
}

// Register the OCR callback and the opaque argument passed back to it.
void
lt_XMLParser::setOCRcallback(
  void * const arg,
  mapOCRcallback * const callback)
{
  ::OCRcallback(arg,callback);
}


#ifdef HAVE_NAMESPACES
}
# ifndef NOT_USING_DJVU_NAMESPACE
using namespace DJVU;
# endif
#endif