summaryrefslogtreecommitdiffstats
path: root/kviewshell/plugins/djvu/libdjvu/XMLTags.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'kviewshell/plugins/djvu/libdjvu/XMLTags.cpp')
-rw-r--r--kviewshell/plugins/djvu/libdjvu/XMLTags.cpp417
1 files changed, 417 insertions, 0 deletions
diff --git a/kviewshell/plugins/djvu/libdjvu/XMLTags.cpp b/kviewshell/plugins/djvu/libdjvu/XMLTags.cpp
new file mode 100644
index 00000000..2511a585
--- /dev/null
+++ b/kviewshell/plugins/djvu/libdjvu/XMLTags.cpp
@@ -0,0 +1,417 @@
+//C- -*- C++ -*-
+//C- -------------------------------------------------------------------
+//C- DjVuLibre-3.5
+//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun.
+//C- Copyright (c) 2001 AT&T
+//C-
+//C- This software is subject to, and may be distributed under, the
+//C- GNU General Public License, Version 2. The license should have
+//C- accompanied the software or you may obtain a copy of the license
+//C- from the Free Software Foundation at http://www.fsf.org .
+//C-
+//C- This program is distributed in the hope that it will be useful,
+//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
+//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//C- GNU General Public License for more details.
+//C-
+//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library
+//C- distributed by Lizardtech Software. On July 19th 2002, Lizardtech
+//C- Software authorized us to replace the original DjVu(r) Reference
+//C- Library notice by the following text (see doc/lizard2002.djvu):
+//C-
+//C- ------------------------------------------------------------------
+//C- | DjVu (r) Reference Library (v. 3.5)
+//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
+//C- | The DjVu Reference Library is protected by U.S. Pat. No.
+//C- | 6,058,214 and patents pending.
+//C- |
+//C- | This software is subject to, and may be distributed under, the
+//C- | GNU General Public License, Version 2. The license should have
+//C- | accompanied the software or you may obtain a copy of the license
+//C- | from the Free Software Foundation at http://www.fsf.org .
+//C- |
+//C- | The computer code originally released by LizardTech under this
+//C- | license and unmodified by other parties is deemed "the LIZARDTECH
+//C- | ORIGINAL CODE." Subject to any third party intellectual property
+//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
+//C- | non-exclusive license to make, use, sell, or otherwise dispose of
+//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
+//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
+//C- | General Public License. This grant only confers the right to
+//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
+//C- | the extent such infringement is reasonably necessary to enable
+//C- | recipient to make, have made, practice, sell, or otherwise dispose
+//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
+//C- | any greater extent that may be necessary to utilize further
+//C- | modifications or combinations.
+//C- |
+//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
+//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
+//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+//C- +------------------------------------------------------------------
+//
+// $Id: XMLTags.cpp,v 1.12 2003/11/07 22:08:22 leonb Exp $
+// $Name: release_3_5_15 $
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#if NEED_GNUG_PRAGMAS
+# pragma implementation
+#endif
+
+// From: Leon Bottou, 1/31/2002
+// This is purely Lizardtech stuff.
+
+#include "XMLTags.h"
+#include "UnicodeByteStream.h"
+#include <ctype.h>
+#if HAS_WCTYPE
+#include <wctype.h>
+#endif
+
+
+#ifdef HAVE_NAMESPACES
+namespace DJVU {
+# ifdef NOT_DEFINED // Just to fool emacs c++ mode
+}
+#endif
+#endif
+
+// Default constructor: nothing is initialised explicitly here.
+lt_XMLContents::lt_XMLContents()
+{
+}
+
+// Constructs a contents entry whose `tag` member refers to the given tag.
+lt_XMLContents::lt_XMLContents(GP<lt_XMLTags> t)
+{
+  this->tag = t;
+}
+
+// Extracts the next attribute name from `tag`.
+//
+// Leading whitespace is skipped.  The name then extends up to (but not
+// including) the first NUL, '/', '>', '=' or whitespace character.
+//   tag  - non-null, NUL-terminated text to scan.
+//   t    - output: set to the character that terminated the name.
+// Returns the name, which is empty when no name characters were found.
+static GUTF8String
+getargn(char const tag[], char const *&t)
+{
+  // The <ctype.h> classifiers have undefined behaviour for negative
+  // arguments.  Plain char may be signed and this text is UTF-8, so each
+  // byte is converted to unsigned char before it is classified.
+  char const *s=tag;
+  while(isspace(static_cast<unsigned char>(*s)))
+  {
+    ++s;
+  }
+  for(t=s;
+      (*t)&&((*t)!='/')&&((*t)!='>')&&((*t)!='=')
+        &&!isspace(static_cast<unsigned char>(*t));
+      ++t)
+  {
+  }
+  return GUTF8String(s,t-s);
+}
+
+// Extracts the attribute value that follows an attribute name.
+//
+//   tag  - text positioned right after the name; may be null.
+//   t    - output: set to the first character not consumed.
+//
+// When `tag` is null or does not start with '=', there is no value: `t`
+// is set to `tag` and an empty string is returned.  Otherwise the value
+// starts after the '='.  A value opening with '"' or '\'' (written '\47'
+// below) runs to the matching quote, a '>' or the end of the text, and a
+// closing quote is consumed when present.  An unquoted value runs to the
+// first NUL, '/', '>' or whitespace character.
+static GUTF8String
+getargv(char const tag[], char const *&t)
+{
+  GUTF8String retval;
+  if(!tag || tag[0] != '=')
+  {
+    t=tag;
+    return retval;
+  }
+  char const *s=t=tag+1;
+  if((*t == '"')||(*t == '\47'))
+  {
+    char const q=*(t++);
+    ++s; // the value starts after the opening quote
+    while((*t)&&((*t)!=q)&&((*t)!='>'))
+    {
+      ++t;
+    }
+    retval=GUTF8String(s,t-s);
+    if(t[0] == q)
+    {
+      ++t; // step over the closing quote
+    }
+  }else
+  {
+    // The <ctype.h> classifiers have undefined behaviour for negative
+    // arguments; plain char may be signed and the text is UTF-8, so the
+    // byte is converted to unsigned char before it is classified.
+    while((*t)&&((*t)!='/')&&((*t)!='>')
+          &&!isspace(static_cast<unsigned char>(*t)))
+    {
+      ++t;
+    }
+    retval=GUTF8String(s,t-s);
+  }
+  return retval;
+}
+
+// Extracts the element name from the text of a tag.
+//
+// Leading whitespace is skipped.  The name then extends up to (but not
+// including) the first NUL, '>', '/' or whitespace character.
+//   tag  - non-null, NUL-terminated text to scan.
+//   t    - output: set to the character that terminated the name, i.e.
+//          where any attributes would begin.
+// Returns the name, which may be empty.
+static GUTF8String
+tagtoname(char const tag[],char const *&t)
+{
+  // The <ctype.h> classifiers have undefined behaviour for negative
+  // arguments.  Plain char may be signed and this text is UTF-8, so each
+  // byte is converted to unsigned char before it is classified.
+  char const *s=tag;
+  while(isspace(static_cast<unsigned char>(*s)))
+  {
+    ++s;
+  }
+  for(t=s;
+      (*t)&&((*t)!='>')&&((*t)!='/')
+        &&!isspace(static_cast<unsigned char>(*t));
+      ++t)
+  {
+  }
+  return GUTF8String(s,t-s);
+}
+
+// Convenience overload of tagtoname() for callers that only want the
+// name and do not care where it ended.
+static inline GUTF8String
+tagtoname(char const tag[])
+{
+  char const *name_end; // required by the two-argument overload, unused here
+  return tagtoname(tag,name_end);
+}
+
+// Returns true when raw.nextNonSpace() equals the length of `raw`
+// (presumably meaning the string holds nothing but whitespace -- this
+// relies on the semantics of GUTF8String::nextNonSpace()).
+static inline bool
+isspaces(const GUTF8String &raw)
+{
+  const int total=(int)raw.length();
+  return (total == raw.nextNonSpace());
+}
+
+// Parses a run of name[=value] attributes starting at `t` and stores
+// them in `args`.
+//
+//   t         - text positioned where the attributes begin.
+//   args      - map that receives one entry per attribute; the value is
+//               passed through fromEscaped() before it is stored.
+//   downcase  - when true, each attribute name is lower-cased first.
+//
+// Parsing stops at the first position where no attribute name is found.
+void
+lt_XMLTags::ParseValues(char const *t, GMap<GUTF8String,GUTF8String> &args,bool downcase)
+{
+  char const *cursor=t;
+  for(;;)
+  {
+    char const *after_name;
+    GUTF8String key=getargn(cursor,after_name);
+    if(!key.length())
+    {
+      break;
+    }
+    if(downcase)
+    {
+      key=key.downcase();
+    }
+    // getargv() moves cursor past the value (or leaves it right after
+    // the name when there is no "=value" part).
+    args[key]=getargv(after_name,cursor).fromEscaped();
+  }
+}
+
+// Destructor: no explicit cleanup is performed here.
+lt_XMLTags::~lt_XMLTags()
+{
+}
+
+// Default constructor: only startline is set explicitly (to 0).
+lt_XMLTags::lt_XMLTags()
+  : startline(0)
+{
+}
+
+// Builds a tag from its textual form `n` (the element name optionally
+// followed by attributes): the name goes to `name`, the attributes are
+// parsed into `args`, and startline is set to 0.
+lt_XMLTags::lt_XMLTags(const char n[])
+  : startline(0)
+{
+  char const *attr_text; // set by tagtoname() to the end of the name
+  name=tagtoname(n,attr_text);
+  ParseValues(attr_text,args);
+}
+
+// Initialises this tag from a byte stream: the stream is wrapped in an
+// XMLByteStream and handed to init(XMLByteStream &).
+void
+lt_XMLTags::init(const GP<ByteStream> &bs)
+{
+  GP<XMLByteStream> xml_stream(XMLByteStream::create(bs));
+  init(*xml_stream);
+}
+
+// Initialises this tag from a URL: a byte stream is created for `url`
+// with mode "rb" and handed to init(const GP<ByteStream> &).
+void
+lt_XMLTags::init(const GURL &url)
+{
+  const GP<ByteStream> input(ByteStream::create(url,"rb"));
+  init(input);
+}
+
+void // Parses the XML text delivered by xmlbs into this object and its child tags;
+lt_XMLTags::init(XMLByteStream &xmlbs) // malformed input is reported through G_THROW.
+{ // This object takes the name and attributes of the first top-level opening tag.
+ if(!get_count()) // this object is stored in a GP below (t=this); the no_GP error key suggests it must already be GP-held -- confirm get_count() semantics
+ {
+ G_THROW( ERR_MSG("XMLTags.no_GP") );
+ }
+ GPList<lt_XMLTags> level; // stack of currently open elements; the last entry is the innermost one
+ GUTF8String tag,raw(xmlbs.gets(0,'<',false)); // local raw (not the member used by write()): text read ahead of the first tag
+ int linesread=xmlbs.get_lines_read(); // line counter after that read; used in error messages and for set_Line()
+ if(!isspaces(raw)) // the text ahead of the first tag must pass isspaces()
+ {
+ G_THROW( (ERR_MSG("XMLTags.raw_string") "\t")+raw);
+ }
+ GUTF8String encoding; // last encoding name handed to xmlbs.set_encoding()
+ for(int len;(len=(tag=xmlbs.gets(0,'>',true)).length());) // read one chunk per iteration using '>' as delimiter; an empty chunk ends the loop
+ {
+ if(tag[len-1] != '>') // the chunk does not end with '>'
+ {
+ G_THROW((ERR_MSG("XMLTags.bad_tag") "\t")+tag);
+ }
+ switch(tag[1]) // dispatch on the second character (the first is presumably '<')
+ {
+ case '?': // "<?...": processing instruction or XML declaration
+ {
+ while(len < 4 || tag.substr(len-2,len) != "?>") // keep appending chunks until the text ends with "?>"
+ {
+ GUTF8String cont(xmlbs.gets(0,'>',true));
+ if(!cont.length()) // nothing more to read before "?>" was seen
+ {
+ G_THROW( (ERR_MSG("XMLTags.bad_PI") "\t")+tag);
+ }
+ len=((tag+=cont).length());
+ }
+ char const *n; // set by tagtoname() to the end of the name
+ GUTF8String xtag = tag.substr(2,-1); // text after the leading "<?"
+ GUTF8String xname = tagtoname(xtag,n);
+ if(xname.downcase() == "xml") // only the <?xml ...?> declaration is interpreted
+ {
+ ParseValues(n,args); // NOTE(review): the declaration's attributes are stored in this object's own args
+ for(GPosition pos=args;pos;++pos)
+ {
+ if(args.key(pos) == "encoding")
+ {
+ const GUTF8String e=args[pos].upcase();
+ if(e != encoding) // call set_encoding() only when the upper-cased name changed
+ {
+ xmlbs.set_encoding((encoding=e));
+ }
+ }
+ }
+ }
+ break;
+ }
+ case '!': // "<!...": only comments get special handling; the tag itself is not stored
+ {
+ if(tag[2] == '-' && tag[3] == '-') // starts with "<!--"
+ {
+ while((len < 7) ||
+ (tag.substr(len-3,-1) != "-->")) // keep appending chunks until the text ends with "-->"
+ {
+ GUTF8String cont(xmlbs.gets(0,'>',true));
+ if(!cont.length()) // nothing more to read before "-->" was seen
+ {
+ GUTF8String mesg;
+ mesg.format( ERR_MSG("XMLTags.bad_comment") "\t%s",(const char *)tag);
+ G_THROW(mesg);
+ }
+ len=((tag+=cont).length());
+ }
+ }
+ break;
+ }
+ case '/': // "</name>": closing tag
+ {
+ GUTF8String xname=tagtoname(tag.substr(2,-1)); // name taken from the text after "</"
+ GPosition last=level.lastpos();
+ if(last)
+ {
+ if(level[last]->name != xname) // the closing tag must name the innermost open element
+ {
+ G_THROW( (ERR_MSG("XMLTags.unmatched_end") "\t")
+ +level[last]->name+("\t"+GUTF8String(level[last]->get_Line()))
+ +("\t"+xname)+("\t"+GUTF8String(linesread+1)));
+ }
+ level.del(last); // pop the element that was just closed
+ }else
+ {
+ G_THROW( ERR_MSG("XMLTags.bad_form") ); // closing tag while no element is open
+ }
+ break;
+ }
+ default: // any other tag: opening or self-closing element
+ {
+ GPosition last=level.lastpos();
+ GP<lt_XMLTags> t;
+ if(last) // an element is open: this tag becomes its child
+ {
+ t=new lt_XMLTags(tag.substr(1,len-1)); // the child parses its own name and attributes from the text after the first character
+ level[last]->addtag(t);
+ if(tag[len-2] != '/') // not self-closing ("/>"): the child stays open
+ {
+ level.append(t);
+ }
+ }else if(tag[len-2] != '/') // no element open and not self-closing: this object becomes that element
+ {
+ char const *n; // set by tagtoname() to the end of the name
+ GUTF8String xtag = tag.substr(1,-1);
+ name=tagtoname(xtag, n);
+ ParseValues(n,args);
+ t=this;
+ level.append(t);
+ }else
+ {
+ G_THROW( ERR_MSG("XMLTags.no_body") ); // a self-closing tag with no element open is rejected
+ }
+ t->set_Line(linesread+1); // remembered per tag; get_Line() is reported by the unmatched_end error above
+ break;
+ }
+ }
+ if((raw=xmlbs.gets(0,'<',false))[0]) // read the text ahead of the next tag; nothing to do when it is empty
+ {
+ linesread=xmlbs.get_lines_read(); // note: the line counter is refreshed only when such text exists
+ GPosition last=level.lastpos();
+ if(last)
+ {
+ level[last]->addraw(raw); // hand the text to the innermost open element
+ }else if(!isspaces(raw)) // with no element open, the text must pass isspaces()
+ {
+ G_THROW(( ERR_MSG("XMLTags.raw_string") "\t")+raw);
+ }
+ }
+ }
+}
+
+// Returns the list stored in allTags under `tagname`, or an empty list
+// when allTags has no entry for that name.
+GPList<lt_XMLTags>
+lt_XMLTags::get_Tags(char const tagname[]) const
+{
+  GPList<lt_XMLTags> found;
+  GPosition where=allTags.contains(tagname);
+  if(where)
+  {
+    found=allTags[where];
+  }
+  return found;
+}
+
+// Collects tags into `map`, keyed by the value of one of their
+// attributes.
+//
+//   tagname  - name looked up (via contains()) in every tag of `list`.
+//   argn     - attribute whose value becomes the key in `map`.
+//   list     - tags to search; null entries are skipped.
+//   map      - output: for every tag found under `tagname` that carries
+//              the attribute `argn`, map[value of argn] is set to that
+//              tag.  A later tag with the same value replaces an
+//              earlier one.
+void
+lt_XMLTags::get_Maps(char const tagname[],
+  char const argn[],
+  GPList<lt_XMLTags> list,
+  GMap<GUTF8String, GP<lt_XMLTags> > &map)
+{
+  for(GPosition pos=list;pos;++pos)
+  {
+    GP<lt_XMLTags> &parent=list[pos];
+    if(!parent)
+    {
+      continue;
+    }
+    GPosition loc=parent->contains(tagname);
+    if(!loc)
+    {
+      continue;
+    }
+    GPList<lt_XMLTags> maps=(GPList<lt_XMLTags> &)((*parent)[loc]);
+    for(GPosition mloc=maps;mloc;++mloc)
+    {
+      GP<lt_XMLTags> child=maps[mloc];
+      if(!child)
+      {
+        continue;
+      }
+      GMap<GUTF8String,GUTF8String> &attrs=child->args;
+      GPosition hit=attrs.contains(argn);
+      if(hit)
+      {
+        map[attrs[hit]]=child;
+      }
+    }
+  }
+}
+
+// Writes this tag to `bs` as XML text.
+//
+//   bs   - stream that receives the output.
+//   top  - when true, a single "\n" is written after the tag.
+//
+// Nothing is written for the tag itself when `name` is empty.  A tag
+// that has neither raw text nor content entries is written in the
+// self-closing form <name .../>.  Otherwise the opening tag is followed
+// by the raw text, then every content entry, then </name>.  Attribute
+// values are written through toEscaped() inside double quotes.
+void
+lt_XMLTags::write(ByteStream &bs,bool const top) const
+{
+  if(name.length())
+  {
+    GUTF8String tag="<"+name;
+    for(GPosition pos=args;pos;++pos)
+    {
+      const GUTF8String attr=
+        GUTF8String(' ')+args.key(pos)+GUTF8String("=\42")+args[pos].toEscaped()+GUTF8String("\42");
+      tag+=attr;
+    }
+    GPosition tags=content;
+    if(!(tags||raw.length()))
+    {
+      // Empty element: finish the tag in its self-closing form.
+      tag+="/>";
+    }else
+    {
+      tag+=">";
+      bs.writall((const char *)tag,tag.length());
+      if(raw.length())
+      {
+        bs.writestring(raw);
+      }
+      for(;tags;++tags)
+      {
+        content[tags].write(bs);
+      }
+      // The closing tag is emitted by the shared writall() below.
+      tag="</"+name+">";
+    }
+    bs.writall((const char *)tag,tag.length());
+  }
+  if(top)
+  {
+    bs.writall("\n",1);
+  }
+}
+
+// Writes this contents entry to `bs`: first the tag, if one is set
+// (written with top=false), then the raw text, if it is non-empty.
+void
+lt_XMLContents::write(ByteStream &bs) const
+{
+  if(tag)
+  {
+    tag->write(bs,false);
+  }
+  if(!raw.length())
+  {
+    return;
+  }
+  bs.writestring(raw);
+}
+
+
+#ifdef HAVE_NAMESPACES
+}
+# ifndef NOT_USING_DJVU_NAMESPACE
+using namespace DJVU;
+# endif
+#endif