From dfe289850f068f19ba4a83ab4e7e22a7e09c13c9 Mon Sep 17 00:00:00 2001
From: Timothy Pearson
Date: Sat, 26 Jan 2013 13:17:21 -0600
Subject: Rename a number of libraries and executables to avoid conflicts with
 KDE4

---
 khtml/html/htmltokenizer.cpp | 1798 ------------------------------------------
 1 file changed, 1798 deletions(-)
 delete mode 100644 khtml/html/htmltokenizer.cpp

diff --git a/khtml/html/htmltokenizer.cpp b/khtml/html/htmltokenizer.cpp
deleted file mode 100644
index 50e3ea69e..000000000
--- a/khtml/html/htmltokenizer.cpp
+++ /dev/null
@@ -1,1798 +0,0 @@
-/*
-    This file is part of the KDE libraries
-
-    Copyright (C) 1997 Martin Jones (mjones@kde.org)
-              (C) 1997 Torben Weis (weis@kde.org)
-              (C) 1998 Waldo Bastian (bastian@kde.org)
-              (C) 1999 Lars Knoll (knoll@kde.org)
-              (C) 1999 Antti Koivisto (koivisto@kde.org)
-              (C) 2001-2003 Dirk Mueller (mueller@kde.org)
-              (C) 2004 Apple Computer, Inc.
-              (C) 2006 Germain Garand (germain@ebooksfrance.org)
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Library General Public
-    License as published by the Free Software Foundation; either
-    version 2 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-    Library General Public License for more details.
-
-    You should have received a copy of the GNU Library General Public License
-    along with this library; see the file COPYING.LIB. If not, write to
-    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-    Boston, MA 02110-1301, USA.
-*/
-//----------------------------------------------------------------------------
-//
-// KDE HTML Widget - Tokenizers
-
-//#define TOKEN_DEBUG 1
-//#define TOKEN_DEBUG 2
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include "html/htmltokenizer.h"
-#include "html/html_documentimpl.h"
-#include "html/htmlparser.h"
-#include "html/dtd.h"
-
-#include "misc/loader.h"
-#include "misc/htmlhashes.h"
-
-#include "khtmlview.h"
-#include "khtml_part.h"
-#include "xml/dom_docimpl.h"
-#include "css/csshelper.h"
-#include "ecma/kjs_proxy.h"
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "kentities.c"
-
-using namespace khtml;
-
-static const TQChar commentStart [] = { '<','!','-','-', TQChar::null };
-
-static const char scriptEnd [] = "deref(this);
-
-    if ( buffer )
-        KHTML_DELETE_QCHAR_VEC(buffer);
-    buffer = dest = 0;
-    size = 0;
-
-    if ( scriptCode )
-        KHTML_DELETE_QCHAR_VEC(scriptCode);
-    scriptCode = 0;
-    scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
-
-    if (m_autoCloseTimer) {
-        killTimer(m_autoCloseTimer);
-        m_autoCloseTimer = 0;
-    }
-
-    currToken.reset();
-}
-
-void HTMLTokenizer::begin()
-{
-    m_executingScript = 0;
-    onHold = false;
-    reset();
-    size = 254;
-    buffer = KHTML_ALLOC_QCHAR_VEC( 255 );
-    dest = buffer;
-    tag = NoTag;
-    pending = NonePending;
-    discard = NoneDiscard;
-    pre = false;
-    prePos = 0;
-    plaintext = false;
-    xmp = false;
-    processingInstruction = false;
-    script = false;
-    escaped = false;
-    style = false;
-    skipLF = false;
-    select = false;
-    comment = false;
-    server = false;
-    textarea = false;
-    title = false;
-    startTag = false;
-    tquote = NoQuote;
-    searchCount = 0;
-    Entity = NoEntity;
-    noMoreData = false;
-    brokenComments = false;
-    brokenServer = false;
-    brokenScript = false;
-    lineno = 0;
-    scriptStartLineno = 0;
-    tagStartLineno = 0;
-}
-
-void HTMLTokenizer::processListing(TokenizerString list)
-{
-    bool old_pre = pre;
-
-    // This function adds the listing 'list' as
-    // preformatted text-tokens to the token-collection
-    // thereby converting TABs.
-    if(!style) pre = true;
-    prePos = 0;
-
-    while ( !list.isEmpty() )
-    {
-        checkBuffer(3*TAB_SIZE);
-
-        if (skipLF && ( *list != '\n' ))
-        {
-            skipLF = false;
-        }
-
-        if (skipLF)
-        {
-            skipLF = false;
-            ++list;
-        }
-        else if (( *list == '\n' ) || ( *list == '\r' ))
-        {
-            if (discard == LFDiscard)
-            {
-                // Ignore this LF
-                discard = NoneDiscard; // We have discarded 1 LF
-            }
-            else
-            {
-                // Process this LF
-                if (pending)
-                    addPending();
-
-                // we used to do it not at all and we want to have
-                // it fixed for textarea. So here we are
-                if ( textarea ) {
-                    prePos++;
-                    *dest++ = *list;
-                } else
-                    pending = LFPending;
-            }
-            /* Check for MS-DOS CRLF sequence */
-            if (*list == '\r')
-            {
-                skipLF = true;
-            }
-            ++list;
-        }
-        else if (( *list == ' ' ) || ( *list == '\t'))
-        {
-            if (pending)
-                addPending();
-            if (*list == ' ')
-                pending = SpacePending;
-            else
-                pending = TabPending;
-
-            ++list;
-        }
-        else
-        {
-            discard = NoneDiscard;
-            if (pending)
-                addPending();
-
-            prePos++;
-            *dest++ = *list;
-            ++list;
-        }
-
-    }
-
-    if ((pending == SpacePending) || (pending == TabPending))
-        addPending();
-    else
-        pending = NonePending;
-
-    prePos = 0;
-    pre = old_pre;
-}
-
-void HTMLTokenizer::parseSpecial(TokenizerString &src)
-{
-    assert( textarea || title || !Entity );
-    assert( !tag );
-    assert( xmp+textarea+title+style+script == 1 );
-    if (script)
-        scriptStartLineno = lineno+src.lineCount();
-
-    if ( comment ) parseComment( src );
-
-    while ( !src.isEmpty() ) {
-        checkScriptBuffer();
-        unsigned char ch = src->latin1();
-        if ( !scriptCodeResync && !brokenComments && !textarea && !xmp && ch == '-' && scriptCodeSize >= 3 && !src.escaped() && TQConstString( scriptCode+scriptCodeSize-3, 3 ).string() == "' ) ) {
-            ++src;
-            scriptCodeSize = scriptCodeResync-1;
-            scriptCodeResync = 0;
-            scriptCode[ scriptCodeSize ] = scriptCode[ scriptCodeSize + 1 ] = 0;
-            if ( script )
-                scriptHandler();
-            else {
-                processListing(TokenizerString(scriptCode, scriptCodeSize));
-                processToken();
-                if ( style ) { currToken.tid = ID_STYLE + ID_CLOSE_TAG; }
-                else if ( textarea ) { currToken.tid = ID_TEXTAREA + ID_CLOSE_TAG; }
-                else if ( title ) { currToken.tid = ID_TITLE + ID_CLOSE_TAG; }
-                else if ( xmp ) { currToken.tid = ID_XMP + ID_CLOSE_TAG; }
-                processToken();
-                script = style = textarea = title = xmp = false;
-                tquote = NoQuote;
-                scriptCodeSize = scriptCodeResync = 0;
-            }
-            return;
-        }
-        // possible end of tagname, lets check.
-        if ( !scriptCodeResync && !escaped && !src.escaped() && ( ch == '>' || ch == '/' || ch <= ' ' ) && ch &&
-             scriptCodeSize >= searchStopperLen &&
-             !TQConstString( scriptCode+scriptCodeSize-searchStopperLen, searchStopperLen ).string().find( searchStopper, 0, false )) {
-            scriptCodeResync = scriptCodeSize-searchStopperLen+1;
-            tquote = NoQuote;
-            continue;
-        }
-        if ( scriptCodeResync && !escaped ) {
-            if(ch == '\"')
-                tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote);
-            else if(ch == '\'')
-                tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote;
-            else if (tquote != NoQuote && (ch == '\r' || ch == '\n'))
-                tquote = NoQuote;
-        }
-        escaped = ( !escaped && ch == '\\' );
-        if (!scriptCodeResync && (textarea||title) && !src.escaped() && ch == '&') {
-            TQChar *scriptCodeDest = scriptCode+scriptCodeSize;
-            ++src;
-            parseEntity(src,scriptCodeDest,true);
-            scriptCodeSize = scriptCodeDest-scriptCode;
-        }
-        else {
-            scriptCode[ scriptCodeSize++ ] = *src;
-            ++src;
-        }
-    }
-}
-
-void HTMLTokenizer::scriptHandler()
-{
-    TQString currentScriptSrc = scriptSrc;
-    scriptSrc = TQString::null;
-
-    processListing(TokenizerString(scriptCode, scriptCodeSize));
-    TQString exScript( buffer, dest-buffer );
-
-    processToken();
-    currToken.tid = ID_SCRIPT + ID_CLOSE_TAG;
-    processToken();
-
-    // Scripts following a frameset element should not be executed or even loaded in the case of extern scripts.
-    bool followingFrameset = (parser->doc()->body() && parser->doc()->body()->id() == ID_FRAMESET);
-    bool effectiveScript = !parser->skipMode() && !followingFrameset;
-    bool deferredScript = false;
-
-    if ( effectiveScript ) {
-        CachedScript* cs = 0;
-
-        // forget what we just got, load from src url instead
-        if ( !currentScriptSrc.isEmpty() && javascript &&
-             (cs = parser->doc()->docLoader()->requestScript(currentScriptSrc, scriptSrcCharset) )) {
-            cachedScript.enqueue(cs);
-        }
-
-        if (cs) {
-            pendingQueue.push(src);
-            uint scriptCount = cachedScript.count();
-            setSrc(TokenizerString());
-            scriptCodeSize = scriptCodeResync = 0;
-            cs->ref(this);
-            if (cachedScript.count() == scriptCount)
-                deferredScript = true;
-        }
-        else if (currentScriptSrc.isEmpty() && view && javascript ) {
-            pendingQueue.push(src);
-            setSrc(TokenizerString());
-            scriptCodeSize = scriptCodeResync = 0;
-            scriptExecution( exScript, TQString::null, tagStartLineno /*scriptStartLineno*/ );
-        } else {
-            // script was filtered or disallowed
-            effectiveScript = false;
-        }
-    }
-
-    script = false;
-    scriptCodeSize = scriptCodeResync = 0;
-
-    if ( !effectiveScript )
-        return;
-
-    if ( !m_executingScript && cachedScript.isEmpty() ) {
-        src.append(pendingQueue.pop());
-    } else if ( cachedScript.isEmpty() ) {
-        write( pendingQueue.pop(), false );
-    } else if ( !deferredScript && pendingQueue.count() > 1) {
-        TokenizerString t = pendingQueue.pop();
-        pendingQueue.top().prepend( t );
-    }
-}
-
-void HTMLTokenizer::scriptExecution( const TQString& str, const TQString& scriptURL,
-                                     int baseLine)
-{
-    bool oldscript = script;
-    m_executingScript++;
-    script = false;
-    TQString url;
-    if (scriptURL.isNull() && view)
-        url = static_cast(view->part()->document().handle())->URL().url();
-    else
-        url = scriptURL;
-
-    if (view)
-        view->part()->executeScript(url,baseLine+1,Node(),str);
-    m_executingScript--;
-    script = oldscript;
-}
-
-void HTMLTokenizer::parseComment(TokenizerString &src)
-{
-    // SGML strict
-    bool strict = parser->doc()->inStrictMode() && parser->doc()->htmlMode() != DocumentImpl::XHtml && !script && !style;
-    int delimiterCount = 0;
-    bool canClose = false;
-
-    checkScriptBuffer(src.length());
-    while ( src.length() ) {
-        scriptCode[ scriptCodeSize++ ] = *src;
-
-#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
-        tqDebug("comment is now: *%s*", src.toString().left(16).latin1());
-#endif
-
-        if (strict)
-        {
-            if (src->unicode() == '-') {
-                delimiterCount++;
-                if (delimiterCount == 2) {
-                    delimiterCount = 0;
-                    canClose = !canClose;
-                }
-            }
-            else
-                delimiterCount = 0;
-        }
-
-        if ((!strict || canClose) && src->unicode() == '>')
-        {
-            bool handleBrokenComments = brokenComments && !( script || style );
-            bool scriptEnd=false;
-            if (!strict)
-            {
-                if ( scriptCodeSize > 2 && scriptCode[scriptCodeSize-3] == '-' &&
-                     scriptCode[scriptCodeSize-2] == '-' )
-                    scriptEnd=true;
-            }
-
-            if (canClose || handleBrokenComments || scriptEnd ){
-                ++src;
-                if ( !( title || script || xmp || textarea || style) ) {
-#ifdef COMMENTS_IN_DOM
-                    checkScriptBuffer();
-                    scriptCode[ scriptCodeSize ] = 0;
-                    scriptCode[ scriptCodeSize + 1 ] = 0;
-                    currToken.tid = ID_COMMENT;
-                    processListing(DOMStringIt(scriptCode, scriptCodeSize - 2));
-                    processToken();
-                    currToken.tid = ID_COMMENT + ID_CLOSE_TAG;
-                    processToken();
-#endif
-                    scriptCodeSize = 0;
-                }
-                comment = false;
-                return; // Finished parsing comment
-            }
-        }
-        ++src;
-    }
-}
-
-void HTMLTokenizer::parseServer(TokenizerString &src)
-{
-    checkScriptBuffer(src.length());
-    while ( !src.isEmpty() ) {
-        scriptCode[ scriptCodeSize++ ] = *src;
-        if (src->unicode() == '>' &&
-            scriptCodeSize > 1 && scriptCode[scriptCodeSize-2] == '%') {
-            ++src;
-            server = false;
-            scriptCodeSize = 0;
-            return; // Finished parsing server include
-        }
-        ++src;
-    }
-}
-
-void HTMLTokenizer::parseProcessingInstruction(TokenizerString &src)
-{
-    char oldchar = 0;
-    while ( !src.isEmpty() )
-    {
-        unsigned char chbegin = src->latin1();
-        if(chbegin == '\'') {
-            tquote = tquote == SingleQuote ? NoQuote : SingleQuote;
-        }
-        else if(chbegin == '\"') {
-            tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote;
-        }
-        // Look for '?>'
-        // some crappy sites omit the "?" before it, so
-        // we look for an unquoted '>' instead. (IE compatible)
-        else if ( chbegin == '>' && ( !tquote || oldchar == '?' ) )
-        {
-            // We got a '?>' sequence
-            processingInstruction = false;
-            ++src;
-            discard=LFDiscard;
-            return; // Finished parsing comment!
-        }
-        ++src;
-        oldchar = chbegin;
-    }
-}
-
-void HTMLTokenizer::parseText(TokenizerString &src)
-{
-    while ( !src.isEmpty() )
-    {
-        // do we need to enlarge the buffer?
-        checkBuffer();
-
-        // ascii is okay because we only do ascii comparisons
-        unsigned char chbegin = src->latin1();
-
-        if (skipLF && ( chbegin != '\n' ))
-        {
-            skipLF = false;
-        }
-
-        if (skipLF)
-        {
-            skipLF = false;
-            ++src;
-        }
-        else if (( chbegin == '\n' ) || ( chbegin == '\r' ))
-        {
-            if (chbegin == '\r')
-                skipLF = true;
-
-            *dest++ = '\n';
-            ++src;
-        }
-        else {
-            *dest++ = *src;
-            ++src;
-        }
-    }
-}
-
-
-void HTMLTokenizer::parseEntity(TokenizerString &src, TQChar *&dest, bool start)
-{
-    if( start )
-    {
-        cBufferPos = 0;
-        entityLen = 0;
-        Entity = SearchEntity;
-    }
-
-    while( !src.isEmpty() )
-    {
-        ushort cc = src->unicode();
-        switch(Entity) {
-        case NoEntity:
-            return;
-
-            break;
-        case SearchEntity:
-            if(cc == '#') {
-                cBuffer[cBufferPos++] = cc;
-                ++src;
-                Entity = NumericSearch;
-            }
-            else
-                Entity = EntityName;
-
-            break;
-
-        case NumericSearch:
-            if(cc == 'x' || cc == 'X') {
-                cBuffer[cBufferPos++] = cc;
-                ++src;
-                Entity = Hexadecimal;
-            }
-            else if(cc >= '0' && cc <= '9')
-                Entity = Decimal;
-            else
-                Entity = SearchSemicolon;
-
-            break;
-
-        case Hexadecimal:
-        {
-            int uc = EntityChar.unicode();
-            int ll = kMin(src.length(), 8);
-            while(ll--) {
-                TQChar csrc(src->lower());
-                cc = csrc.cell();
-
-                if(csrc.row() || !((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f'))) {
-                    break;
-                }
-                uc = uc*16 + (cc - ( cc < 'a' ? '0' : 'a' - 10));
-                cBuffer[cBufferPos++] = cc;
-                ++src;
-            }
-            EntityChar = TQChar(uc);
-            Entity = SearchSemicolon;
-            break;
-        }
-        case Decimal:
-        {
-            int uc = EntityChar.unicode();
-            int ll = kMin(src.length(), 9-cBufferPos);
-            while(ll--) {
-                cc = src->cell();
-
-                if(src->row() || !(cc >= '0' && cc <= '9')) {
-                    Entity = SearchSemicolon;
-                    break;
-                }
-
-                uc = uc * 10 + (cc - '0');
-                cBuffer[cBufferPos++] = cc;
-                ++src;
-            }
-            EntityChar = TQChar(uc);
-            if(cBufferPos == 9) Entity = SearchSemicolon;
-            break;
-        }
-        case EntityName:
-        {
-            int ll = kMin(src.length(), 9-cBufferPos);
-            while(ll--) {
-                TQChar csrc = *src;
-                cc = csrc.cell();
-
-                if(csrc.row() || !((cc >= 'a' && cc <= 'z') ||
-                                   (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) {
-                    Entity = SearchSemicolon;
-                    break;
-                }
-
-                cBuffer[cBufferPos++] = cc;
-                ++src;
-
-                // be IE compatible and interpret even unterminated entities
-                // outside tags. like "foo  stuff bla".
-                if ( tag == NoTag ) {
-                    const entity* e = kde_findEntity(cBuffer, cBufferPos);
-                    if ( e && e->code < 256 ) {
-                        EntityChar = e->code;
-                        entityLen = cBufferPos;
-                    }
-                }
-            }
-            if(cBufferPos == 9) Entity = SearchSemicolon;
-            if(Entity == SearchSemicolon) {
-                if(cBufferPos > 1) {
-                    const entity *e = kde_findEntity(cBuffer, cBufferPos);
-                    // IE only accepts unterminated entities < 256,
-                    // Gecko accepts them all, but only outside tags
-                    if(e && ( tag == NoTag || e->code < 256 || *src == ';' )) {
-                        EntityChar = e->code;
-                        entityLen = cBufferPos;
-                    }
-                }
-            }
-            break;
-        }
-        case SearchSemicolon:
-#ifdef TOKEN_DEBUG
-            kdDebug( 6036 ) << "ENTITY " << EntityChar.unicode() << endl;
-#endif
-            fixUpChar(EntityChar);
-
-            if (*src == ';')
-                ++src;
-
-            if ( !EntityChar.isNull() ) {
-                checkBuffer();
-                if (entityLen > 0 && entityLen < cBufferPos) {
-                    int rem = cBufferPos - entityLen;
-                    src.prepend( TokenizerString(TQString::fromAscii(cBuffer+entityLen, rem)) );
-                }
-                src.push( EntityChar );
-            } else {
-#ifdef TOKEN_DEBUG
-                kdDebug( 6036 ) << "unknown entity!" << endl;
-#endif
-                checkBuffer(11);
-                // ignore the sequence, add it to the buffer as plaintext
-                *dest++ = '&';
-                for(unsigned int i = 0; i < cBufferPos; i++)
-                    dest[i] = cBuffer[i];
-                dest += cBufferPos;
-                if (pre)
-                    prePos += cBufferPos+1;
-            }
-
-            Entity = NoEntity;
-            EntityChar = TQChar::null;
-            return;
-        };
-    }
-}
-
-void HTMLTokenizer::parseTag(TokenizerString &src)
-{
-    assert(!Entity );
-    checkScriptBuffer( src.length() );
-
-    while ( !src.isEmpty() )
-    {
-        checkBuffer();
-#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
-        uint l = 0;
-        while(l < src.length() && (src.toString()[l]).latin1() != '>')
-            l++;
-        tqDebug("src is now: *%s*, tquote: %d",
-                src.toString().left(l).latin1(), tquote);
-#endif
-        switch(tag) {
-        case NoTag:
-            return;
-        case TagName:
-        {
-#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
-            tqDebug("TagName");
-#endif
-            if (searchCount > 0)
-            {
-                if (*src == commentStart[searchCount])
-                {
-                    searchCount++;
-                    if (searchCount == 4)
-                    {
-#ifdef TOKEN_DEBUG
-                        kdDebug( 6036 ) << "Found comment" << endl;
-#endif
-                        // Found '
-                    searchCount = 1; // Look for ' " << name << " id = " << currToken.tid << endl;
-    if (currToken.flat)
-        kdDebug( 6036 ) << "Token is FLAT!" << endl;
-    if(!text.isNull())
-        kdDebug( 6036 ) << "text: \"" << text << "\"" << endl;
-    unsigned long l = currToken.attrs ? currToken.attrs->length() : 0;
-    if(l) {
-        kdDebug( 6036 ) << "Attributes: " << l << endl;
-        for (unsigned long i = 0; i < l; ++i) {
-            NodeImpl::Id tid = currToken.attrs->idAt(i);
-            DOMString value = currToken.attrs->valueAt(i);
-            kdDebug( 6036 ) << " " << tid << " " << parser->doc()->getDocument()->getName(NodeImpl::AttributeId, tid).string()
-                            << "=\"" << value.string() << "\"" << endl;
-        }
-    }
-    kdDebug( 6036 ) << endl;
-#endif
-
-    // In some cases, parseToken() can cause javascript code to be executed
-    // (for example, when setting an attribute that causes an event handler
-    // to be created). So we need to protect against re-entrancy into the parser
-    m_executingScript++;
-
-    // pass the token over to the parser, the parser DOES NOT delete the token
-    parser->parseToken(&currToken);
-
-    m_executingScript--;
-
-    if ( currToken.flat && currToken.tid != ID_TEXT && !parser->noSpaces() )
-        discard = NoneDiscard;
-
-    currToken.reset();
-    if (jsProxy)
-        jsProxy->setEventHandlerLineno(1);
-}
-
-
-HTMLTokenizer::~HTMLTokenizer()
-{
-    reset();
-    delete parser;
-}
-
-
-void HTMLTokenizer::enlargeBuffer(int len)
-{
-    int newsize = kMax(size*2, size+len);
-    int oldoffs = (dest - buffer);
-
-    buffer = KHTML_REALLOC_QCHAR_VEC(buffer, newsize);
-    dest = buffer + oldoffs;
-    size = newsize;
-}
-
-void HTMLTokenizer::enlargeScriptBuffer(int len)
-{
-    int newsize = kMax(scriptCodeMaxSize*2, scriptCodeMaxSize+len);
-    scriptCode = KHTML_REALLOC_QCHAR_VEC(scriptCode, newsize);
-    scriptCodeMaxSize = newsize;
-}
-
-void HTMLTokenizer::notifyFinished(CachedObject* /*finishedObj*/)
-{
-    assert(!cachedScript.isEmpty());
-    bool done = false;
-    while (!done && cachedScript.head()->isLoaded()) {
-
-        kdDebug( 6036 ) << "Finished loading an external script" << endl;
-
-        CachedScript* cs = cachedScript.dequeue();
-        DOMString scriptSource = cs->script();
-#ifdef TOKEN_DEBUG
-        kdDebug( 6036 ) << "External script is:" << endl << scriptSource.string() << endl;
-#endif
-        setSrc(TokenizerString());
-
-        // make sure we forget about the script before we execute the new one
-        // infinite recursion might happen otherwise
-        TQString cachedScriptUrl( cs->url().string() );
-        cs->deref(this);
-
-        scriptExecution( scriptSource.string(), cachedScriptUrl );
-
-        done = cachedScript.isEmpty();
-
-        // 'script' is true when we are called synchronously from
-        // scriptHandler(). In that case scriptHandler() will take care
-        // of 'scriptOutput'.
-        if ( !script ) {
-            while (pendingQueue.count() > 1) {
-                TokenizerString t = pendingQueue.pop();
-                pendingQueue.top().prepend( t );
-            }
-            if (done) {
-                write(pendingQueue.pop(), false);
-            }
-            // we might be deleted at this point, do not
-            // access any members.
-        }
-    }
-}
-
-bool HTMLTokenizer::isWaitingForScripts() const
-{
-    return cachedScript.count();
-}
-
-bool HTMLTokenizer::isExecutingScript() const
-{
-    return (m_executingScript > 0);
-}
-
-void HTMLTokenizer::setSrc(const TokenizerString& source)
-{
-    lineno += src.lineCount();
-    src = source;
-    src.resetLineCount();
-}
-
-void HTMLTokenizer::setOnHold(bool _onHold)
-{
-    if (onHold == _onHold) return;
-    onHold = _onHold;
-}
--
cgit v1.2.3