diff options
Diffstat (limited to 'filters/karbon/ai/ailexer.cpp')
-rw-r--r-- | filters/karbon/ai/ailexer.cpp | 514 |
1 files changed, 514 insertions, 0 deletions
diff --git a/filters/karbon/ai/ailexer.cpp b/filters/karbon/ai/ailexer.cpp new file mode 100644 index 000000000..352b8f48a --- /dev/null +++ b/filters/karbon/ai/ailexer.cpp @@ -0,0 +1,514 @@ +/* This file is part of the KDE project + Copyright (C) 2002, Dirk Schönberger <dirk.schoenberger@sz-online.de> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. +*/ + +#include <ctype.h> +#include <stdlib.h> +#include <tqstringlist.h> +#include "ailexer.h" + +#define CATEGORY_WHITESPACE -1 +#define CATEGORY_ALPHA -2 +#define CATEGORY_DIGIT -3 +#define CATEGORY_SPECIAL -4 +#define CATEGORY_LETTERHEX -5 +#define CATEGORY_INTTOOLONG -6 + +#define CATEGORY_ANY -127 + +#define MAX_INTLEN 9 +#define MIN_HEXCHARS 6 + +#define STOP 0 + +int iswhitespace(char c){ + return (c==' ')||(c=='\n')||(c=='\t')||(c=='\r'); +} + +int isSpecial(char c){ + return (c=='*')||(c=='_')||(c=='?')||(c=='~')||(c=='-')||(c=='^')||(c=='`')||(c=='!')||(c=='.')||(c=='@')||(c=='&')||(c=='$')||(c=='='); +} + +int isletterhex(char c){ + return (c=='A')||(c=='B')||(c=='C')||(c=='D')||(c=='E')||(c=='F'); +} + +const char*statetoa (State state){ + switch (state) + { + case State_Comment : return "comment"; + case State_Integer : return "integer"; + case State_Float : return "float"; + case State_String : return "string"; + case State_Token : return "token"; + case State_Reference : return "reference"; + case State_Start : return "start"; + case State_BlockStart : return "block start"; + case State_BlockEnd : return "block end"; + case State_ArrayStart : return "array start"; + case State_ArrayEnd : return "array end"; + case State_Byte : return "byte"; + case State_ByteArray : return "byte array"; + case State_StringEncodedChar : return "encoded char (string)"; + case State_CommentEncodedChar : return "encoded char (comment)"; + case State_ByteArray2 : return "byte array (mode 2)"; + default : return "unknown"; + } +} + +typedef struct { + State oldState; + signed char c; + State newState; + Action action; +} Transition; + +static Transition transitions[] = { + { State_Comment, '\n', State_Start, Action_Output}, + { State_Comment, '\r', State_Start, Action_Output}, + { State_Comment, '\\', State_CommentEncodedChar, Action_InitTemp}, + { State_Comment, CATEGORY_ANY, State_Comment, Action_Copy}, + { State_Integer, CATEGORY_DIGIT, State_Integer, Action_Copy}, + { State_Integer, CATEGORY_WHITESPACE, State_Start, Action_Output}, + { State_Integer, '.', State_Float, Action_Copy}, + { State_Integer, ']', State_Start, Action_OutputUnget}, + { State_Integer, '}', State_Start, Action_OutputUnget}, + { State_Integer, '#', State_Byte, Action_Copy }, + { State_Integer, '/', State_Start, Action_OutputUnget }, + { State_Integer, '{', State_Start, Action_OutputUnget }, + { State_Integer, '%', State_Start, Action_OutputUnget }, + { State_Integer, CATEGORY_LETTERHEX, State_ByteArray2, Action_Copy }, + { State_Integer, CATEGORY_INTTOOLONG, State_ByteArray2, Action_Copy }, + { State_Integer, CATEGORY_ANY, State_Start, Action_Abort}, + { State_Float, CATEGORY_DIGIT, State_Float, Action_Copy}, + { State_Float, CATEGORY_WHITESPACE, State_Start, Action_Output}, + { State_Float, ']', State_Start, Action_OutputUnget}, + { State_Float, '}', State_Start, Action_OutputUnget}, + { State_Float, CATEGORY_ANY, State_Start, Action_Abort}, + { State_Token, CATEGORY_ALPHA, State_Token, Action_Copy}, + { State_Token, CATEGORY_DIGIT, State_Token, Action_Copy}, + { State_Token, CATEGORY_SPECIAL, State_Token, Action_Copy}, + { State_Token, '}', State_Start, Action_OutputUnget}, + { State_Token, ']', State_Start, Action_OutputUnget}, + { State_Token, '{', State_BlockStart, Action_Output}, + { State_Token, '}', State_BlockEnd, Action_Output}, + { State_Token, '/', State_Start, Action_OutputUnget}, + { State_Token, CATEGORY_WHITESPACE, State_Start, Action_Output}, + { State_Token, CATEGORY_ANY, State_Start, Action_Abort}, + { State_String, ')', State_Start, Action_Output}, + { State_String, '\\', State_StringEncodedChar, Action_InitTemp}, + { State_String, CATEGORY_ANY, State_String, Action_Copy}, +// { State_Array, CATEGORY_ALPHA, State_Array, Action_Copy}, +// { State_Array, CATEGORY_DIGIT, State_Array, Action_Copy}, +// { State_Array, ' ', State_Array, Action_Copy}, + { State_BlockStart, CATEGORY_ANY, State_Start, Action_OutputUnget }, + { State_BlockEnd, CATEGORY_ANY, State_Start, Action_OutputUnget }, + { State_ArrayStart, CATEGORY_ANY, State_Start, Action_OutputUnget }, + { State_ArrayEnd, CATEGORY_ANY, State_Start, Action_OutputUnget }, + { State_Reference, '#', State_Reference, Action_Copy }, + { State_Reference, CATEGORY_ALPHA, State_Reference, Action_Copy }, + { State_Reference, CATEGORY_DIGIT, State_Reference, Action_Copy }, + { State_Reference, CATEGORY_SPECIAL, State_Reference, Action_Copy }, + { State_Reference, CATEGORY_ANY, State_Start, Action_OutputUnget }, + { State_Byte, '/', State_Start, Action_OutputUnget }, + { State_Byte, CATEGORY_DIGIT, State_Byte, Action_Copy}, + { State_Byte, CATEGORY_ALPHA, State_Byte, Action_Copy}, + { State_Byte, CATEGORY_WHITESPACE, State_Start, Action_Output}, + { State_ByteArray, '>', State_Start, Action_Output }, + { State_ByteArray, CATEGORY_ALPHA, State_ByteArray, Action_Copy }, + { State_ByteArray, CATEGORY_DIGIT, State_ByteArray, Action_Copy }, + { State_ByteArray, CATEGORY_WHITESPACE, State_ByteArray, Action_Ignore }, + { State_ByteArray, CATEGORY_ANY, State_Start, Action_Abort }, + { State_StringEncodedChar, '\\', State_String, Action_Copy}, + { State_StringEncodedChar, CATEGORY_DIGIT, State_StringEncodedChar, Action_CopyTemp}, + { State_StringEncodedChar, CATEGORY_ANY, State_String, Action_DecodeUnget}, + { State_CommentEncodedChar, '\\', State_Comment, Action_Copy}, + { State_CommentEncodedChar, CATEGORY_DIGIT, State_CommentEncodedChar, Action_CopyTemp}, + { State_CommentEncodedChar, CATEGORY_ANY, State_Comment, Action_DecodeUnget}, + { State_ByteArray2, '\n', State_Start, Action_Output}, + { State_ByteArray2, '\r', State_Start, Action_Output}, + { State_ByteArray2, '}', State_Start, Action_ByteArraySpecial}, + { State_ByteArray2, CATEGORY_WHITESPACE, State_Start, Action_Output}, + { State_ByteArray2, CATEGORY_DIGIT, State_ByteArray2, Action_Copy}, + { State_ByteArray2, CATEGORY_LETTERHEX, State_ByteArray2, Action_Copy}, + { State_ByteArray2, CATEGORY_ALPHA, State_Token, Action_Copy}, + { State_ByteArray2, CATEGORY_ANY, State_Start, Action_Abort}, + { State_Start, '%', State_Comment, Action_Ignore}, + { State_Start, CATEGORY_DIGIT, State_Integer, Action_Copy}, + { State_Start, '-', State_Integer, Action_Copy}, + { State_Start, '+', State_Integer, Action_Copy}, + { State_Start, '.', State_Float, Action_Copy}, + { State_Start, '/', State_Reference, Action_Ignore }, + { State_Start, '(', State_String, Action_Ignore}, + { State_Start, '{', State_BlockStart, Action_Copy}, + { State_Start, '}', State_BlockEnd, Action_Copy}, + { State_Start, '[', State_ArrayStart, Action_Copy}, + { State_Start, ']', State_ArrayEnd, Action_Copy}, + { State_Start, '<', State_ByteArray, Action_Ignore}, + { State_Start, CATEGORY_ALPHA, State_Token, Action_Copy}, + { State_Start, CATEGORY_WHITESPACE, State_Start, Action_Output}, + { State_Start, CATEGORY_SPECIAL, State_Token, Action_Copy}, + { State_Start, CATEGORY_LETTERHEX, State_ByteArray2, Action_Copy}, + { State_Start, CATEGORY_ANY, State_Start, Action_Abort}, + { State_Start, STOP, State_Start, Action_Abort} +}; + +AILexer::AILexer(){ +} +AILexer::~AILexer(){ +} + +bool AILexer::parse (TQIODevice& fin){ + char c; + + m_buffer.clear(); + m_curState = State_Start; + + parsingStarted(); + + while (!fin.atEnd()) + { + c = fin.getch (); + +// tqDebug ("got %c", c); + + State newState; + Action action; + + nextStep (c, &newState, &action); + + switch (action) + { + case Action_Copy : + m_buffer.append (c); + break; + case Action_CopyOutput : + m_buffer.append (c); + doOutput(); + break; + case Action_Output : + doOutput(); + break; + case Action_OutputUnget : + doOutput(); + fin.ungetch(c); + break; + case Action_Ignore : + /* ignore */ + break; + case Action_Abort : + tqWarning ( "state %s / %s char %c (%d)" , statetoa(m_curState), statetoa(newState), c, c ); + parsingAborted(); + return false; + break; + case Action_InitTemp : + m_temp.clear(); + break; + case Action_CopyTemp : + m_temp.append (c); + break; + case Action_DecodeUnget : + m_buffer.append (decode()); + fin.ungetch(c); + break; + // in Postscript Quelltext: Kombination F} + case Action_ByteArraySpecial : + m_curState = State_Token; + doOutput(); + fin.ungetch(c); + break; + default : + tqWarning ( "unknown action: %d ", action); + } + + m_curState = newState; + } + + parsingFinished(); + return true; +} + +void AILexer::doOutput () +{ + if (m_buffer.length() == 0) return; + switch (m_curState) + { + case State_Comment : + gotComment (m_buffer.latin1()); + break; + case State_Integer : + gotIntValue (m_buffer.toInt()); + break; + case State_Float : + gotDoubleValue (m_buffer.toFloat()); + break; + case State_String : + gotStringValue (m_buffer.latin1()); + break; + case State_Token : + gotToken (m_buffer.latin1()); + break; + case State_Reference : + gotReference (m_buffer.latin1()); + break; + case State_BlockStart : + gotBlockStart (); + break; + case State_BlockEnd : + gotBlockEnd (); + break; + case State_Start : + break; + case State_ArrayStart : + gotArrayStart (); + break; + case State_ArrayEnd : + gotArrayEnd (); + break; + case State_Byte : + gotByte (getByte()); + break; + case State_ByteArray : + case State_ByteArray2 : + doHandleByteArray (); + break; + default: + tqWarning ( "unknown state: %d", m_curState ); + } + + m_buffer.clear(); +} + +void AILexer::gotComment (const char *value) { + tqDebug ( "gotComment: %s ", value ); +} + +void AILexer::gotIntValue (int value) { + tqDebug ( "gotInt: %d ", value ); +} + +void AILexer::gotDoubleValue (double value) { + tqDebug ( "gotDouble: %f ", value ); +} + +void AILexer::gotStringValue (const char *value) { + tqDebug ( "gotString: %s ", value ); +} + +void AILexer::gotToken (const char *value) { + tqDebug ( "gotToken: %s ", value ); +} + +void AILexer::gotReference (const char *value) { + tqDebug ( "gotReference: %s ", value ); +} + +void AILexer::gotBlockStart (){ + tqDebug ( "gotBlockStart" ); +} + +void AILexer::gotBlockEnd (){ + tqDebug ( "gotBlockEnd" ); +} + +void AILexer::gotArrayStart (){ + tqDebug ( "gotArrayStart" ); +} + +void AILexer::gotArrayEnd (){ + tqDebug ( "gotArrayEnd" ); +} + +void AILexer::parsingStarted() { + tqDebug ( "parsing started" ); +} + +void AILexer::parsingFinished() { + tqDebug ( "parsing finished" ); +} + +void AILexer::parsingAborted() { + tqDebug ( "parsing aborted" ); +} + +void AILexer::gotByte (uchar value) { + tqDebug ( "got byte %d" , value ); +} + +void AILexer::gotByteArray (const TQByteArray &data) { + tqDebug ( "got byte array" ); +/* for ( uint i = 0; i < data.size(); i++ ) + { + uchar value = data[i]; + tqDebug( "%d: %x", i, value ); + } + tqDebug ( "/byte array" ); */ + +} + + +void AILexer::nextStep (char c, State *newState, Action *newAction) { + int i=0; + + while (true) { + Transition trans = transitions[i]; + + if (trans.c == STOP) { + *newState = trans.newState; + *newAction = trans.action; + return; + } + + bool found = false; + + if (trans.oldState == m_curState) { + switch (trans.c) { + case CATEGORY_WHITESPACE : found = isspace(c); break; + case CATEGORY_ALPHA : found = isalpha(c); break; + case CATEGORY_DIGIT : found = isdigit(c); break; + case CATEGORY_SPECIAL : found = isSpecial(c); break; + case CATEGORY_LETTERHEX : found = isletterhex(c); break; + case CATEGORY_INTTOOLONG : found = m_buffer.length() > MAX_INTLEN; break; + case CATEGORY_ANY : found = true; break; + default : found = (trans.c == c); + } + + if (found) { + *newState = trans.newState; + *newAction = trans.action; + + return; + } + } + + + i++; + } +} + +void AILexer::doHandleByteArray () +{ + // Special case - too short + if (m_buffer.length () < MIN_HEXCHARS) + { + gotToken (m_buffer.latin1()); + return; + } + + uint strIdx = 0; + uint arrayIdx = 0; + + TQByteArray data (m_buffer.length() >> 1); + + while (strIdx < m_buffer.length()) + { + const TQString &item = m_buffer.mid (strIdx, 2); + uchar val = item.toShort(NULL, 16); + data[arrayIdx] = val; + strIdx += 2; + arrayIdx++; + } + + gotByteArray (data); +} + +uchar AILexer::getByte() +{ +// tqDebug ("convert string to byte (%s)", m_buffer.latin1()); + + TQStringList list = TQStringList::split ("#", m_buffer.toString()); + int radix = list[0].toShort(); + uchar value = list[1].toShort (NULL, radix); + + return value; +} + +uchar AILexer::decode() +{ + uchar value = m_temp.toString().toShort(NULL, 8); +// tqDebug ("got encoded char %c",value); + return value; +} + +/* StringBuffer implementation */ + +int initialSize = 20; +int addSize = 10; + +StringBuffer::StringBuffer () { + m_buffer = (char*)calloc (initialSize, sizeof(char)); + m_length = 0; + m_capacity = initialSize; +} + +StringBuffer::~StringBuffer (){ + free(m_buffer); +} + +void StringBuffer::append (char c){ + ensureCapacity(m_length + 1); + m_buffer[m_length] = c; + m_length++; +} + +void StringBuffer::clear(){ + for (uint i=0; i<m_length; i++) m_buffer[i] = '\0'; + m_length = 0; +} + +TQString StringBuffer::toString() const { + TQString ret(m_buffer); + return ret; +} + +void StringBuffer::ensureCapacity (int p_capacity) { + if (m_capacity >= p_capacity) return; + + int newSize = m_capacity + addSize; + if (p_capacity > newSize) newSize = p_capacity; + + char* oldBuffer = m_buffer; + char *newBuffer = (char*)calloc (newSize, sizeof(char)); + strcpy (newBuffer, m_buffer); + free(oldBuffer); + m_buffer = newBuffer; + m_capacity = newSize; +} + +uint StringBuffer::length() { + return m_length; +} + +double StringBuffer::toFloat() { + TQString data = toString(); + return data.toFloat(); +} + +int StringBuffer::toInt() { + TQString data = toString(); + return data.toInt(); +} + +const char *StringBuffer::latin1() { + return m_buffer; +} + +TQString StringBuffer::mid( uint index, uint len) const { + TQString data = toString(); + return data.mid(index,len); +} |