summaryrefslogtreecommitdiffstats
path: root/kturtle/src/lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'kturtle/src/lexer.cpp')
-rw-r--r--kturtle/src/lexer.cpp411
1 files changed, 411 insertions, 0 deletions
diff --git a/kturtle/src/lexer.cpp b/kturtle/src/lexer.cpp
new file mode 100644
index 00000000..1a480641
--- /dev/null
+++ b/kturtle/src/lexer.cpp
@@ -0,0 +1,411 @@
+/*
+ Copyright (C) 2003 by Walter Schreppers
+ Copyright (C) 2004 by Cies Breijs
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of version 2 of the GNU General Public
+ License as published by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <qdom.h>
+#include <qfile.h>
+
+#include <kdebug.h>
+#include <klocale.h>
+
+#include "settings.h"
+
+#include "lexer.h"
+
+
+/**
+ * Creates a lexer that takes its characters from @p iStream.
+ *
+ * Only the address of the stream is kept. The row and column counters
+ * (and the remembered previous column) start at 1, and a Translate
+ * object is allocated for the keyword lookups done in setTokenType().
+ */
+Lexer::Lexer(QTextIStream& iStream)
+{
+    // helper used to map (translated) words onto keys
+    translate = new Translate();
+
+    // the source of the characters; the stream itself is not copied
+    inputStream = &iStream;
+
+    // positions are 1-based
+    row = col = prevCol = 1;
+}
+
+
+/**
+ * Reads the next Token from the input stream.
+ *
+ * Leading white space (but not a newline) is skipped first. The token's
+ * start position is recorded before the first character is read and its
+ * end position after the last one has been consumed.
+ *
+ * - When the stream reports atEnd() after the first character was read,
+ *   a tokEOF token is returned.
+ * - A '#' starts a comment: everything up to the newline is consumed and
+ *   the newline itself is reported as tokEOL.
+ * - Words (starting with a letter, '[' or ']') are read by getWord() and
+ *   classified by setTokenType(); numbers are read by getNumber();
+ *   strings by getString().
+ * - Any other character that is not an operator, brace or comma yields a
+ *   tokUnknown token whose look is that character.
+ *
+ * @return the Token that was read.
+ */
+Token Lexer::lex()
+{
+    skipSpaces(); // skip the white space (most likely indentation) in front of the Token
+
+    Token currentToken;
+    currentToken.type = tokNotSet; // not really needed
+    currentToken.look = "";
+    currentToken.value = 0;
+    currentToken.start.row = row;
+    currentToken.start.col = col;
+
+    QChar currentChar = getChar();
+
+    if ( inputStream->atEnd() )
+    {
+        kdDebug(0)<<"Lexer::lex(), got EOF."<<endl;
+        currentToken.type = tokEOF;
+        currentToken.look = "EOF";
+        ungetChar(currentChar); // unget the currentChar and fix the row/col values
+        return currentToken;
+    }
+
+    if (currentChar == '#')
+    {
+        // a comment: consume it up to and including the newline, so that the
+        // comment line is handled below as an ordinary end-of-line
+        while ( !inputStream->atEnd() && !(currentChar == '\x0a' || currentChar == '\n') )
+            currentChar = getChar();
+    }
+
+    // if (currentChar.category() == QChar::Separator_Line) somehow doesn't work
+    if (currentChar == '\x0a' || currentChar == '\n')
+    {
+        currentToken.type = tokEOL;
+        currentToken.look = "EOL";
+    }
+    else if (currentChar.isLetter() || currentChar == '[' || currentChar == ']')
+    {
+        ungetChar(currentChar);
+        // sets currentToken.look by reference, and sets the currentToken.type to tokUnknown
+        currentToken.type = getWord(currentToken.look);
+        setTokenType(currentToken); // gets the actual tokenType
+    }
+    else if ( currentChar.isNumber() )
+    {
+        ungetChar(currentChar);
+        // sets currentToken.value/look by reference, and sets the currentToken.type to tokNumber
+        currentToken.type = getNumber(currentToken.value, currentToken.look);
+    }
+    else if (currentChar == '>')
+    {
+        // one character of look-ahead decides between ">=" and ">"
+        currentChar = getChar();
+        if (currentChar == '=')
+        {
+            currentToken.type = tokGe;
+            currentToken.look = ">=";
+        }
+        else
+        {
+            ungetChar(currentChar);
+            currentToken.type = tokGt;
+            currentToken.look = ">";
+        }
+    }
+    else if (currentChar == '<')
+    {
+        // one character of look-ahead decides between "<=" and "<"
+        currentChar = getChar();
+        if ( currentChar == '=' )
+        {
+            currentToken.type = tokLe;
+            currentToken.look = "<=";
+        }
+        else
+        {
+            ungetChar(currentChar);
+            currentToken.type = tokLt;
+            currentToken.look = "<"; // the look has to match the character that was read
+        }
+    }
+    else if (currentChar == '!')
+    {
+        // one character of look-ahead decides between "!=" and "!"
+        currentChar = getChar();
+        if (currentChar == '=')
+        {
+            currentToken.type = tokNe;
+            currentToken.look = "!=";
+        }
+        else
+        {
+            ungetChar(currentChar);
+            currentToken.type = tokNot;
+            currentToken.look = "!";
+        }
+    }
+    else if (currentChar == '=')
+    {
+        // one character of look-ahead decides between "==" and "="
+        currentChar = getChar();
+        if (currentChar == '=')
+        {
+            currentToken.type = tokEq;
+            currentToken.look = "==";
+        }
+        else
+        {
+            ungetChar(currentChar);
+            currentToken.type = tokAssign;
+            currentToken.look = "=";
+        }
+    }
+    else if (currentChar == '(')
+    {
+        currentToken.type = tokBraceOpen;
+        currentToken.look = "(";
+    }
+    else if (currentChar == ')')
+    {
+        currentToken.type = tokBraceClose;
+        currentToken.look = ")";
+    }
+    else if (currentChar == '+')
+    {
+        currentToken.type = tokPlus;
+        currentToken.look = "+";
+    }
+    else if (currentChar == '-')
+    {
+        currentToken.type = tokMinus;
+        currentToken.look = "-";
+    }
+    else if (currentChar == '*')
+    {
+        currentToken.type = tokMul;
+        currentToken.look = "*";
+    }
+    else if (currentChar == '/')
+    {
+        currentToken.type = tokDev;
+        currentToken.look = "/";
+    }
+    else if (currentChar == ',')
+    {
+        currentToken.type = tokComma;
+        currentToken.look = ",";
+    }
+    else if (currentChar == '"')
+    {
+        // the opening quote is already consumed; getString() sets type and look
+        getString(currentToken);
+    }
+    else
+    {
+        currentToken.type = tokUnknown;
+        currentToken.look = currentChar;
+    }
+
+    currentToken.end.row = row;
+    currentToken.end.col = col;
+    return currentToken;
+}
+
+
+
+// PRIVATEs
+
+/**
+ * Delivers the next character and advances the row/col counters.
+ *
+ * A character that was handed back through ungetChar() is delivered first
+ * (and the put-back slot is cleared); otherwise a fresh character is taken
+ * from the input stream. On a newline the row is incremented, the current
+ * column is remembered in prevCol and the column restarts at 1; for every
+ * other character the column is incremented.
+ *
+ * @return the character that was read.
+ */
+QChar Lexer::getChar()
+{
+    QChar next;
+    if ( putBackChar.isNull() )
+    {
+        *inputStream >> next; // nothing stored, so take a QChar from the inputStream
+    }
+    else
+    {
+        next = putBackChar;      // hand out the stored character...
+        putBackChar = QChar();   // ...and empty the put-back slot again
+    }
+
+    // the position bookkeeping is the same for both sources
+    const bool isNewline = (next == '\x0a' || next == '\n');
+    if (isNewline)
+    {
+        prevCol = col; // remembered so ungetChar() can step back over the newline
+        col = 1;
+        row++;
+    }
+    else
+    {
+        col++;
+    }
+    return next;
+}
+
+/**
+ * Hands the character @p c back, so the next getChar() delivers it again,
+ * and steps the row/col counters back by one character.
+ *
+ * Only a single character is stored; for a newline the column is restored
+ * from prevCol as saved by getChar().
+ */
+void Lexer::ungetChar(QChar c)
+{
+    putBackChar = c;
+
+    const bool wasNewline = (c == '\x0a' || c == '\n');
+    if (!wasNewline)
+    {
+        col--;
+        return;
+    }
+
+    // stepping back over a newline: return to the end of the previous row
+    col = prevCol;
+    row--;
+}
+
+/**
+ * Reads a word and appends its characters to @p word.
+ *
+ * A word starts with a letter, '[' or ']' and continues with letters,
+ * digits, '_', '[' or ']'. Reading stops at the first character that does
+ * not belong to the word, or as soon as the stream reports atEnd(); the
+ * last character read is handed back with ungetChar().
+ *
+ * @return tokUnknown when a word was read (the real type is determined
+ *         later by setTokenType()), tokError when the first character
+ *         cannot start a word.
+ */
+int Lexer::getWord(QString& word)
+{
+    QChar ch = getChar();
+
+    const bool validStart = ch.isLetter() || ch == '[' || ch == ']';
+    if (!validStart)
+    {
+        return tokError;
+    }
+
+    for (;;)
+    {
+        const bool partOfWord = ch.isLetterOrNumber() || ch == '_' || ch == '[' || ch == ']';
+        if ( !partOfWord || inputStream->atEnd() )
+        {
+            break;
+        }
+        word += ch;
+        ch = getChar();
+    }
+
+    kdDebug(0)<<"Lexer::getWord(), got NAME: '"<<word<<"'"<<endl;
+    ungetChar(ch); // one character too many was read
+    return tokUnknown; // the actual token type is determined later in Lexer::setTokenType
+}
+
+/**
+ * Determines the real type of a word token.
+ *
+ * Nothing happens unless @p currentToken is of type tokUnknown. Otherwise
+ * the lower-cased look of the token is (when it is an alias) translated to
+ * its key and compared, in the order of the table below, with the key of
+ * every known command name; the first match sets the token type. A word
+ * that matches nothing stays tokUnknown.
+ */
+void Lexer::setTokenType(Token& currentToken)
+{
+    if (currentToken.type != tokUnknown)
+    {
+        return;
+    }
+
+    // command names with the token type they stand for, in lookup order
+    struct Keyword
+    {
+        const char* name;
+        int         type;
+    };
+    static const Keyword keywords[] =
+    {
+        { "begin",        tokBegin },
+        { "end",          tokEnd },
+        { "while",        tokWhile },
+        { "if",           tokIf },
+        { "else",         tokElse },
+        { "for",          tokFor },
+        { "to",           tokTo },
+        { "step",         tokStep },
+        { "and",          tokAnd },
+        { "or",           tokOr },
+        { "not",          tokNot },
+        { "return",       tokReturn },
+        { "break",        tokBreak },
+        { "run",          tokRun },
+        { "foreach",      tokForEach },
+        { "in",           tokIn },
+
+        { "learn",        tokLearn },
+
+        { "clear",        tokClear },
+        { "go",           tokGo },
+        { "gox",          tokGoX },
+        { "goy",          tokGoY },
+        { "forward",      tokForward },
+        { "backward",     tokBackward },
+        { "direction",    tokDirection },
+        { "turnleft",     tokTurnLeft },
+        { "turnright",    tokTurnRight },
+        { "center",       tokCenter },
+        { "setpenwidth",  tokSetPenWidth },
+        { "penup",        tokPenUp },
+        { "pendown",      tokPenDown },
+        { "setfgcolor",   tokSetFgColor },
+        { "setbgcolor",   tokSetBgColor },
+        { "resizecanvas", tokResizeCanvas },
+        { "spriteshow",   tokSpriteShow },
+        { "spritehide",   tokSpriteHide },
+        { "spritepress",  tokSpritePress },
+        { "spritechange", tokSpriteChange },
+
+        { "do",           tokDo }, // dummy commands
+
+        { "message",      tokMessage },
+        { "inputwindow",  tokInputWindow },
+        { "print",        tokPrint },
+        { "fonttype",     tokFontType },
+        { "fontsize",     tokFontSize },
+        { "repeat",       tokRepeat },
+        { "random",       tokRandom },
+        { "wait",         tokWait },
+        { "wrapon",       tokWrapOn },
+        { "wrapoff",      tokWrapOff },
+        { "reset",        tokReset }
+    };
+    const unsigned int keywordCount = sizeof(keywords) / sizeof(keywords[0]);
+
+    // work on a lowercase copy of the word as it was found in the inputStream
+    QString k = currentToken.look.lower();
+
+    // when the word is an alias, continue with the key the alias stands for
+    const QString aliasKey = translate->alias2key(k);
+    if ( !aliasKey.isEmpty() )
+    {
+        k = aliasKey;
+    }
+
+    // the first entry whose key equals the word decides the type
+    bool recognised = false;
+    for (unsigned int i = 0; i < keywordCount && !recognised; ++i)
+    {
+        if ( k == translate->name2key(keywords[i].name) )
+        {
+            currentToken.type = keywords[i].type;
+            recognised = true;
+        }
+    }
+
+    if (!recognised)
+    {
+        kdDebug(0)<<"Lexer::setTokenType, found UNKNOWN word @ ("<<currentToken.start.row<<", "<<currentToken.start.col<<"), can be anything"<<endl;
+        // the type already is tokUnknown
+    }
+
+    kdDebug(0)<<"Lexer::setTokenType, found tok-number: '"<<currentToken.type<<"' with the key: '"<<k<<"' @ ("<<currentToken.start.row<<", "<<currentToken.start.col<<")"<<endl;
+}
+
+
+/**
+ * Consumes white space up to the next character that matters.
+ *
+ * Characters are read for as long as the stream is not atEnd() and the
+ * character is a space other than a newline. The character that ended the
+ * run is handed back with ungetChar(), so the next getChar() delivers it.
+ */
+void Lexer::skipSpaces()
+{
+    // things would be easier if the Separator_* groups could be identified through QChar
+    QChar ch;
+    do
+    {
+        ch = getChar();
+    }
+    while ( !inputStream->atEnd() && ( ch.isSpace() && !(ch == '\x0a' || ch == '\n') ) );
+
+    ungetChar(ch); // hand back what stopped the skipping (likely an end-of-line)
+}
+
+
+/**
+ * Reads a number; both @p num and @p look are set by reference.
+ *
+ * The number has to start with a digit and may contain a single '.'.
+ * The characters read are appended to @p look, and @p num receives the
+ * value of look.toDouble(). Reading stops at the first character that is
+ * not part of the number, or as soon as the stream reports atEnd(); the
+ * last character read is handed back with ungetChar().
+ *
+ * @return tokNumber when a number was read, tokError when the first
+ *         character is not a digit.
+ */
+int Lexer::getNumber(Value& num, QString& look)
+{
+    QChar ch = getChar();
+    if ( !ch.isNumber() )
+    {
+        return tokError;
+    }
+
+    bool seenPoint = false; // only one decimal point is accepted
+    for (;;)
+    {
+        const bool accepted = ch.isNumber() || (ch == '.' && !seenPoint);
+        if ( !accepted || inputStream->atEnd() )
+        {
+            break;
+        }
+        if (ch == '.')
+        {
+            seenPoint = true;
+        }
+        look += ch;
+        ch = getChar();
+    }
+
+    ungetChar(ch); // one character too many was read
+    num.setNumber( look.toDouble() );
+    kdDebug(0)<<"Lexer::getNumber(), got NUMBER: '"<<num.Number()<<"'"<<endl;
+    return tokNumber;
+}
+
+/**
+ * Reads a string constant; the opening '"' has already been consumed by
+ * the caller.
+ *
+ * @p currentToken gets the type tokString and, as look, the text of the
+ * string including the opening quote and (when present) the closing quote.
+ *
+ * Reading ends at:
+ *  - the first unescaped '"', which is included in the look;
+ *  - a newline, which is handed back with ungetChar() so that the
+ *    end-of-line is still seen when the string was left undelimited;
+ *  - the end of the stream.
+ *
+ * The escape sequences \n, \t, \f and \" are translated; any other
+ * character following a backslash is dropped together with the backslash.
+ * An escaped quote does not end the string.
+ */
+void Lexer::getString(Token& currentToken)
+{
+    QString str = "\""; // start with a " because the opening quote was already consumed
+
+    while ( !inputStream->atEnd() )
+    {
+        QChar currentChar = getChar();
+
+        if (currentChar == '\\') // escape sequence
+        {
+            currentChar = getChar();
+            if (currentChar == '\x0a' || currentChar == '\n')
+            {
+                // a backslash directly in front of the newline: the string ends
+                // here and the newline stays consumed
+                break;
+            }
+            switch (currentChar)
+            {
+                case 'n': str += '\n'; break;
+                case 't': str += '\t'; break;
+                case 'f': str += '\f'; break;
+                case '"': str += '"'; break; // an escaped quote is part of the string
+            }
+            continue; // the escaped character must never be taken for the closing quote
+        }
+
+        if (currentChar == '\x0a' || currentChar == '\n') // the user forgot to delimit the string
+        {
+            ungetChar(currentChar);
+            break;
+        }
+
+        str += currentChar;
+        if (currentChar == '"') // the closing quote
+        {
+            break;
+        }
+    }
+
+    currentToken.type = tokString;
+    currentToken.look = str;
+
+    kdDebug(0)<<"Lexer::getStringConstant, got STRINGCONSTANT: "<<currentToken.look<<"'"<<endl;
+}
+