/* This file is part of the KDE project Copyright (C) 2001 Ewald Snel Copyright (C) 2001 Tomasz Grobelny Copyright (C) 2005 Tommi Rantala This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. */ #include #include "rtfimport_tokenizer.h" RTFTokenizer::RTFTokenizer() { tokenText.resize( 4113 ); fileBuffer.resize( 4096 ); infile = 0L; } /** * Open tokenizer from file. * @param in the input file */ void RTFTokenizer::open( TQFile *in ) { fileBufferPtr = 0L; fileBufferEnd = 0L; infile = in; type = RTFTokenizer::PlainText; } int RTFTokenizer::nextChar() { if ( fileBufferPtr == fileBufferEnd ) { int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() ); fileBufferPtr = ( uchar* ) fileBuffer.data(); fileBufferEnd = fileBufferPtr; if ( n <= 0 ) return -1; fileBufferEnd = fileBufferPtr + n; } return *fileBufferPtr++; } /** * Reads the next token. */ void RTFTokenizer::next() { int ch; value=0; if (!infile) return; do { int n = nextChar(); if ( n <= 0 ) { ch = '}'; break; } ch = n; } while (ch == '\n' || ch == '\r' && ch != 0); // Skip one byte for prepend '@' to destinations text = (tokenText.data() + 1); hasParam = false; uchar *_text = (uchar *)text; if (ch == '{') type = RTFTokenizer::OpenGroup; else if (ch == '}') type = RTFTokenizer::CloseGroup; else if (ch == '\\') { type = RTFTokenizer::ControlWord; int n = nextChar(); if ( n <= 0 ) { // Return CloseGroup on EOF type = RTFTokenizer::CloseGroup; return; } ch = n; // Type is either control word or control symbol if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { int v = 0; // Read alphabetic string (command) while (_text < ( uchar* )tokenText.data()+tokenText.size()-3 && ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) ) { *_text++ = ch; int n = nextChar(); if ( n <= 0 ) { ch = ' '; break; } ch = n; } // Read numeric parameter (param) bool isneg = (ch == '-'); if (isneg) { int n = nextChar(); if ( n <= 0 ) { type = RTFTokenizer::CloseGroup; return; } ch = n; } while (ch >= '0' && ch <= '9') { v = (10 * v) + ch - '0'; hasParam = true; int n = nextChar(); if ( n <= 0 ) n = ' '; ch = n; } value = isneg ? -v : v; // If delimiter is a space, it's part of the control word if (ch != ' ') { --fileBufferPtr; } *_text = 0; // Just put an end of string for the test, it can then be over-written again if ( !memcmp( tokenText.data()+1, "bin", 4 ) ) { // We have \bin, so we need to read the bytes kdDebug(30515) << "Token:" << tokenText << endl; if (value > 0) { kdDebug(30515) << "\\bin" << value << endl; type = RTFTokenizer::BinaryData; binaryData.resize(value); for (int i=0; i= fileBufferEnd) break; ch = *fileBufferPtr++; } if(fileBufferPtr < fileBufferEnd) --fileBufferPtr; // give back the last char } *_text++ = 0; }