summary refs log tree commit diff stats
path: root/libksieve/tests/lexertest.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libksieve/tests/lexertest.cpp')
-rw-r--r-- libksieve/tests/lexertest.cpp 484
1 files changed, 484 insertions, 0 deletions
diff --git a/libksieve/tests/lexertest.cpp b/libksieve/tests/lexertest.cpp
new file mode 100644
index 00000000..46149950
--- /dev/null
+++ b/libksieve/tests/lexertest.cpp
@@ -0,0 +1,484 @@
+/* -*- c++ -*-
+ tests/lexertest.cpp
+
+ This file is part of the testsuite of KSieve,
+ the KDE internet mail/usenet news message filtering library.
+ Copyright (c) 2003 Marc Mutz <mutz@kde.org>
+
+ KSieve is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License, version 2, as
+ published by the Free Software Foundation.
+
+ KSieve is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+ In addition, as a special exception, the copyright holders give
+ permission to link the code of this program with any edition of
+ the Qt library by Trolltech AS, Norway (or with modified versions
+ of Qt that use the same license as Qt), and distribute linked
+ combinations including the two. You must obey the GNU General
+ Public License in all respects for all of the code used other than
+ Qt. If you modify this file, you may extend this exception to
+ your version of the file, but you are not obligated to do so. If
+ you do not wish to do so, delete this exception statement from
+ your version.
+*/
+#include <config.h>
+#include <ksieve/lexer.h>
+using KSieve::Lexer;
+
+#include <ksieve/error.h>
+using KSieve::Error;
+
+#include <qcstring.h> // qstrlen
+#include <qstring.h>
+
+#include <iostream>
+using std::cout;
+using std::cerr;
+using std::endl;
+
+/**
+ Returns the name of the Lexer::Token enumerator @p t as a C string,
+ or an empty string if @p t matches none of the tokens listed below.
+*/
+static const char * token2string( Lexer::Token t ) {
+  // TOKEN_ENTRY(x) pairs an enumerator with its own spelling, produced
+  // by the preprocessor's stringizing operator.
+#define TOKEN_ENTRY(x) { Lexer::x, #x }
+  static const struct {
+    Lexer::Token token;
+    const char * name;
+  } tokenNames[] = {
+    TOKEN_ENTRY( None ),
+    TOKEN_ENTRY( HashComment ),
+    TOKEN_ENTRY( BracketComment ),
+    TOKEN_ENTRY( Identifier ),
+    TOKEN_ENTRY( Tag ),
+    TOKEN_ENTRY( Number ),
+    TOKEN_ENTRY( MultiLineString ),
+    TOKEN_ENTRY( QuotedString ),
+    TOKEN_ENTRY( Special ),
+    TOKEN_ENTRY( LineFeeds )
+  };
+#undef TOKEN_ENTRY
+
+  const unsigned int numNames = sizeof tokenNames / sizeof *tokenNames;
+  for ( unsigned int idx = 0 ; idx < numNames ; ++idx )
+    if ( tokenNames[idx].token == t )
+      return tokenNames[idx].name;
+
+  // Value outside the known enumerators.
+  return "";
+}
+
+/**
+ One entry of the automated test table: an input script together with
+ the tokens and the error the lexer is expected to produce for it.
+ Members are initialized positionally, so their order must not change.
+*/
+struct TestCase {
+  // Label printed in front of the test's result.
+  const char * name;
+  // Script text handed to the lexer.
+  const char * string;
+  // Tokens expected from successive nextToken() calls.
+  struct {
+    Lexer::Token token;
+    // Expected token text (compared after QString::fromUtf8()), or 0.
+    const char * result;
+  } expected[16]; // end with { None, 0 }
+  // Error type, and the line and column, that an error raised by the
+  // lexer is compared against.
+  Error::Type expectedError;
+  int errorLine;
+  int errorCol;
+};
+
+// Table driving the automated test run. Each entry lists, in order:
+// a display name, the input script (a null pointer in the first entry),
+// the expected (token, text) pairs, the expected error type, and the
+// expected error line and column.
+//
+// Most expected lists end with { Lexer::None, 0 }. Several error cases
+// leave the terminator out; the slots they do not spell out are
+// zero-initialized by aggregate initialization.
+static const TestCase testcases[] = {
+ //
+ // Whitespace:
+ //
+
+ { "Null script", 0,
+ { { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+
+ { "Empty script", "",
+ { { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+
+ { "Whitespace-only script", " \t\n\t \n",
+ { { Lexer::LineFeeds, "2" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+
+ { "Lone CR", "\r",
+ { { Lexer::None, 0 } },
+ Error::CRWithoutLF, 0, 1
+ },
+
+ { "CR+Space", "\r ",
+ { { Lexer::None, 0 } },
+ Error::CRWithoutLF, 0, 1
+ },
+
+ { "CRLF alone", "\r\n",
+ { { Lexer::LineFeeds, "1" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+
+ //
+ // hash comments:
+ //
+
+ { "Basic hash comment (no newline)", "#comment",
+ { { Lexer::HashComment, "comment" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+
+ { "Basic hash comment (LF)", "#comment\n",
+ { { Lexer::HashComment, "comment" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+
+ { "Basic hash comment (CRLF)", "#comment\r\n",
+ { { Lexer::HashComment, "comment" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+
+ { "Basic hash comment (CR)", "#comment\r",
+ { { Lexer::HashComment, 0 } },
+ Error::CRWithoutLF, 0, 9
+ },
+
+ // 0xA9 on its own is a UTF-8 continuation byte without a lead byte.
+ { "Non-UTF-8 in hash comment", "#\xA9 copyright",
+ { { Lexer::HashComment, 0 } },
+ Error::InvalidUTF8, 0, 12
+ },
+
+ //
+ // bracket comments:
+ //
+
+ { "Basic bracket comment", "/* comment */",
+ { { Lexer::BracketComment, " comment " }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+
+ { "Basic bracket comment - missing trailing slash", "/* comment *",
+ { { Lexer::BracketComment, 0 } },
+ Error::UnfinishedBracketComment, 0, 0
+ },
+
+ { "Basic bracket comment - missing trailing asterisk + slash", "/* comment ",
+ { { Lexer::BracketComment, 0 } },
+ Error::UnfinishedBracketComment, 0, 0
+ },
+
+ { "Basic bracket comment - missing leading slash", "* comment */",
+ { { Lexer::None, 0 } },
+ Error::IllegalCharacter, 0, 0
+ },
+
+ { "Basic bracket comment - missing leading asterisk + slash", "comment */",
+ { { Lexer::Identifier, "comment" }, { Lexer::None, 0 } },
+ Error::IllegalCharacter, 0, 8
+ },
+
+ { "Basic multiline bracket comment (LF)", "/* comment\ncomment */",
+ { { Lexer::BracketComment, " comment\ncomment " }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+
+ // The CRLF in the input is expected to come back as a bare LF.
+ { "Basic multiline bracket comment (CRLF)", "/* comment\r\ncomment */",
+ { { Lexer::BracketComment, " comment\ncomment " }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+
+ { "Basic multiline bracket comment (CR)", "/* comment\rcomment */",
+ { { Lexer::BracketComment, 0 } },
+ Error::CRWithoutLF, 0, 11
+ },
+
+ { "Non-UTF-8 in bracket comment", "/*\xA9 copyright*/",
+ { { Lexer::BracketComment, 0 } },
+ Error::InvalidUTF8, 0, 14
+ },
+
+ //
+ // numbers:
+ //
+ { "Basic number 1", "1",
+ { { Lexer::Number, "1" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Basic number 01", "01",
+ { { Lexer::Number, "01" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Qualified number 1k", "1k",
+ { { Lexer::Number, "1k" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Qualified number 1M", "1M",
+ { { Lexer::Number, "1M" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Qualified number 1G", "1G",
+ { { Lexer::Number, "1G" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ //
+ // identifiers:
+ //
+ { "Basic identifier \"id\"", "id",
+ { { Lexer::Identifier, "id" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Basic identifier \"_id\"", "_id",
+ { { Lexer::Identifier, "_id" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ //
+ // tags:
+ //
+ // The expected text of a tag does not include the leading ':'.
+ { "Basic tag \":tag\"", ":tag",
+ { { Lexer::Tag, "tag" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Basic tag \":_tag\"", ":_tag",
+ { { Lexer::Tag, "_tag" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ //
+ // specials:
+ //
+ { "Basic special \"{}[]();,\"", "{}[]();,",
+ { { Lexer::Special, "{" }, { Lexer::Special, "}" },
+ { Lexer::Special, "[" }, { Lexer::Special, "]" },
+ { Lexer::Special, "(" }, { Lexer::Special, ")" },
+ { Lexer::Special, ";" }, { Lexer::Special, "," }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ //
+ // quoted-string:
+ //
+ { "Basic quoted string \"foo\"", "\"foo\"",
+ { { Lexer::QuotedString, "foo" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ // The literal is split after the hex escape so that the following 'f'
+ // (a valid hex digit) is not swallowed into the escape sequence.
+ { "Basic quoted string, UTF-8", "\"foo\xC3\xB1" "foo\"", // fooñfoo (0xC3 0xB1 is U+00F1)
+ { { Lexer::QuotedString, "foo\xC3\xB1" "foo" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Quoted string, escaped '\"'", "\"foo\\\"bar\"",
+ { { Lexer::QuotedString, "foo\"bar" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Quoted string, escaped '\\'", "\"foo\\\\bar\"",
+ { { Lexer::QuotedString, "foo\\bar" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ // Backslashes in front of ordinary characters are expected to be dropped.
+ { "Quoted string, excessive escapes", "\"\\fo\\o\"",
+ { { Lexer::QuotedString, "foo" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Quoted string across lines (LF)", "\"foo\nbar\"",
+ { { Lexer::QuotedString, "foo\nbar" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Quoted string across lines (CRLF)", "\"foo\r\nbar\"",
+ { { Lexer::QuotedString, "foo\nbar" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ //
+ // multiline strings:
+ //
+ // The inline "?" comments below record an open question: whether the
+ // expected text should keep the final line break.
+ { "Basic multiline string I (LF)", "text:\nfoo\n.",
+ { { Lexer::MultiLineString, "foo" /* "foo\n" ? */ }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Basic multiline string I (CRLF)", "text:\r\nfoo\r\n.",
+ { { Lexer::MultiLineString, "foo" /* "foo\n" ? */ }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Basic multiline string II (LF)", "text:\nfoo\n.\n",
+ { { Lexer::MultiLineString, "foo" /* "foo\n" ? */ }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Basic multiline string II (CRLF)", "text:\r\nfoo\r\n.\r\n",
+ { { Lexer::MultiLineString, "foo" /* "foo\n" ? */ }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Dotstuffed multiline string (LF)", "text:\n..foo\n.",
+ { { Lexer::MultiLineString, ".foo" /* ".foo\n" ? */ }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Dotstuffed multiline string (CRLF)", "text:\r\n..foo\r\n.",
+ { { Lexer::MultiLineString, ".foo" /* ".foo\n" ? */ }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Incompletely dotstuffed multiline string (LF)", "text:\n.foo\n.",
+ { { Lexer::MultiLineString, ".foo" /* ".foo\n" ? */ }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Incompletely dotstuffed multiline string (CRLF)", "text:\r\n.foo\r\n.",
+ { { Lexer::MultiLineString, ".foo" /* ".foo\n" ? */ }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+ { "Mutiline with a line with only one '.'","text:\r\nfoo\r\n..\r\nbar\r\n.",
+ { { Lexer::MultiLineString, "foo\n.\nbar" }, { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+
+
+ //
+ // Errors in single tokens:
+ //
+
+ //
+ // numbers:
+ //
+ { "Number, unknown qualifier", "100f",
+ { { Lexer::Number, "100" } },
+ Error::UnexpectedCharacter, 0, 3
+ },
+ { "Negative number", "-100",
+ { { Lexer::None, 0 } },
+ Error::IllegalCharacter, 0, 0
+ },
+ //
+ // identifiers:
+ //
+ { "Identifier, leading digits", "0id",
+ { { Lexer::Number, "0" } },
+ Error::UnexpectedCharacter, 0, 1
+ },
+ { "Identifier, embedded umlaut", "idäid",
+ { { Lexer::Identifier, "id" } },
+ Error::IllegalCharacter, 0, 2
+ },
+ //
+ // tags:
+ //
+ { "Lone ':' (at end)", ":",
+ { { Lexer::Tag, 0 } },
+ Error::UnexpectedCharacter, 0, 0
+ },
+ { "Lone ':' (in stream)", ": ",
+ { { Lexer::Tag, 0 } },
+ Error::UnexpectedCharacter, 0, 1
+ },
+ { "Tag, leading digits", ":0tag",
+ { { Lexer::Tag, 0 } },
+ Error::NoLeadingDigits, 0, 1
+ },
+ { "Tag, embedded umlaut", ":tagätag",
+ { { Lexer::Tag, "tag" } },
+ Error::IllegalCharacter, 0, 4
+ },
+ //
+ // specials: (none)
+ // quoted string:
+ //
+ { "Premature end of quoted string", "\"foo",
+ { { Lexer::QuotedString, "foo" } },
+ Error::PrematureEndOfQuotedString, 0, 0
+ },
+ // 0xC0 0xA0 is an overlong two-byte encoding, which is not valid UTF-8.
+ { "Invalid UTF-8 in quoted string", "\"foo\xC0\xA0" "foo\"",
+ { { Lexer::QuotedString, "foo" } },
+ Error::InvalidUTF8, 0, 4
+ },
+
+ //
+ // Whitespace / token separation: valid
+ //
+
+ { "Two identifiers with linebreaks", "foo\nbar\n",
+ { { Lexer::Identifier, "foo" },
+ { Lexer::LineFeeds, "1" },
+ { Lexer::Identifier, "bar" },
+ { Lexer::LineFeeds, "1" },
+ { Lexer::None, 0 } },
+ Error::None, 0, 0
+ },
+
+ //
+ // Whitespace / token separation: invalid
+ //
+ // (no cases yet)
+
+};
+
+// Number of entries in the testcases table, derived from the array size.
+static const int numTestCases = sizeof( testcases ) / sizeof( testcases[0] );
+
+/**
+ Test driver.
+
+ With exactly one argument, lexes that argument and prints every token
+ (or the first error) to stdout. With no arguments, runs every entry of
+ testcases, reports each result on stderr and returns 1 if any of them
+ failed. Any other invocation prints a usage message and returns 1.
+
+ @param argc number of command line arguments, including the program name
+ @param argv command line arguments; argv[1] is the script in manual mode
+ @return 0 on success, 1 on a failed test case or a usage error
+*/
+int main( int argc, char * argv[] ) {
+
+  if ( argc == 2 ) { // manual test
+
+    const char * scursor = argv[1];
+    const char * const send = argv[1] + qstrlen( argv[1] );
+
+    Lexer lexer( scursor, send );
+
+    cout << "Begin" << endl;
+    while ( !lexer.atEnd() ) {
+      QString result;
+      Lexer::Token token = lexer.nextToken( result );
+      if ( lexer.error() ) {
+        cout << "Error " << token2string( token ) << ": \""
+             << lexer.error().asString().latin1() << "\" at ("
+             << lexer.error().line() << "," << lexer.error().column()
+             << ")" << endl;
+        break;
+      } else {
+        cout << "Got " << token2string( token ) << ": \""
+             << result.utf8().data() << "\" at ("
+             << lexer.line() << "," << lexer.column() << ")" << endl;
+      }
+    }
+    cout << "End" << endl;
+
+  } else if ( argc == 1 ) { // automated test
+    bool success = true;
+    for ( int i = 0 ; i < numTestCases ; ++i ) {
+      bool ok = true;
+      bool gotError = false;
+      const TestCase & t = testcases[i];
+      // Capacity of the expected-token list; bounds the index used below
+      // so a lexer that yields too many tokens cannot read past the array.
+      const int maxExpected = sizeof t.expected / sizeof *t.expected;
+      const char * const send = t.string + qstrlen( t.string );
+      Lexer lexer( t.string, send, Lexer::IncludeComments );
+      cerr << t.name << ":";
+      for ( int j = 0 ; j < maxExpected && !lexer.atEnd() ; ++j ) {
+        QString result;
+        Lexer::Token token = lexer.nextToken( result );
+        Error error = lexer.error();
+        if ( t.expected[j].token != token ) {
+          ok = false;
+          cerr << " expected token " << token2string( t.expected[j].token )
+               << ", got " << token2string( token );
+        }
+        if ( QString::fromUtf8( t.expected[j].result ) != result ) {
+          ok = false;
+          if ( t.expected[j].result )
+            cerr << " expected string \"" << t.expected[j].result << "\"";
+          else
+            cerr << " expected null string";
+          if ( !result.utf8().isNull() )
+            cerr << ", got \"" << result.utf8().data() << "\"";
+          else
+            cerr << ", got null string";
+        }
+        if ( error ) {
+          if ( error.type() != t.expectedError ) {
+            ok = false;
+            cerr << " expected error #" << static_cast<int>( t.expectedError )
+                 << ", got #" << static_cast<int>( error.type() );
+          }
+          if ( error.line() != t.errorLine || error.column() != t.errorCol ) {
+            ok = false;
+            cerr << " expected position (" << t.errorLine << "," << t.errorCol
+                 << "), got (" << error.line() << "," << error.column() << ")";
+          }
+          // Lexing stops at the first error; the end-of-input check below
+          // does not apply in that case.
+          gotError = true;
+          break;
+        }
+        // { None, 0 } marks the end of the expected token list.
+        if ( t.expected[j].token == Lexer::None &&
+             t.expected[j].result == 0 )
+          break;
+      }
+      if ( !gotError ) {
+        if ( !lexer.atEnd() ) {
+          ok = false;
+          cerr << " premature end of expected token list";
+        }
+        // A test case that expects an error must fail when none was raised.
+        if ( t.expectedError != Error::None ) {
+          ok = false;
+          cerr << " expected error #" << static_cast<int>( t.expectedError )
+               << ", got none";
+        }
+      }
+      if ( ok )
+        cerr << " ok";
+      cerr << endl;
+      if ( !ok )
+        success = false;
+    }
+    if ( !success )
+      return 1;
+  } else { // usage error
+    cerr << "usage: lexertest [ <string> ]" << endl;
+    // Plain return instead of exit(): no header declaring exit() is
+    // included by this file.
+    return 1;
+  }
+
+  return 0;
+}