From 460c52653ab0dcca6f19a4f492ed2c5e4e963ab0 Mon Sep 17 00:00:00 2001
From: toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>
Date: Wed, 25 Nov 2009 17:56:58 +0000
Subject: Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features.

BUG:215923

git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdepim@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
---
 indexlib/tests/tokenizer-test.cpp | 69 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 indexlib/tests/tokenizer-test.cpp

diff --git a/indexlib/tests/tokenizer-test.cpp b/indexlib/tests/tokenizer-test.cpp
new file mode 100644
index 00000000..372859d9
--- /dev/null
+++ b/indexlib/tests/tokenizer-test.cpp
@@ -0,0 +1,69 @@
+#include <boost/test/unit_test.hpp>
+#include "tokenizer.h"
+#include <memory>
+
+using namespace ::boost::unit_test;
+namespace indexlib { namespace tests { namespace tokenizer_test {
+
+using indexlib::detail::tokenizer;
+using indexlib::detail::get_tokenizer;
+
+void simple() {
+	std::auto_ptr<tokenizer> tokenizer = get_tokenizer( "latin-1:european" );
+	assert(tokenizer.get());
+	std::vector<std::string> tokens = tokenizer->string_to_words( "one ,as, ''#`:ThReE, ááàçé" );
+	std::vector<std::string> expected;
+	expected.push_back( "ONE" );
+	expected.push_back( "AS" );
+	expected.push_back( "THREE" );
+	expected.push_back( "AAACE" );
+	std::sort( tokens.begin(), tokens.end() );
+	std::sort( expected.begin(), expected.end() );
+	BOOST_CHECK_EQUAL( expected.size(), tokens.size() );
+	for ( int i = 0; i < expected.size() && i < tokens.size(); ++i ) {
+		BOOST_CHECK_EQUAL( expected[ i ], tokens[ i ] );
+	}
+}
+
+void with_newlines() {
+	std::auto_ptr<tokenizer> tokenizer = get_tokenizer( "latin-1:european" );
+	assert(tokenizer.get());
+	std::vector<std::string> tokens = tokenizer->string_to_words( "one\ntwo\nthree" );
+	std::vector<std::string> expected;
+	expected.push_back( "ONE" );
+	expected.push_back( "TWO" );
+	expected.push_back( "THREE" );
+	std::sort( tokens.begin(), tokens.end() );
+	std::sort( expected.begin(), expected.end() );
+	BOOST_CHECK_EQUAL( expected.size(), tokens.size() );
+	for ( int i = 0; i < expected.size() && i < tokens.size(); ++i ) {
+		BOOST_CHECK_EQUAL( expected.at( i ), tokens.at( i ) );
+	}
+}
+
+void with_numbers() {
+	std::auto_ptr<tokenizer> tokenizer = get_tokenizer( "latin-1:european" );
+	assert(tokenizer.get());
+	std::vector<std::string> tokens = tokenizer->string_to_words( "one 012 123 four" );
+	std::vector<std::string> expected;
+	expected.push_back( "ONE" );
+	expected.push_back( "012" );
+	expected.push_back( "123" );
+	expected.push_back( "FOUR" );
+	std::sort( tokens.begin(), tokens.end() );
+	std::sort( expected.begin(), expected.end() );
+	BOOST_CHECK_EQUAL( expected.size(), tokens.size() );
+	for ( int i = 0; i < expected.size() && i < tokens.size(); ++i ) {
+		BOOST_CHECK_EQUAL( expected.at( i ), tokens.at( i ) );
+	}
+}
+
+test_suite* get_suite() {
+	test_suite* test = BOOST_TEST_SUITE( "Tokenizer tests" );
+	test->add( BOOST_TEST_CASE( &simple ) );
+	test->add( BOOST_TEST_CASE( &with_newlines ) );
+	test->add( BOOST_TEST_CASE( &with_numbers ) );
+	return test;
+}
+
+}}} //namespaces
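For reference, the file above only defines get_suite(); Boost.Test will not run the
suite until it is attached to a master test suite by some driver. The sketch below
shows one plausible way to wire it up, assuming the classic init_unit_test_suite()
entry point that matches the BOOST_TEST_SUITE/BOOST_TEST_CASE macros used in the
commit. The file name runner.cpp, the master-suite label, and the forward
declaration are illustrative assumptions, not part of this commit.

// runner.cpp: hypothetical standalone driver for the tokenizer suite.
// Assumes compilation alongside tokenizer-test.cpp and linking against
// the Boost.Test library (classic, pre-header-only initialization API).
#include <boost/test/unit_test.hpp>

using namespace ::boost::unit_test;

namespace indexlib { namespace tests { namespace tokenizer_test {
	// Defined in tokenizer-test.cpp above.
	test_suite* get_suite();
}}}

// Classic Boost.Test initialization hook: build a master suite by hand
// and attach the tokenizer suite to it; the framework then runs it.
test_suite* init_unit_test_suite( int /*argc*/, char* /*argv*/[] ) {
	test_suite* master = BOOST_TEST_SUITE( "indexlib tests" );
	master->add( indexlib::tests::tokenizer_test::get_suite() );
	return master;
}

With Boost installed, something along the lines of
g++ runner.cpp tokenizer-test.cpp -lboost_unit_test_framework
would build the runner; the exact library name and required flags vary by Boost
version and platform.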