/*************************************************************************** * Copyright (C) 2004-2009 by Thomas Fischer * * fischer@unix-ag.uni-kl.de * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include #include #include #include "encoderlatex.h" namespace BibTeX { EncoderLaTeX *EncoderLaTeX::encoderLaTeX = NULL; static struct Decomposition { const char *latexCommand; unsigned int unicode; } decompositions[] = { {"`", 0x0300}, {"'", 0x0301}, {"^", 0x0302}, {"~", 0x0303}, {"=", 0x0304}, /*{"x", 0x0305}, OVERLINE */ {"u", 0x0306}, {".", 0x0307}, /*{"x", 0x0309}, HOOK ABOVE */ {"r", 0x030a}, {"H", 0x030b}, {"v", 0x030c}, /*{"x", 0x030d}, VERTICAL LINE ABOVE */ /*{"x", 0x030e}, DOUBLE VERTICAL LINE ABOVE */ /*{"x", 0x030f}, DOUBLE GRAVE ACCENT */ /*{"x", 0x0310}, CANDRABINDU */ /*{"x", 0x0311}, INVERTED BREVE */ /*{"x", 0x0312}, TURNED COMMA ABOVE */ /*{"x", 0x0313}, COMMA ABOVE */ /*{"x", 0x0314}, REVERSED COMMA ABOVE */ /*{"x", 0x0315}, */ /*{"x", 0x0316}, */ /*{"x", 0x0317}, */ /*{"x", 0x0318}, */ /*{"x", 0x0319}, */ /*{"x", 0x031a}, */ /*{"x", 0x031b}, */ /*{"x", 0x031c}, */ /*{"x", 0x031d}, */ /*{"x", 0x031e}, */ /*{"x", 0x031f}, */ /*{"x", 0x0320}, */ /*{"x", 0x0321}, */ /*{"x", 0x0322}, */ {"d", 0x0323}, /*{"x", 0x0324}, */ /*{"x", 0x0325}, */ /*{"x", 0x0326}, */ {"d", 0x0327}, {"k", 0x0328}, /*{"x", 0x0329}, */ /*{"x", 0x032a}, */ /*{"x", 0x032b}, */ /*{"x", 0x032c}, */ /*{"x", 0x032d}, */ /*{"x", 0x032e}, */ /*{"x", 0x032f}, */ {"b", 0x0331}, {"t", 0x0361} }; static const int decompositionscount = sizeof( decompositions ) / sizeof( decompositions[ 0 ] ) ; static const struct EncoderLaTeXCommandMapping { const char *letters; unsigned int unicode; } commandmappingdatalatex[] = { {"AA", 0x00C5}, {"AE", 0x00C6}, {"ss", 0x00DF}, {"aa", 0x00E5}, {"ae", 0x00E6}, {"OE", 0x0152}, {"oe", 0x0153}, {"ldots", 0x2026}, {"L", 0x0141}, {"l", 0x0142}, {"grqq", 0x201C}, {"glqq", 0x201E}, {"frqq", 0x00BB}, {"flqq", 0x00AB}, // awk -F '[{}\\\\]+' '/DeclareUnicodeCharacter/ { print "{\""$4"\", 0x"$3"},"}' /usr/share/texmf-dist/tex/latex/base/t2aenc.dfu | grep '0x04' | sort -r -f {"cyrzhdsc", 0x0497}, {"CYRZHDSC", 0x0496}, {"cyrzh", 0x0436}, {"CYRZH", 0x0416}, {"cyrzdsc", 0x0499}, {"CYRZDSC", 0x0498}, {"cyrz", 0x0437}, {"CYRZ", 0x0417}, {"cyryu", 0x044E}, {"CYRYU", 0x042E}, {"cyryo", 0x0451}, {"CYRYO", 0x0401}, {"cyryi", 0x0457}, {"CYRYI", 0x0407}, {"cyryhcrs", 0x04B1}, {"CYRYHCRS", 0x04B0}, {"cyrya", 0x044F}, {"CYRYA", 0x042F}, {"cyry", 0x04AF}, {"CYRY", 0x04AE}, {"cyrv", 0x0432}, {"CYRV", 0x0412}, {"cyrushrt", 0x045E}, {"CYRUSHRT", 0x040E}, {"cyru", 0x0443}, {"CYRU", 0x0423}, {"cyrtshe", 0x045B}, {"CYRTSHE", 0x040B}, {"cyrtdsc", 0x04AD}, {"CYRTDSC", 0x04AC}, {"cyrt", 0x0442}, {"CYRT", 0x0422}, {"cyrshha", 0x04BB}, {"CYRSHHA", 0x04BA}, {"cyrshch", 0x0449}, {"CYRSHCH", 0x0429}, {"cyrsh", 0x0448}, {"CYRSH", 0x0428}, {"cyrsftsn", 0x044C}, {"CYRSFTSN", 0x042C}, {"cyrsdsc", 0x04AB}, {"CYRSDSC", 0x04AA}, {"cyrschwa", 0x04D9}, {"CYRSCHWA", 0x04D8}, {"cyrs", 0x0441}, {"CYRS", 0x0421}, {"cyrr", 0x0440}, {"CYRR", 0x0420}, {"CYRpalochka", 0x04C0}, {"cyrp", 0x043F}, {"CYRP", 0x041F}, {"cyrotld", 0x04E9}, {"CYROTLD", 0x04E8}, {"cyro", 0x043E}, {"CYRO", 0x041E}, {"cyrnje", 0x045A}, {"CYRNJE", 0x040A}, {"cyrng", 0x04A5}, {"CYRNG", 0x04A4}, {"cyrndsc", 0x04A3}, {"CYRNDSC", 0x04A2}, {"cyrn", 0x043D}, {"CYRN", 0x041D}, {"cyrm", 0x043C}, {"CYRM", 0x041C}, {"cyrlje", 0x0459}, {"CYRLJE", 0x0409}, {"cyrl", 0x043B}, {"CYRL", 0x041B}, {"cyrkvcrs", 0x049D}, {"CYRKVCRS", 0x049C}, {"cyrkdsc", 0x049B}, {"CYRKDSC", 0x049A}, {"cyrk", 0x043A}, {"CYRK", 0x041A}, {"cyrje", 0x0458}, {"CYRJE", 0x0408}, {"cyrishrt", 0x0439}, {"CYRISHRT", 0x0419}, {"cyrii", 0x0456}, {"CYRII", 0x0406}, {"cyrie", 0x0454}, {"CYRIE", 0x0404}, {"cyri", 0x0438}, {"CYRI", 0x0418}, {"cyrhrdsn", 0x044A}, {"CYRHRDSN", 0x042A}, {"cyrhdsc", 0x04B3}, {"CYRHDSC", 0x04B2}, {"cyrh", 0x0445}, {"CYRH", 0x0425}, {"cyrgup", 0x0491}, {"CYRGUP", 0x0490}, {"cyrghcrs", 0x0493}, {"CYRGHCRS", 0x0492}, {"cyrg", 0x0433}, {"CYRG", 0x0413}, {"cyrf", 0x0444}, {"CYRF", 0x0424}, {"cyrery", 0x044B}, {"CYRERY", 0x042B}, {"cyrerev", 0x044D}, {"CYREREV", 0x042D}, {"cyre", 0x0435}, {"CYRE", 0x0415}, {"cyrdzhe", 0x045F}, {"CYRDZHE", 0x040F}, {"cyrdze", 0x0455}, {"CYRDZE", 0x0405}, {"cyrdje", 0x0452}, {"CYRDJE", 0x0402}, {"cyrd", 0x0434}, {"CYRD", 0x0414}, {"cyrchvcrs", 0x04B9}, {"CYRCHVCRS", 0x04B8}, {"cyrchrdsc", 0x04B7}, {"CYRCHRDSC", 0x04B6}, {"cyrch", 0x0447}, {"CYRCH", 0x0427}, {"cyrc", 0x0446}, {"CYRC", 0x0426}, {"cyrb", 0x0431}, {"CYRB", 0x0411}, {"cyrae", 0x04D5}, {"CYRAE", 0x04D4}, {"cyra", 0x0430}, {"CYRA", 0x0410} }; static const int commandmappingdatalatexcount = sizeof( commandmappingdatalatex ) / sizeof( commandmappingdatalatex[ 0 ] ) ; /** Command can be either (1) {embraced} (2) delimited by {}, (3) , line end, (4) \following_command (including \, which must be maintained!), (5) } (end of entry or group) **/ const char *expansionsCmd[] = {"\\{\\\\%1\\}", "\\\\%1\\{\\}", "\\\\%1(\\n|\\r|\\\\|\\})", "\\\\%1\\s"}; static const int expansionscmdcount = sizeof( expansionsCmd ) / sizeof( expansionsCmd[0] ); static const struct EncoderLaTeXModCharMapping { const char *modifier; const char *letter; unsigned int unicode; } modcharmappingdatalatex[] = { {"\\\\`", "A", 0x00C0}, {"\\\\'", "A", 0x00C1}, {"\\\\\\^", "A", 0x00C2}, {"\\\\~", "A", 0x00C3}, {"\\\\\"", "A", 0x00C4}, {"\\\\r", "A", 0x00C5}, /** 0x00C6 */ {"\\\\c", "C", 0x00C7}, {"\\\\`", "E", 0x00C8}, {"\\\\'", "E", 0x00C9}, {"\\\\\\^", "E", 0x00CA}, {"\\\\\"", "E", 0x00CB}, {"\\\\`", "I", 0x00CC}, {"\\\\'", "I", 0x00CD}, {"\\\\\\^", "I", 0x00CE}, {"\\\\\"", "I", 0x00CF}, /** 0x00D0 */ {"\\\\~", "N", 0x00D1}, {"\\\\`", "O", 0x00D2}, {"\\\\'", "O", 0x00D3}, {"\\\\\\^", "O", 0x00D4}, /** 0x00D5 */ {"\\\\\"", "O", 0x00D6}, /** 0x00D7 */ {"\\\\", "O", 0x00D8}, {"\\\\`", "U", 0x00D9}, {"\\\\'", "U", 0x00DA}, {"\\\\\\^", "U", 0x00DB}, {"\\\\\"", "U", 0x00DC}, {"\\\\'", "Y", 0x00DD}, /** 0x00DE */ {"\\\\\"", "s", 0x00DF}, {"\\\\`", "a", 0x00E0}, {"\\\\'", "a", 0x00E1}, {"\\\\\\^", "a", 0x00E2}, {"\\\\~", "a", 0x00E3}, {"\\\\\"", "a", 0x00E4}, {"\\\\r", "a", 0x00E5}, /** 0x00E6 */ {"\\\\c", "c", 0x00E7}, {"\\\\`", "e", 0x00E8}, {"\\\\'", "e", 0x00E9}, {"\\\\\\^", "e", 0x00EA}, {"\\\\\"", "e", 0x00EB}, {"\\\\`", "i", 0x00EC}, {"\\\\'", "i", 0x00ED}, {"\\\\'", "\\\\i", 0x00ED}, {"\\\\\\^", "i", 0x00EE}, /** 0x00EF */ /** 0x00F0 */ {"\\\\~", "n", 0x00F1}, {"\\\\`", "o", 0x00F2}, {"\\\\'", "o", 0x00F3}, {"\\\\\\^", "o", 0x00F4}, /** 0x00F5 */ {"\\\\\"", "o", 0x00F6}, /** 0x00F7 */ {"\\\\", "o", 0x00F8}, {"\\\\`", "u", 0x00F9}, {"\\\\'", "u", 0x00FA}, {"\\\\\\^", "u", 0x00FB}, {"\\\\\"", "u", 0x00FC}, {"\\\\'", "y", 0x00FD}, /** 0x00FE */ /** 0x00FF */ /** 0x0100 */ /** 0x0101 */ {"\\\\u", "A", 0x0102}, {"\\\\u", "a", 0x0103}, /** 0x0104 */ /** 0x0105 */ {"\\\\'", "C", 0x0106}, {"\\\\'", "c", 0x0107}, /** 0x0108 */ /** 0x0109 */ /** 0x010A */ /** 0x010B */ {"\\\\v", "C", 0x010C}, {"\\\\v", "c", 0x010D}, {"\\\\v", "D", 0x010E}, /** 0x010F */ /** 0x0110 */ /** 0x0111 */ /** 0x0112 */ /** 0x0113 */ /** 0x0114 */ /** 0x0115 */ /** 0x0116 */ /** 0x0117 */ {"\\\\c", "E", 0x0118}, {"\\\\c", "e", 0x0119}, {"\\\\v", "E", 0x011A}, {"\\\\v", "e", 0x011B}, /** 0x011C */ /** 0x011D */ {"\\\\u", "G", 0x011E}, {"\\\\u", "g", 0x011F}, /** 0x0120 */ /** 0x0121 */ /** 0x0122 */ /** 0x0123 */ /** 0x0124 */ /** 0x0125 */ /** 0x0126 */ /** 0x0127 */ /** 0x0128 */ /** 0x0129 */ /** 0x012A */ /** 0x012B */ {"\\\\u", "I", 0x012C}, {"\\\\u", "i", 0x012D}, /** 0x012E */ /** 0x012F */ /** 0x0130 */ /** 0x0131 */ /** 0x0132 */ /** 0x0133 */ /** 0x0134 */ /** 0x0135 */ /** 0x0136 */ /** 0x0137 */ /** 0x0138 */ {"\\\\'", "L", 0x0139}, {"\\\\'", "l", 0x013A}, /** 0x013B */ /** 0x013C */ /** 0x013D */ /** 0x013E */ /** 0x013F */ /** 0x0140 */ /** 0x0141 */ /** 0x0142 */ {"\\\\'", "N", 0x0143}, {"\\\\'", "n", 0x0144}, /** 0x0145 */ /** 0x0146 */ {"\\\\v", "N", 0x0147}, {"\\\\v", "n", 0x0148}, /** 0x0149 */ /** 0x014A */ /** 0x014B */ /** 0x014C */ /** 0x014D */ {"\\\\u", "O", 0x014E}, {"\\\\u", "o", 0x014F}, {"\\\\H", "O", 0x0150}, {"\\\\H", "o", 0x0151}, /** 0x0152 */ /** 0x0153 */ {"\\\\'", "R", 0x0154}, {"\\\\'", "r", 0x0155}, /** 0x0156 */ /** 0x0157 */ {"\\\\v", "R", 0x0158}, {"\\\\v", "r", 0x0159}, {"\\\\'", "S", 0x015A}, {"\\\\'", "s", 0x015B}, /** 0x015C */ /** 0x015D */ {"\\\\c", "S", 0x015E}, {"\\\\c", "s", 0x015F}, {"\\\\v", "S", 0x0160}, {"\\\\v", "s", 0x0161}, /** 0x0162 */ /** 0x0163 */ {"\\\\v", "T", 0x0164}, /** 0x0165 */ /** 0x0166 */ /** 0x0167 */ /** 0x0168 */ /** 0x0169 */ /** 0x016A */ /** 0x016B */ {"\\\\u", "U", 0x016C}, {"\\\\u", "u", 0x016D}, {"\\\\r", "U", 0x016E}, {"\\\\r", "u", 0x016F}, /** 0x0170 */ /** 0x0171 */ /** 0x0172 */ /** 0x0173 */ /** 0x0174 */ /** 0x0175 */ /** 0x0176 */ /** 0x0177 */ {"\\\\\"", "Y", 0x0178}, {"\\\\'", "Z", 0x0179}, {"\\\\'", "z", 0x017A}, /** 0x017B */ /** 0x017C */ {"\\\\v", "Z", 0x017D}, {"\\\\v", "z", 0x017E}, /** 0x017F */ /** 0x0180 */ {"\\\\v", "A", 0x01CD}, {"\\\\v", "a", 0x01CE}, {"\\\\v", "G", 0x01E6}, {"\\\\v", "g", 0x01E7} }; const char *expansionsMod1[] = {"\\{%1\\{%2\\}\\}", "\\{%1 %2\\}", "%1\\{%2\\}"}; static const int expansionsmod1count = sizeof( expansionsMod1 ) / sizeof( expansionsMod1[0] ); const char *expansionsMod2[] = {"\\{%1%2\\}", "%1%2\\{\\}", "%1%2"}; static const int expansionsmod2count = sizeof( expansionsMod2 ) / sizeof( expansionsMod2[0] ); static const int modcharmappingdatalatexcount = sizeof( modcharmappingdatalatex ) / sizeof( modcharmappingdatalatex[ 0 ] ) ; static const struct EncoderLaTeXCharMapping { const char *regexp; unsigned int unicode; const char *latex; } charmappingdatalatex[] = { {"\\\\#", 0x0023, "\\#"}, {"\\\\&", 0x0026, "\\&"}, {"\\\\_", 0x005F, "\\_"}, {"!`", 0x00A1, "!`"}, {"\"<", 0x00AB, "\"<"}, {"\">", 0x00BB, "\">"}, {"[?]`", 0x00BF, "?`"}, {"--", 0x2013, "--"} }; static const int charmappingdatalatexcount = sizeof( charmappingdatalatex ) / sizeof( charmappingdatalatex[ 0 ] ) ; EncoderLaTeX::EncoderLaTeX() { buildCharMapping(); buildCombinedMapping(); } EncoderLaTeX::~EncoderLaTeX() { // nothing } TQString EncoderLaTeX::decode( const TQString & text ) { const TQString splitMarker = "|KBIBTEX|"; /** start-stop marker ensures that each text starts and stops * with plain text and not with an inline math environment. * This invariant is exploited implicitly in the code below. */ const TQString startStopMarker="|STARTSTOP|"; TQString result = startStopMarker + text + startStopMarker; /** Collect (all?) urls from the BibTeX file and store them in urls */ /** Problem is that the replace function below will replace * character sequences in the URL rendering the URL invalid. * Later, all URLs will be replaced back to their original * in the hope nothing breaks ... */ TQStringList urls; TQRegExp httpRegExp( "(ht|f)tp://[^\"} ]+" ); httpRegExp.setMinimal( false ); int pos = 0; while ( pos >= 0 ) { pos = httpRegExp.search( result, pos ); if ( pos >= 0 ) { ++pos; TQString url = httpRegExp.cap( 0 ); urls << url; } } decomposedUTF8toLaTeX( result ); /** split text into math and non-math regions */ TQStringList intermediate = TQStringList::split( '$', result, true ); TQStringList::Iterator it = intermediate.begin(); while ( it != intermediate.end() ) { /** * Sometimes we split strings like "\$", which is not intended. * So, we have to manually fix things by checking for strings * ending with "\" and append both the removed dollar sign and * the following string (which was never supposed to be an * independent string). Finally, we remove the unnecessary * string and continue. */ if (( *it ).endsWith( "\\" ) ) { TQStringList::Iterator cur = it; ++it; ( *cur ).append( '$' ).append( *it ); intermediate.remove( it ); it = cur; } else ++it; } tqApp->processEvents(); result = ""; for ( TQStringList::Iterator it = intermediate.begin(); it != intermediate.end(); ++it ) { if ( !result.isEmpty() ) result.append( splitMarker ); result.append( *it ); ++it; if ( it == intermediate.end() ) break; if (( *it ).length() > 256 ) tqDebug( "Very long math equation using $ found, maybe due to broken inline math: %s", ( *it ).left( 48 ).latin1() ); } tqApp->processEvents(); for ( TQValueList::ConstIterator cmit = m_charMapping.begin(); cmit != m_charMapping.end(); ++cmit ) result.replace(( *cmit ).regExp, ( *cmit ).unicode ); tqApp->processEvents(); TQStringList transformed = TQStringList::split( splitMarker, result, true ); tqApp->processEvents(); result = ""; for ( TQStringList::Iterator itt = transformed.begin(), iti = intermediate.begin(); itt != transformed.end() && iti != intermediate.end(); ++itt, ++iti ) { result.append( *itt ); ++iti; if ( iti == intermediate.end() ) break; result.append( "$" ).append( *iti ).append( "$" ); } tqApp->processEvents(); /** Reinserting original URLs as explained above */ pos = 0; int idx = 0; while ( pos >= 0 ) { pos = httpRegExp.search( result, pos ); if ( pos >= 0 ) { ++pos; int len = httpRegExp.cap( 0 ).length(); result = result.left( pos - 1 ).append( urls[idx++] ).append( result.mid( pos + len - 1 ) ); } } return result.replace( startStopMarker,"" ); } TQString EncoderLaTeX::encode( const TQString & text ) { const TQString splitMarker = "|KBIBTEX|"; /** start-stop marker ensures that each text starts and stops * with plain text and not with an inline math environment. * This invariant is exploited implicitly in the code below. */ const TQString startStopMarker="|STARTSTOP|"; TQString result = startStopMarker + text + startStopMarker; /** Collect (all?) urls from the BibTeX file and store them in urls */ /** Problem is that the replace function below will replace * character sequences in the URL rendering the URL invalid. * Later, all URLs will be replaced back to their original * in the hope nothing breaks ... */ TQStringList urls; TQRegExp httpRegExp( "(ht|f)tp://[^\"} ]+" ); httpRegExp.setMinimal( false ); int pos = 0; while ( pos >= 0 ) { pos = httpRegExp.search( result, pos ); if ( pos >= 0 ) { ++pos; TQString url = httpRegExp.cap( 0 ); urls << url; } } /** split text into math and non-math regions */ TQStringList intermediate = TQStringList::split( '$', result, true ); TQStringList::Iterator it = intermediate.begin(); while ( it != intermediate.end() ) { /** * Sometimes we split strings like "\$", which is not intended. * So, we have to manually fix things by checking for strings * ending with "\" and append both the removed dollar sign and * the following string (which was never supposed to be an * independent string). Finally, we remove the unnecessary * string and continue. */ if (( *it ).endsWith( "\\" ) ) { TQStringList::Iterator cur = it; ++it; ( *cur ).append( '$' ).append( *it ); intermediate.remove( it ); it = cur; } else ++it; } tqApp->processEvents(); result = ""; for ( TQStringList::Iterator it = intermediate.begin(); it != intermediate.end(); ++it ) { if ( !result.isEmpty() ) result.append( splitMarker ); result.append( *it ); ++it; if ( it == intermediate.end() ) break; if (( *it ).length() > 256 ) tqDebug( "Very long math equation using $ found, maybe due to broken inline math: %s", ( *it ).left( 48 ).latin1() ); } tqApp->processEvents(); for ( TQValueList::ConstIterator cmit = m_charMapping.begin(); cmit != m_charMapping.end(); ++cmit ) result.replace(( *cmit ).unicode, ( *cmit ).latex ); tqApp->processEvents(); TQStringList transformed = TQStringList::split( splitMarker, result, true ); tqApp->processEvents(); result = ""; for ( TQStringList::Iterator itt = transformed.begin(), iti = intermediate.begin(); itt != transformed.end() && iti != intermediate.end(); ++itt, ++iti ) { result.append( *itt ); ++iti; if ( iti == intermediate.end() ) break; result.append( "$" ).append( *iti ).append( "$" ); } tqApp->processEvents(); /** \url accepts unquotet & and _ May introduce new problem tough */ if ( result.contains( "\\url{" ) ) result.replace( "\\&", "&" ).replace( "\\_", "_" ).replace( TQChar( 0x2013 ), "--" ).replace( "\\#", "#" ); decomposedUTF8toLaTeX( result ); /** Reinserting original URLs as explained above */ pos = 0; int idx = 0; while ( pos >= 0 ) { pos = httpRegExp.search( result, pos ); if ( pos >= 0 ) { ++pos; int len = httpRegExp.cap( 0 ).length(); result = result.left( pos - 1 ).append( urls[idx++] ).append( result.mid( pos + len - 1 ) ); } } return result.replace( startStopMarker,"" ); } TQString EncoderLaTeX::encode( const TQString &text, const TQChar &replace ) { TQString result = text; for ( TQValueList::ConstIterator it = m_charMapping.begin(); it != m_charMapping.end(); ++it ) if (( *it ).unicode == replace ) result.replace(( *it ).unicode, ( *it ).latex ); return result; } TQString EncoderLaTeX::encodeSpecialized( const TQString & text, const EntryField::FieldType fieldType ) { TQString result = encode( text ); switch ( fieldType ) { case EntryField::ftPages: result.replace( TQChar( 0x2013 ), "--" ); break; case EntryField::ftURL: result.replace( "\\&", "&" ).replace( "\\_", "_" ).replace( TQChar( 0x2013 ), "--" ).replace( "\\#", "#" ); break; default: break; } return result; } TQString& EncoderLaTeX::decomposedUTF8toLaTeX( TQString &text ) { for ( TQValueList::Iterator it = m_combinedMapping.begin(); it != m_combinedMapping.end(); ++it ) { int i = ( *it ).regExp.search( text ); while ( i >= 0 ) { TQString a = ( *it ).regExp.cap( 1 ); text = text.left( i ) + "\\" + ( *it ).latex + "{" + a + "}" + text.mid( i + 2 ); i = ( *it ).regExp.search( text, i + 1 ); } } return text; } void EncoderLaTeX::buildCombinedMapping() { for ( int i = 0; i < decompositionscount; i++ ) { CombinedMappingItem item; item.regExp = TQRegExp( "(.)" + TQString( TQChar( decompositions[i].unicode ) ) ); item.latex = decompositions[i].latexCommand; m_combinedMapping.append( item ); } } void EncoderLaTeX::buildCharMapping() { /** encoding and decoding for digraphs such as -- or ?` */ for ( int i = 0; i < charmappingdatalatexcount; i++ ) { CharMappingItem charMappingItem; charMappingItem.regExp = TQRegExp( charmappingdatalatex[ i ].regexp ); charMappingItem.unicode = TQChar( charmappingdatalatex[ i ].unicode ); charMappingItem.latex = TQString( charmappingdatalatex[ i ].latex ); m_charMapping.append( charMappingItem ); } /** encoding and decoding for commands such as \AA or \ss */ for ( int i = 0; i < commandmappingdatalatexcount; ++i ) { /** different types of writing such as {\AA} or \AA{} possible */ for ( int j = 0; j < expansionscmdcount; ++j ) { CharMappingItem charMappingItem; charMappingItem.regExp = TQRegExp( TQString( expansionsCmd[j] ).arg( commandmappingdatalatex[i].letters ) ); charMappingItem.unicode = TQChar( commandmappingdatalatex[i].unicode ); if ( charMappingItem.regExp.numCaptures() > 0 ) charMappingItem.unicode += TQString( "\\1" ); charMappingItem.latex = TQString( "{\\%1}" ).arg( commandmappingdatalatex[i].letters ); m_charMapping.append( charMappingItem ); } } /** encoding and decoding for letters such as \"a */ for ( int i = 0; i < modcharmappingdatalatexcount; ++i ) { TQString modifierRegExp = TQString( modcharmappingdatalatex[i].modifier ); TQString modifier = modifierRegExp; modifier.replace( "\\^", "^" ).replace( "\\\\", "\\" ); /** first batch of replacement rules, where no separator is required between modifier and character (e.g. \"a) */ if ( !modifierRegExp.at( modifierRegExp.length() - 1 ).isLetter() ) for ( int j = 0; j < expansionsmod2count; ++j ) { CharMappingItem charMappingItem; charMappingItem.regExp = TQRegExp( TQString( expansionsMod2[j] ).arg( modifierRegExp ).arg( modcharmappingdatalatex[i].letter ) ); charMappingItem.unicode = TQChar( modcharmappingdatalatex[i].unicode ); charMappingItem.latex = TQString( "{%1%2}" ).arg( modifier ).arg( modcharmappingdatalatex[i].letter ); m_charMapping.append( charMappingItem ); } /** second batch of replacement rules, where a separator is required between modifier and character (e.g. \v{g}) */ for ( int j = 0; j < expansionsmod1count; ++j ) { CharMappingItem charMappingItem; charMappingItem.regExp = TQRegExp( TQString( expansionsMod1[j] ).arg( modifierRegExp ).arg( modcharmappingdatalatex[i].letter ) ); charMappingItem.unicode = TQChar( modcharmappingdatalatex[i].unicode ); charMappingItem.latex = TQString( "%1{%2}" ).arg( modifier ).arg( modcharmappingdatalatex[i].letter ); m_charMapping.append( charMappingItem ); } } } EncoderLaTeX* EncoderLaTeX::currentEncoderLaTeX() { if ( encoderLaTeX == NULL ) encoderLaTeX = new EncoderLaTeX(); return encoderLaTeX; } void EncoderLaTeX::deleteCurrentEncoderLaTeX() { if ( encoderLaTeX != NULL ) { delete encoderLaTeX; encoderLaTeX = NULL; } } char EncoderLaTeX::unicodeToASCII( unsigned int unicode ) { if ( unicode < 128 ) return ( char )unicode; for ( int i = 0; i < modcharmappingdatalatexcount; ++i ) if ( modcharmappingdatalatex[i].unicode == unicode ) return *modcharmappingdatalatex[i].letter; return '?'; } }