diff options
Diffstat (limited to 'kvoctrain/kvoctrain/tools')
-rw-r--r-- | kvoctrain/kvoctrain/tools/extractlang.README | 23 | ||||
-rw-r--r-- | kvoctrain/kvoctrain/tools/extractlang.cpp | 161 | ||||
-rwxr-xr-x | kvoctrain/kvoctrain/tools/extractlang.mak | 3 |
3 files changed, 187 insertions, 0 deletions
diff --git a/kvoctrain/kvoctrain/tools/extractlang.README b/kvoctrain/kvoctrain/tools/extractlang.README
new file mode 100644
index 00000000..3d3cdd04
--- /dev/null
+++ b/kvoctrain/kvoctrain/tools/extractlang.README
@@ -0,0 +1,23 @@
+Hi!
+
+Hab die neue Version des Parsers fertig und ein neues Language file gener=
+iert.
+Ich attache auch den parser selbst.
+
+nachdem man ihn kompiliert hat einfach das html file durch pipen
+#> cat englangn.html | ./extractlang > languages.txt
+
+Leider sind noch ein paar Duplikate drin (zB Spanish, Castillian und
+Castillian, Spanish)
+
+Die Namen hab ich diesmal nicht ge=E4ndert, die meisten passen sowieso, n=
+ur bei=20
+einigen steht Zusatzinformationen dabei.
+
+ciao,
+kev
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Kevin Krammer <kevin.krammer@gmx.at>
+Developer at the Kmud Project http://www.kmud.de/
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/kvoctrain/kvoctrain/tools/extractlang.cpp b/kvoctrain/kvoctrain/tools/extractlang.cpp
new file mode 100644
index 00000000..6123214c
--- /dev/null
+++ b/kvoctrain/kvoctrain/tools/extractlang.cpp
@@ -0,0 +1,161 @@
+#include <qtextstream.h>
+#include <qstring.h>
+
+///////////////////////////////////////////////////////////
+//
+// (c) Kevin Krammer <kevin.krammer@gmx.at>
+//
+// 2001-12-08
+//
+// automatically extracts a structure from
+// http://www.loc.gov/standards/iso639-2/englangn.html
+// (the HTML is read from stdin, the generated C tables are written to stdout)
+// see also extractlang.README
+//
+///////////////////////////////////////////////////////////
+// Accumulated table rows: appended to by parseLanguage(), printed by main().
+QString twoLangs;    // rows printed under kv_iso639_1
+QString threeLangs;  // rows printed under kv_iso639_2
+// Appends one entry to threeLangs and, when `two` contains no space, one to twoLangs as well.
+void parseLanguage(QString name, QString three, QString two)
+{
+  int star = three.find('*');  // a '*' inside the three-letter field is dropped
+  if (star > 0)  // NOTE(review): "> 0" does not cover a '*' at index 0 - confirm that is intended
+    three.remove(star, 1);
+  // three_1: copy of the field in which a '/' is rewritten as ", " (used for the twoLangs row only)
+  QString three_1 = three;
+
+  int slash = three_1.find('/');
+  if (slash > 0) {
+    three_1.remove(slash, 1);
+    three_1.insert(slash, ", ");
+  }
+  // twoLangs row: {two}, {three_1}, {I18N_NOOP(name)} - skipped when `two` contains a space
+  if (!two.contains(" ")) {
+    twoLangs.append("\t{{\"");
+    twoLangs.append(two);
+    twoLangs.append("\"}, {\"");
+    twoLangs.append(three_1);
+    twoLangs.append("\"}, {I18N_NOOP(\"");
+    twoLangs.append(name);
+    twoLangs.append("\")}},\n");
+  }
+  else
+    two = "";  // cleared so that the '/' branch below may fill it in
+  // For "xxx/yyy": three keeps the part before '/'; the part after it replaces an empty `two`.
+  slash = three.find('/');
+  if (slash > 0) {
+    if (two.length() == 0)
+      two = three.mid(slash+1);
+    three.remove(slash, three.length());
+  }
+  // threeLangs row: {two}, {three}, {I18N_NOOP(name)} - always written
+  threeLangs.append("\t{{\"");
+  threeLangs.append(two);
+  threeLangs.append("\"}, {\"");
+  threeLangs.append(three);
+  threeLangs.append("\"}, {I18N_NOOP(\"");
+  threeLangs.append(name);
+  threeLangs.append("\")}},\n");
+}
+// Returns `line` without '<', '>' and everything between them, passed through simplifyWhiteSpace().
+QString parseTD(QString& line)
+{
+  QCString cline = line.local8Bit();  // the scan below works on the local 8-bit form of the line
+  uint clineLength = cline.length();
+  QCString result = cline;  // output buffer; starts as a copy of the input and is overwritten from index 0
+  int pos = -1;  // index of the last character written to result
+  bool inTag = false;  // true between a '<' and the next '>'
+
+  for (uint i = 0; i < clineLength; ++i)
+  {
+    if (cline[i] == '<')
+      inTag = true;
+    else if (cline[i] == '>')
+      inTag = false;
+    else if (!inTag)
+    {
+      pos++;
+      result[pos] = cline[i];
+    }
+  }
+  // Only the first pos+1 characters of result were written by the loop; left() cuts off the remainder.
+  QString retVal(result);  // NOTE(review): bytes came from local8Bit() but go back through the plain QString(QCString) constructor - confirm the encodings agree
+  retVal = retVal.left(pos+1);
+
+  return retVal.simplifyWhiteSpace();
+}
+// Handles the lines after a "<tr" line. Returns false when `line` still holds a line main() has to examine itself.
+bool parseTR(QString& line, QTextIStream& in)
+{
+  QString text[4];
+
+  line = in.readLine();
+  if (line.isNull())
+    return true;
+
+  // Collect cells, one per consecutive line containing "<td".
+  uint count = 0;
+  while (line.contains("<td"))
+  {
+    if (count > 3)  // a fifth "<td" line: text[] is full, the row is abandoned
+      return true;
+
+    text[count] = parseTD(line);
+    count++;
+    line = in.readLine();
+  }
+  // Exactly four cells: cell 0 is passed as name, cell 2 as three, cell 3 as two; cell 1 is not used.
+  if (count == 4)
+  {
+    parseLanguage(text[0], text[2], text[3]);
+  }
+  // `line` is now the first line that did not contain "<td".
+  if (line.contains("/tr"))
+    return true;
+
+  return false;
+}
+// Scans stdin for lines containing "<tr", hands them to parseTR(), then prints the struct and both tables to stdout.
+int main()
+{
+  QString line;
+  bool readNext = true;  // false when parseTR() left an unexamined line in `line`
+
+  QTextIStream in(stdin);
+  QTextOStream out(stdout);
+
+  while (!in.atEnd())
+  {
+    if (readNext)
+      line = in.readLine();
+
+    if (line.contains("<tr"))
+    {
+      readNext = parseTR(line, in);
+    }
+    else
+      readNext = true;
+  }
+  // Emit the generated source: struct definition first, then the two tables, each closed by an all-zero row.
+  out << "struct KV_ISO639_Code {\n"
+         " const char *iso1code;\n"
+         " const char *iso2code;\n"
+         " const char *langname;\n"
+         "};\n"
+         "\n";
+
+  out << "KV_ISO639_Code kv_iso639_1[] = {\n"
+      << twoLangs.left(twoLangs.length()-1) << endl  // left() drops the last character (the final row's '\n'); endl writes a newline
+      << "\t{{0}, {0}, {0}}\n"
+      << "};\n";
+
+  out << endl;
+
+  out << "KV_ISO639_Code kv_iso639_2[] = {\n"
+      << threeLangs.left(threeLangs.length()-1) << endl  // same trimming as for twoLangs above
+      <<"\t{{0}, {0}, {0}}\n"
+      << "};\n";
+
+  return 0;
+}
diff --git a/kvoctrain/kvoctrain/tools/extractlang.mak b/kvoctrain/kvoctrain/tools/extractlang.mak
new file mode 100755
index 00000000..740844df
--- /dev/null
+++ b/kvoctrain/kvoctrain/tools/extractlang.mak
@@ -0,0 +1,3 @@
+#!/bin/sh
+g++ -I $QTDIR/include -L $QTDIR/lib extractlang.cpp -o extractlang -lqt  # compile the extractor, linking with -lqt from $QTDIR
+cat englangn.html | ./extractlang > languages.txt  # run it on englangn.html, writing languages.txt
\ No newline at end of file |