diff options
Diffstat (limited to 'src/translators')
142 files changed, 28972 insertions, 0 deletions
diff --git a/src/translators/Makefile.am b/src/translators/Makefile.am new file mode 100644 index 0000000..68ee4cc --- /dev/null +++ b/src/translators/Makefile.am @@ -0,0 +1,70 @@ +####### kdevelop will overwrite this part!!! (begin)########## +noinst_LIBRARIES = libtranslators.a + +## AM_CPPFLAGS were found outside kdevelop specific part + +libtranslators_a_METASOURCES = AUTO + +libtranslators_a_SOURCES = alexandriaexporter.cpp alexandriaimporter.cpp \ + amcimporter.cpp audiofileimporter.cpp bibtexexporter.cpp bibtexhandler.cpp \ + bibteximporter.cpp bibtexmlexporter.cpp bibtexmlimporter.cpp csvexporter.cpp \ + csvimporter.cpp dcimporter.cpp deliciousimporter.cpp exporter.cpp \ + filelistingimporter.cpp freedb_util.cpp freedbimporter.cpp gcfilmsexporter.cpp \ + gcfilmsimporter.cpp griffithimporter.cpp grs1importer.cpp htmlexporter.cpp libcsv.c \ + onixexporter.cpp pdfimporter.cpp pilotdbexporter.cpp referencerimporter.cpp \ + risimporter.cpp tellico_xml.cpp tellicoimporter.cpp tellicoxmlexporter.cpp \ + tellicozipexporter.cpp textimporter.cpp xmlimporter.cpp xsltexporter.cpp xslthandler.cpp \ + xsltimporter.cpp + +if !USE_LIBBTPARSE + SUBDIR_LIBBTPARSE = btparse +endif + +SUBDIRS = pilotdb $(SUBDIR_LIBBTPARSE) + +CLEANFILES = *~ + +EXTRA_DIST = bibtex-translation.xml \ +bibtexexporter.cpp tellicoxmlexporter.h pilotdbexporter.cpp \ +bibtexexporter.h tellicozipexporter.cpp pilotdbexporter.h \ +bibtexhandler.cpp tellicozipexporter.h \ +bibtexhandler.h csvexporter.cpp textimporter.cpp \ +bibteximporter.cpp csvexporter.h textimporter.h \ +bibteximporter.h csvimporter.cpp xmlimporter.cpp \ +bibtexmlexporter.cpp csvimporter.h xmlimporter.h \ +bibtexmlexporter.h xsltexporter.cpp \ +bibtexmlimporter.cpp dataimporter.h xsltexporter.h \ +bibtexmlimporter.h exporter.h xslthandler.cpp \ +tellicoimporter.cpp htmlexporter.cpp xslthandler.h \ +tellicoimporter.h htmlexporter.h xsltimporter.cpp \ +tellicoxmlexporter.cpp importer.h xsltimporter.h \ +audiofileimporter.h 
audiofileimporter.cpp alexandriaimporter.h \ +alexandriaimporter.cpp alexandriaexporter.h alexandriaexporter.cpp \ +freedbimporter.h freedbimporter.cpp freedb_util.cpp \ +risimporter.h risimporter.cpp tellico_xml.h \ +tellico_xml.cpp translators.h exporter.cpp \ +onixexporter.h onixexporter.cpp gcfilmsimporter.h \ +gcfilmsimporter.cpp gcfilmsexporter.h gcfilmsexporter.cpp \ +filelistingimporter.h filelistingimporter.cpp grs1importer.h \ +grs1importer.cpp amcimporter.h amcimporter.cpp \ +dcimporter.h dcimporter.cpp griffithimporter.h \ +griffithimporter.cpp griffith2tellico.py pdfimporter.h \ +pdfimporter.cpp referencerimporter.h referencerimporter.cpp \ +libcsv.h libcsv.c \ +deliciousimporter.h deliciousimporter.cpp + +####### kdevelop will overwrite this part!!! (end)############ + +AM_CPPFLAGS = \ + $(LIBXML_CFLAGS) \ + $(LIBXSLT_CFLAGS) \ + $(TAGLIB_CFLAGS) \ + $(KCDDB_CFLAGS) \ + $(all_includes) \ + $(POPPLER_CFLAGS) + +KDE_OPTIONS = noautodist + +appdir = $(kde_datadir)/tellico +app_DATA = bibtex-translation.xml +app_SCRIPTS = griffith2tellico.py diff --git a/src/translators/alexandriaexporter.cpp b/src/translators/alexandriaexporter.cpp new file mode 100644 index 0000000..186b866 --- /dev/null +++ b/src/translators/alexandriaexporter.cpp @@ -0,0 +1,183 @@ +/*************************************************************************** + copyright : (C) 2003-2007 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "alexandriaexporter.h" +#include "../document.h" +#include "../collection.h" +#include 
"../tellico_kernel.h" +#include "../imagefactory.h" +#include "../image.h" +#include "../tellico_utils.h" +#include "../tellico_debug.h" +#include "../progressmanager.h" + +#include <klocale.h> +#include <kmessagebox.h> +#include <kapplication.h> + +#include <qdir.h> + +namespace { + static const int ALEXANDRIA_MAX_SIZE_SMALL = 60; + static const int ALEXANDRIA_MAX_SIZE_MEDIUM = 140; +} + +using Tellico::Export::AlexandriaExporter; + +QString& AlexandriaExporter::escapeText(QString& str_) { + str_.replace('"', QString::fromLatin1("\\\"")); + return str_; +} + +QString AlexandriaExporter::formatString() const { + return i18n("Alexandria"); +} + +bool AlexandriaExporter::exec() { + Data::CollPtr coll = collection(); + if(!coll || (coll->type() != Data::Collection::Book && coll->type() != Data::Collection::Bibtex)) { + myLog() << "AlexandriaExporter::exec() - bad collection" << endl; + return false; + } + + const QString alexDirName = QString::fromLatin1(".alexandria"); + + // create if necessary + QDir libraryDir = QDir::home(); + if(!libraryDir.cd(alexDirName)) { + if(!libraryDir.mkdir(alexDirName) || !libraryDir.cd(alexDirName)) { + myLog() << "AlexandriaExporter::exec() - can't locate directory" << endl; + return false; + } + } + + // the collection title is the name of the directory to create + if(libraryDir.cd(coll->title())) { + int ret = KMessageBox::warningContinueCancel(Kernel::self()->widget(), + i18n("<qt>An Alexandria library called <i>%1</i> already exists. 
" + "Any existing books in that library could be overwritten.</qt>") + .arg(coll->title())); + if(ret == KMessageBox::Cancel) { + return false; + } + } else if(!libraryDir.mkdir(coll->title()) || !libraryDir.cd(coll->title())) { + return false; // could not create and cd to the dir + } + + ProgressItem& item = ProgressManager::self()->newProgressItem(this, QString::null, false); + item.setTotalSteps(entries().count()); + ProgressItem::Done done(this); + /* refresh the progress item about once per 1% of the entries; the step is used as a modulus in the loop below, so it must never be 0 (QMIN(1, n/100) was 0 whenever there were fewer than 100 entries) */ + const uint stepSize = QMAX(1, entries().count()/100); + const bool showProgress = options() & ExportProgress; + + GUI::CursorSaver cs; + bool success = true; + uint j = 0; + for(Data::EntryVec::ConstIterator entryIt = entries().begin(); entryIt != entries().end(); ++entryIt, ++j) { + success &= writeFile(libraryDir, entryIt.data()); + if(showProgress && j%stepSize == 0) { + item.setProgress(j); + kapp->processEvents(); + } + } + return success; +} + +// this isn't true YAML export, of course +// everything is put between quotes except for the rating, just to be sure it's interpreted as a string +bool AlexandriaExporter::writeFile(const QDir& dir_, Data::ConstEntryPtr entry_) { + // the filename is the isbn without dashes, followed by .yaml + QString isbn = entry_->field(QString::fromLatin1("isbn")); + if(isbn.isEmpty()) { + return false; // can't write it since Alexandria uses isbn as name of file + } + isbn.remove('-'); // remove dashes + + QFile file(dir_.absPath() + QDir::separator() + isbn + QString::fromLatin1(".yaml")); + if(!file.open(IO_WriteOnly)) { + return false; + } + + // do we format? 
+ bool format = options() & Export::ExportFormatted; + + QTextStream ts(&file); + // alexandria uses utf-8 all the time + ts.setEncoding(QTextStream::UnicodeUTF8); + ts << "--- !ruby/object:Alexandria::Book\n"; + ts << "authors:\n"; + QStringList authors = entry_->fields(QString::fromLatin1("author"), format); + for(QStringList::Iterator it = authors.begin(); it != authors.end(); ++it) { + ts << " - " << escapeText(*it) << "\n"; + } + // Alexandria crashes when no authors, and uses n/a when none + if(authors.count() == 0) { + ts << " - n/a\n"; + } + + QString tmp = entry_->field(QString::fromLatin1("title"), format); + ts << "title: \"" << escapeText(tmp) << "\"\n"; + + // Alexandria refers to the binding as the edition + tmp = entry_->field(QString::fromLatin1("binding"), format); + ts << "edition: \"" << escapeText(tmp) << "\"\n"; + + // sometimes Alexandria interprets the isbn as a number instead of a string + // I have no idea how to debug ruby, so err on safe side and add quotes + ts << "isbn: \"" << isbn << "\"\n"; + + tmp = entry_->field(QString::fromLatin1("comments"), format); + ts << "notes: \"" << escapeText(tmp) << "\"\n"; + + tmp = entry_->field(QString::fromLatin1("publisher"), format); + // publisher uses n/a when empty + ts << "publisher: \"" << (tmp.isEmpty() ? 
QString::fromLatin1("n/a") : escapeText(tmp)) << "\"\n"; + + tmp = entry_->field(QString::fromLatin1("pub_year"), format); + if(!tmp.isEmpty()) { + ts << "publishing_year: \"" << escapeText(tmp) << "\"\n"; + } + + tmp = entry_->field(QString::fromLatin1("rating")); + bool ok; + int rating = Tellico::toUInt(tmp, &ok); + if(ok) { + ts << "rating: " << rating << "\n"; + } + + file.close(); + + QString cover = entry_->field(QString::fromLatin1("cover")); + if(cover.isEmpty() || !(options() & Export::ExportImages)) { + return true; // all done + } + + /* img1 is always written as <isbn>_medium.jpg and img2 as <isbn>_small.jpg, so each branch below has to leave the larger picture in img1 and the smaller one in img2 */ + QImage img1(ImageFactory::imageById(cover)); + QImage img2; + QString filename = dir_.absPath() + QDir::separator() + isbn; + if(img1.height() > ALEXANDRIA_MAX_SIZE_SMALL) { + if(img1.height() > ALEXANDRIA_MAX_SIZE_MEDIUM) { // limit maximum size + img1 = img1.scale(ALEXANDRIA_MAX_SIZE_MEDIUM, ALEXANDRIA_MAX_SIZE_MEDIUM, QImage::ScaleMin); + } + img2 = img1.scale(ALEXANDRIA_MAX_SIZE_SMALL, ALEXANDRIA_MAX_SIZE_SMALL, QImage::ScaleMin); + } else { + /* the cover is already no taller than the small size: keep it as the small image and scale a copy up for the medium one (before, the enlarged copy ended up in _small.jpg and the original in _medium.jpg) */ + img2 = img1; + img1 = img1.smoothScale(ALEXANDRIA_MAX_SIZE_MEDIUM, ALEXANDRIA_MAX_SIZE_MEDIUM, QImage::ScaleMin); // scale up + } + if(!img1.save(filename + QString::fromLatin1("_medium.jpg"), "JPEG") + || !img2.save(filename + QString::fromLatin1("_small.jpg"), "JPEG")) { + return false; + } + return true; +} + +#include "alexandriaexporter.moc" diff --git a/src/translators/alexandriaexporter.h b/src/translators/alexandriaexporter.h new file mode 100644 index 0000000..033bb14 --- /dev/null +++ b/src/translators/alexandriaexporter.h @@ -0,0 +1,51 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General 
Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef ALEXANDRIAEXPORTER_H +#define ALEXANDRIAEXPORTER_H + +class QDir; + +#include "exporter.h" + +namespace Tellico { + namespace Data { + class Entry; + } + namespace Export { + +/** + * @author Robby Stephenson + */ +class AlexandriaExporter : public Exporter { +Q_OBJECT + +public: + AlexandriaExporter() : Exporter() {} + + virtual bool exec(); + virtual QString formatString() const; + virtual QString fileFilter() const { return QString::null; } // no need for this + + // no config options + virtual QWidget* widget(QWidget*, const char*) { return 0; } + +private: + static QString& escapeText(QString& str); + + bool writeFile(const QDir& dir, Data::ConstEntryPtr entry); +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/alexandriaimporter.cpp b/src/translators/alexandriaimporter.cpp new file mode 100644 index 0000000..5e49e86 --- /dev/null +++ b/src/translators/alexandriaimporter.cpp @@ -0,0 +1,255 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "alexandriaimporter.h" +#include "../collections/bookcollection.h" +#include "../entry.h" +#include "../field.h" +#include "../latin1literal.h" +#include "../isbnvalidator.h" +#include "../imagefactory.h" +#include "../progressmanager.h" +#include "../tellico_debug.h" + 
+#include <kcombobox.h> +#include <kapplication.h> +#include <kstringhandler.h> + +#include <qlayout.h> +#include <qlabel.h> +#include <qgroupbox.h> + +using Tellico::Import::AlexandriaImporter; + +bool AlexandriaImporter::canImport(int type) const { + return type == Data::Collection::Book; +} + +Tellico::Data::CollPtr AlexandriaImporter::collection() { + if(!m_widget || m_library->count() == 0) { + return 0; + } + + m_coll = new Data::BookCollection(true); + + QDir dataDir = m_libraryDir; + dataDir.cd(m_library->currentText()); + dataDir.setFilter(QDir::Files | QDir::Readable | QDir::NoSymLinks); + + const QString title = QString::fromLatin1("title"); + const QString author = QString::fromLatin1("author"); + const QString year = QString::fromLatin1("pub_year"); + const QString binding = QString::fromLatin1("binding"); + const QString isbn = QString::fromLatin1("isbn"); + const QString pub = QString::fromLatin1("publisher"); + const QString rating = QString::fromLatin1("rating"); + const QString cover = QString::fromLatin1("cover"); + const QString comments = QString::fromLatin1("comments"); + + // start with yaml files + dataDir.setNameFilter(QString::fromLatin1("*.yaml")); + const QStringList files = dataDir.entryList(); + const uint numFiles = files.count(); + const uint stepSize = QMAX(s_stepSize, numFiles/100); + const bool showProgress = options() & ImportProgress; + + ProgressItem& item = ProgressManager::self()->newProgressItem(this, progressLabel(), true); + item.setTotalSteps(numFiles); + connect(&item, SIGNAL(signalCancelled(ProgressItem*)), SLOT(slotCancel())); + ProgressItem::Done done(this); + + QStringList covers; + covers << QString::fromLatin1(".cover") + << QString::fromLatin1("_medium.jpg") + << QString::fromLatin1("_small.jpg"); + + QTextStream ts; + ts.setEncoding(QTextStream::UnicodeUTF8); // YAML is always utf8? 
+ uint j = 0; + for(QStringList::ConstIterator it = files.begin(); !m_cancelled && it != files.end(); ++it, ++j) { + QFile file(dataDir.absFilePath(*it)); + if(!file.open(IO_ReadOnly)) { + continue; + } + + Data::EntryPtr entry = new Data::Entry(m_coll); + + bool readNextLine = true; + ts.unsetDevice(); + ts.setDevice(&file); + QString line; + while(!ts.atEnd()) { + if(readNextLine) { + line = ts.readLine(); + } else { + readNextLine = true; + } + // skip the line that starts with --- + if(line.isEmpty() || line.startsWith(QString::fromLatin1("---"))) { + continue; + } + if(line.endsWith(QChar('\\'))) { + line.truncate(line.length()-1); // remove last character + line += ts.readLine(); + } + + cleanLine(line); + QString alexField = line.section(':', 0, 0); + QString alexValue = line.section(':', 1).stripWhiteSpace(); + clean(alexValue); + + // Alexandria uses "n/a for empty values, and it is translated + // only thing we can do is check for english value and continue + if(alexValue == Latin1Literal("n/a")) { + continue; + } + + if(alexField == Latin1Literal("authors")) { + QStringList authors; + line = ts.readLine(); + QRegExp begin(QString::fromLatin1("^\\s*-\\s+")); + while(!line.isNull() && line.find(begin) > -1) { + line.remove(begin); + authors += clean(line); + line = ts.readLine(); + } + entry->setField(author, authors.join(QString::fromLatin1("; "))); + // the next line has already been read + readNextLine = false; + + // Alexandria calls the edition the binding + } else if(alexField == Latin1Literal("edition")) { + // special case if it's "Hardcover" + if(alexValue.lower() == Latin1Literal("hardcover")) { + alexValue = i18n("Hardback"); + } + entry->setField(binding, alexValue); + + } else if(alexField == Latin1Literal("publishing_year")) { + entry->setField(year, alexValue); + + } else if(alexField == Latin1Literal("isbn")) { + const ISBNValidator val(0); + val.fixup(alexValue); + entry->setField(isbn, alexValue); + + // now find cover image + KURL u; + 
alexValue.remove('-'); + for(QStringList::Iterator ext = covers.begin(); ext != covers.end(); ++ext) { + u.setPath(dataDir.absFilePath(alexValue + *ext)); + if(!QFile::exists(u.path())) { + continue; + } + QString id = ImageFactory::addImage(u, true); + if(!id.isEmpty()) { + entry->setField(cover, id); + break; + } + } + } else if(alexField == Latin1Literal("notes")) { + entry->setField(comments, alexValue); + + // now try by name then title + } else if(m_coll->fieldByName(alexField)) { + entry->setField(alexField, alexValue); + + } else if(m_coll->fieldByTitle(alexField)) { + entry->setField(m_coll->fieldByTitle(alexField), alexValue); + } + } + m_coll->addEntries(entry); + + if(showProgress && j%stepSize == 0) { + ProgressManager::self()->setProgress(this, j); + kapp->processEvents(); + } + } + + return m_coll; +} + +QWidget* AlexandriaImporter::widget(QWidget* parent_, const char* name_/*=0*/) { + if(m_widget) { + return m_widget; + } + + m_libraryDir = QDir::home(); + m_libraryDir.setFilter(QDir::Dirs | QDir::Readable | QDir::NoSymLinks); + + m_widget = new QWidget(parent_, name_); + QVBoxLayout* l = new QVBoxLayout(m_widget); + + QGroupBox* box = new QGroupBox(2, Qt::Horizontal, i18n("Alexandria Options"), m_widget); + QLabel* label = new QLabel(i18n("&Library:"), box); + m_library = new KComboBox(box); + label->setBuddy(m_library); + + // .alexandria might not exist + if(m_libraryDir.cd(QString::fromLatin1(".alexandria"))) { + QStringList dirs = m_libraryDir.entryList(); + dirs.remove(QString::fromLatin1(".")); // why can't I tell QDir not to include these? 
QDir::Hidden doesn't work + dirs.remove(QString::fromLatin1("..")); + m_library->insertStringList(dirs); + } + + l->addWidget(box); + l->addStretch(1); + return m_widget; +} + +QString& AlexandriaImporter::cleanLine(QString& str_) { + static QRegExp escRx(QString::fromLatin1("\\\\x(\\w\\w)"), false); + str_.remove(QString::fromLatin1("\\r")); + str_.replace(QString::fromLatin1("\\n"), QString::fromLatin1("\n")); + str_.replace(QString::fromLatin1("\\t"), QString::fromLatin1("\t")); + + // YAML uses escape sequences like \xC3 + int pos = escRx.search(str_); + int origPos = pos; + QCString bytes; + while(pos > -1) { + bool ok; + char c = escRx.cap(1).toInt(&ok, 16); + if(ok) { + bytes += c; + } else { + bytes = QCString(); + break; + } + pos = escRx.search(str_, pos+1); + } + if(!bytes.isEmpty()) { + str_.replace(origPos, bytes.length()*4, QString::fromUtf8(bytes.data())); + } + return str_; +} + +QString& AlexandriaImporter::clean(QString& str_) { + const QRegExp quote(QString::fromLatin1("\\\\\"")); // equals \" + if(str_.startsWith(QChar('\'')) || str_.startsWith(QChar('"'))) { + str_.remove(0, 1); + } + if(str_.endsWith(QChar('\'')) || str_.endsWith(QChar('"'))) { + str_.truncate(str_.length()-1); + } + // we ignore YAML tags, this is not actually a good parser, but will do for now + str_.remove(QRegExp(QString::fromLatin1("^![^\\s]*\\s+"))); + return str_.replace(quote, QChar('"')); +} + +void AlexandriaImporter::slotCancel() { + m_cancelled = true; +} + +#include "alexandriaimporter.moc" diff --git a/src/translators/alexandriaimporter.h b/src/translators/alexandriaimporter.h new file mode 100644 index 0000000..2c12923 --- /dev/null +++ b/src/translators/alexandriaimporter.h @@ -0,0 +1,72 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + 
+/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef ALEXANDRIAIMPORTER_H +#define ALEXANDRIAIMPORTER_H + +class KComboBox; + +#include "importer.h" +#include "../datavectors.h" + +#include <qdir.h> + +namespace Tellico { + namespace Import { + +/** + * An importer for importing collections used by Alexandria, the Gnome book collection manager. + * + * The libraries are assumed to be in $HOME/.alexandria. The file format is YAML, but instead of + * using a real YAML reader, the file is parsed line-by-line, so it's very crude. When Alexandria + * adds new fields or types, this will have to be updated. + * + * @author Robby Stephenson + */ +class AlexandriaImporter : public Importer { +Q_OBJECT + +public: + /** + * Creates an importer that has neither a collection nor an options widget yet. + */ + AlexandriaImporter() : Importer(), m_coll(0), m_widget(0), m_cancelled(false) {} + /** + * Empty destructor. + */ + virtual ~AlexandriaImporter() {} + + /** + * Reads the *.yaml files of the library selected in the options widget into a new book collection. + * + * @return The collection, or 0 if @ref widget() has not been called yet or no library is listed + */ + virtual Data::CollPtr collection(); + /** + * Returns the options widget, creating it on the first call. It holds a combo box listing the + * directories found in $HOME/.alexandria; later calls return the same widget. + * + * @param parent The parent of the widget + * @param name The widget name + */ + virtual QWidget* widget(QWidget* parent, const char* name=0); + /** + * Only book collections can be imported. + */ + virtual bool canImport(int type) const; + +public slots: + /** + * Sets the cancelled flag, which ends the file loop in @ref collection(). + */ + void slotCancel(); + +private: + /** Undoes the escape sequences of a raw line, in place. */ + static QString& cleanLine(QString& str); + /** Strips a leading and trailing quote and a leading YAML tag and unescapes \", in place. */ + static QString& clean(QString& str); + + Data::CollPtr m_coll; + QWidget* m_widget; + /** Library chooser; only set once @ref widget() has run. */ + KComboBox* m_library; + + QDir m_libraryDir; + bool m_cancelled : 1; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/amcimporter.cpp b/src/translators/amcimporter.cpp new file mode 100644 index 0000000..8e45cb7 --- /dev/null +++ b/src/translators/amcimporter.cpp @@ -0,0 +1,294 @@ +/*************************************************************************** + copyright : (C) 2006 by Robby Stephenson + email : robby@periapsis.org + 
***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +// The information about the AMC file format was taken from the source code for +// GCfilms, (GPL) (c) 2005 Tian +// Monotheka, (GPL) (c) 2004, 2005 Michael Dominic K. +// 2005 Aurelien Mino + +#include "amcimporter.h" +#include "../collections/videocollection.h" +#include "../imagefactory.h" +#include "../latin1literal.h" +#include "../progressmanager.h" +#include "../tellico_debug.h" + +#include <kapplication.h> + +#include <qfile.h> +#include <qimage.h> + +#include <limits.h> + +namespace { + static const QCString AMC_FILE_ID = " AMC_X.Y Ant Movie Catalog 3.5.x www.buypin.com www.antp.be "; +} + +using Tellico::Import::AMCImporter; + +AMCImporter::AMCImporter(const KURL& url_) : DataImporter(url_), m_coll(0), m_cancelled(false) { +} + +AMCImporter::~AMCImporter() { +} + +bool AMCImporter::canImport(int type) const { + return type == Data::Collection::Video; +} + +Tellico::Data::CollPtr AMCImporter::collection() { + if(m_coll) { + return m_coll; + } + + if(!fileRef().open()) { + return 0; + } + + QIODevice* f = fileRef().file(); + m_ds.setDevice(f); + // AMC is always little-endian? 
can't confirm + m_ds.setByteOrder(QDataStream::LittleEndian); + + const uint l = AMC_FILE_ID.length(); + QMemArray<char> buffer(l+1); + m_ds.readRawBytes(buffer.data(), l); + QString version = QString::fromLocal8Bit(buffer, l); + QRegExp versionRx(QString::fromLatin1(".+AMC_(\\d+)\\.(\\d+).+")); + if(version.find(versionRx) == -1) { + myDebug() << "AMCImporter::collection() - no file id match" << endl; + return 0; + } + + ProgressItem& item = ProgressManager::self()->newProgressItem(this, progressLabel(), true); + item.setTotalSteps(f->size()); + connect(&item, SIGNAL(signalCancelled(ProgressItem*)), SLOT(slotCancel())); + ProgressItem::Done done(this); + + m_coll = new Data::VideoCollection(true); + + m_majVersion = versionRx.cap(1).toInt(); + m_minVersion = versionRx.cap(2).toInt(); +// myDebug() << m_majVersion << "::" << m_minVersion << endl; + + readString(); // name + readString(); // email + /* the icq string is only present before format 3.5; compare major and minor together, since (major <= 3 && minor < 5) wrongly excluded e.g. 2.7. NOTE(review): 3.5 as the cut-off is taken from the original test - confirm against the AMC format */ + if(m_majVersion < 3 || (m_majVersion == 3 && m_minVersion < 5)) { + readString(); // icq + } + readString(); // webpage + readString(); // description + + const bool showProgress = options() & ImportProgress; + + while(!m_cancelled && !f->atEnd()) { + readEntry(); + if(showProgress) { + ProgressManager::self()->setProgress(this, f->at()); + kapp->processEvents(); + } + } + + return m_coll; +} + +/* reads one byte from the data stream and returns it as a boolean */ +bool AMCImporter::readBool() { + Q_UINT8 b; + m_ds >> b; + return b; +} + +/* reads one 32-bit unsigned integer from the data stream; a value of UINT_MAX or more is returned as 0 */ +Q_UINT32 AMCImporter::readInt() { + Q_UINT32 i; + m_ds >> i; + if(i >= UINT_MAX) { + i = 0; + } + return i; +} + +/* reads a length-prefixed string from the data stream and decodes it from the local 8-bit encoding; a length of 0 gives a null string */ +QString AMCImporter::readString() { + // The serialization format is a length specifier first, then l bytes of data + uint l = readInt(); + if(l == 0) { + return QString(); + } + QMemArray<char> buffer(l+1); + m_ds.readRawBytes(buffer.data(), l); + QString s = QString::fromLocal8Bit(buffer, l); +// myDebug() << "string: " << s << endl; + return s; +} + +QString AMCImporter::readImage(const QString& format_) { + uint l = readInt(); + if(l == 0) { + return QString(); + } + QMemArray<char> buffer(l+1); + 
m_ds.readRawBytes(buffer.data(), l); + QByteArray bytes; + bytes.setRawData(buffer.data(), l); + QImage img(bytes); + bytes.resetRawData(buffer.data(), l); + if(img.isNull()) { + myDebug() << "AMCImporter::readImage() - null image" << endl; + return QString(); + } + QString format = QString::fromLatin1("PNG"); + if(format_ == Latin1Literal(".jpg")) { + format = QString::fromLatin1("JPEG"); + } else if(format_ == Latin1Literal(".gif")) { + format = QString::fromLatin1("GIF"); + } + return ImageFactory::addImage(img, format); +} + +/* reads the fields of one movie record from the data stream, in file order, into a new entry of m_coll */ +void AMCImporter::readEntry() { + Data::EntryPtr e = new Data::Entry(m_coll); + + int id = readInt(); + if(id > 0) { + e->setId(id); + } + readInt(); // add date + + int rating = readInt(); + /* from format 3.5 on the stored rating is divided by 10; compare major and minor together, since (major >= 3 && minor >= 5) wrongly excluded e.g. 4.0. NOTE(review): 3.5 as the cut-off is taken from the original test - confirm against the AMC format */ + if(m_majVersion > 3 || (m_majVersion == 3 && m_minVersion >= 5)) { + rating /= 10; + } + e->setField(QString::fromLatin1("rating"), QString::number(rating)); + int year = readInt(); + if(year > 0) { + e->setField(QString::fromLatin1("year"), QString::number(year)); + } + int time = readInt(); + if(time > 0) { + e->setField(QString::fromLatin1("running-time"), QString::number(time)); + } + + readInt(); // video bitrate + readInt(); // audio bitrate + readInt(); // number of files + readBool(); // checked + readString(); // media label + e->setField(QString::fromLatin1("medium"), readString()); + readString(); // source + readString(); // borrower + QString s = readString(); // title + if(!s.isEmpty()) { + e->setField(QString::fromLatin1("title"), s); + } + QString s2 = readString(); // translated title + if(s.isEmpty()) { + e->setField(QString::fromLatin1("title"), s2); + } + + e->setField(QString::fromLatin1("director"), readString()); + s = readString(); + QRegExp roleRx(QString::fromLatin1("(.+) \\(([^(]+)\\)")); + roleRx.setMinimal(true); + if(s.find(roleRx) > -1) { + QString role = roleRx.cap(2).lower(); + if(role == Latin1Literal("story") || role == Latin1Literal("written by")) { + e->setField(QString::fromLatin1("writer"), roleRx.cap(1)); + } else { + 
e->setField(QString::fromLatin1("producer"), s); + } + } else { + e->setField(QString::fromLatin1("producer"), s); + } + e->setField(QString::fromLatin1("nationality"), readString()); + e->setField(QString::fromLatin1("genre"), readString().replace(QString::fromLatin1(", "), QString::fromLatin1("; "))); + + e->setField(QString::fromLatin1("cast"), parseCast(readString()).join(QString::fromLatin1("; "))); + + readString(); // url + e->setField(QString::fromLatin1("plot"), readString()); + e->setField(QString::fromLatin1("comments"), readString()); + s = readString(); // video format + QRegExp regionRx(QString::fromLatin1("Region \\d")); + if(s.find(regionRx) > -1) { + e->setField(QString::fromLatin1("region"), regionRx.cap(0)); + } + e->setField(QString::fromLatin1("audio-track"), readString()); // audio format + readString(); // resolution + readString(); // frame rate + e->setField(QString::fromLatin1("language"), readString()); // audio language + e->setField(QString::fromLatin1("subtitle"), readString()); // subtitle + readString(); // file size + s = readString(); // picture extension + s = readImage(s); // picture + if(!s.isEmpty()) { + e->setField(QString::fromLatin1("cover"), s); + } + + m_coll->addEntries(e); +} + +QStringList AMCImporter::parseCast(const QString& text_) { + QStringList cast; + int nPar = 0; + QRegExp castRx(QString::fromLatin1("[,()]")); + QString person, role; + int oldPos = 0; + for(int pos = text_.find(castRx); pos > -1; pos = text_.find(castRx, pos+1)) { + if(text_.at(pos) == ',' && nPar%2 == 0) { + // we're done with this one + person += text_.mid(oldPos, pos-oldPos).stripWhiteSpace(); + QString all = person; + if(!role.isEmpty()) { + if(role.startsWith(QString::fromLatin1("as "))) { + role = role.mid(3); + } + all += "::" + role; + } + cast << all; + person.truncate(0); + role.truncate(0); + oldPos = pos+1; // add one to go past comma + } else if(text_.at(pos) == '(') { + if(nPar == 0) { + person = text_.mid(oldPos, 
pos-oldPos).stripWhiteSpace(); + oldPos = pos+1; // add one to go past parenthesis + } + ++nPar; + } else if(text_.at(pos) == ')') { + --nPar; + if(nPar == 0) { + role = text_.mid(oldPos, pos-oldPos).stripWhiteSpace(); + oldPos = pos+1; // add one to go past parenthesis + } + } + } + // grab the last one + if(nPar%2 == 0) { + int pos = text_.length(); + person += text_.mid(oldPos, pos-oldPos).stripWhiteSpace(); + QString all = person; + if(!role.isEmpty()) { + if(role.startsWith(QString::fromLatin1("as "))) { + role = role.mid(3); + } + all += "::" + role; + } + cast << all; + } + return cast; +} + +void AMCImporter::slotCancel() { + m_cancelled = true; +} + +#include "amcimporter.moc" diff --git a/src/translators/amcimporter.h b/src/translators/amcimporter.h new file mode 100644 index 0000000..d1b9d1a --- /dev/null +++ b/src/translators/amcimporter.h @@ -0,0 +1,55 @@ +/*************************************************************************** + copyright : (C) 2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_IMPORT_AMCIMPORTER_H +#define TELLICO_IMPORT_AMCIMPORTER_H + +#include "dataimporter.h" + +namespace Tellico { + namespace Import { + +/** + @author Robby Stephenson + */ +class AMCImporter : public DataImporter { +Q_OBJECT +public: + AMCImporter(const KURL& url); + virtual ~AMCImporter(); + + virtual Data::CollPtr collection(); + bool canImport(int type) const; + +public slots: + void slotCancel(); + +private: + bool readBool(); + Q_UINT32 readInt(); + QString readString(); + QString 
readImage(const QString& format); + void readEntry(); + QStringList parseCast(const QString& text); + + Data::CollPtr m_coll; + bool m_cancelled : 1; + QDataStream m_ds; + int m_majVersion; + int m_minVersion; +}; + + } // end namespace +} // end namespace + +#endif diff --git a/src/translators/audiofileimporter.cpp b/src/translators/audiofileimporter.cpp new file mode 100644 index 0000000..f825964 --- /dev/null +++ b/src/translators/audiofileimporter.cpp @@ -0,0 +1,424 @@ +/*************************************************************************** + copyright : (C) 2004-2007 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include <config.h> + +#include "audiofileimporter.h" +#include "../collections/musiccollection.h" +#include "../entry.h" +#include "../field.h" +#include "../latin1literal.h" +#include "../imagefactory.h" +#include "../tellico_utils.h" +#include "../tellico_kernel.h" +#include "../progressmanager.h" +#include "../tellico_debug.h" + +#ifdef HAVE_TAGLIB +#include <taglib/fileref.h> +#include <taglib/tag.h> +#include <taglib/id3v2tag.h> +#include <taglib/mpegfile.h> +#include <taglib/vorbisfile.h> +#include <taglib/flacfile.h> +#include <taglib/audioproperties.h> +#endif + +#include <klocale.h> +#include <kapplication.h> + +#include <qlabel.h> +#include <qlayout.h> +#include <qvgroupbox.h> +#include <qcheckbox.h> +#include <qdir.h> +#include <qwhatsthis.h> + +using Tellico::Import::AudioFileImporter; + +AudioFileImporter::AudioFileImporter(const KURL& url_) : Tellico::Import::Importer(url_) + , 
m_coll(0) + , m_widget(0) + , m_cancelled(false) { +} + +bool AudioFileImporter::canImport(int type) const { + return type == Data::Collection::Album; +} + +Tellico::Data::CollPtr AudioFileImporter::collection() { +#ifndef HAVE_TAGLIB + return 0; +#else + + if(m_coll) { + return m_coll; + } + + ProgressItem& item = ProgressManager::self()->newProgressItem(this, i18n("Scanning audio files..."), true); + item.setTotalSteps(100); + connect(&item, SIGNAL(signalCancelled(ProgressItem*)), SLOT(slotCancel())); + ProgressItem::Done done(this); + + // TODO: allow remote audio file importing + QStringList dirs = url().path(); + if(m_recursive->isChecked()) { + dirs += Tellico::findAllSubDirs(dirs[0]); + } + + if(m_cancelled) { + return 0; + } + + const bool showProgress = options() & ImportProgress; + + QStringList files; + for(QStringList::ConstIterator it = dirs.begin(); !m_cancelled && it != dirs.end(); ++it) { + if((*it).isEmpty()) { + continue; + } + + QDir dir(*it); + dir.setFilter(QDir::Files | QDir::Readable | QDir::Hidden); // hidden since I want directory files + const QStringList list = dir.entryList(); + for(QStringList::ConstIterator it2 = list.begin(); it2 != list.end(); ++it2) { + files += dir.absFilePath(*it2); + } +// kapp->processEvents(); not needed ? 
+ } + + if(m_cancelled) { + return 0; + } + item.setTotalSteps(files.count()); + + const QString title = QString::fromLatin1("title"); + const QString artist = QString::fromLatin1("artist"); + const QString year = QString::fromLatin1("year"); + const QString genre = QString::fromLatin1("genre"); + const QString track = QString::fromLatin1("track"); + const QString comments = QString::fromLatin1("comments"); + const QString file = QString::fromLatin1("file"); + + m_coll = new Data::MusicCollection(true); + + const bool addFile = m_addFilePath->isChecked(); + const bool addBitrate = m_addBitrate->isChecked(); + + Data::FieldPtr f; + if(addFile) { + f = m_coll->fieldByName(file); + if(!f) { + f = new Data::Field(file, i18n("Files"), Data::Field::Table); + m_coll->addField(f); + } + f->setProperty(QString::fromLatin1("column1"), i18n("Files")); + if(addBitrate) { + f->setProperty(QString::fromLatin1("columns"), QChar('2')); + f->setProperty(QString::fromLatin1("column2"), i18n("Bitrate")); + } else { + f->setProperty(QString::fromLatin1("columns"), QChar('1')); + } + } + + QMap<QString, Data::EntryPtr> albumMap; + + QStringList directoryFiles; + const uint stepSize = QMAX(static_cast<size_t>(1), files.count() / 100); + + bool changeTrackTitle = true; + uint j = 0; + for(QStringList::ConstIterator it = files.begin(); !m_cancelled && it != files.end(); ++it, ++j) { + TagLib::FileRef f(QFile::encodeName(*it)); + if(f.isNull() || !f.tag()) { + if((*it).endsWith(QString::fromLatin1("/.directory"))) { + directoryFiles += *it; + } + continue; + } + + TagLib::Tag* tag = f.tag(); + QString album = TStringToQString(tag->album()).stripWhiteSpace(); + if(album.isEmpty()) { + // can't do anything since tellico entries are by album + kdWarning() << "Skipping: no album listed for " << *it << endl; + continue; + } + int disc = discNumber(f); + if(disc > 1 && !m_coll->hasField(QString::fromLatin1("track%1").arg(disc))) { + Data::FieldPtr f2 = new 
Data::Field(QString::fromLatin1("track%1").arg(disc), + i18n("Tracks (Disc %1)").arg(disc), + Data::Field::Table); + f2->setFormatFlag(Data::Field::FormatTitle); + f2->setProperty(QString::fromLatin1("columns"), QChar('3')); + f2->setProperty(QString::fromLatin1("column1"), i18n("Title")); + f2->setProperty(QString::fromLatin1("column2"), i18n("Artist")); + f2->setProperty(QString::fromLatin1("column3"), i18n("Length")); + m_coll->addField(f2); + if(changeTrackTitle) { + Data::FieldPtr newTrack = new Data::Field(*m_coll->fieldByName(track)); + newTrack->setTitle(i18n("Tracks (Disc %1)").arg(1)); + m_coll->modifyField(newTrack); + changeTrackTitle = false; + } + } + bool various = false; + bool exists = true; + Data::EntryPtr entry = 0; + if(!(entry = albumMap[album.lower()])) { + entry = new Data::Entry(m_coll); + albumMap.insert(album.lower(), entry); + exists = false; + } + // album entries use the album name as the title + entry->setField(title, album); + QString a = TStringToQString(tag->artist()).stripWhiteSpace(); + if(!a.isEmpty()) { + if(exists && entry->field(artist).lower() != a.lower()) { + various = true; + entry->setField(artist, i18n("(Various)")); + } else { + entry->setField(artist, a); + } + } + if(tag->year() > 0) { + entry->setField(year, QString::number(tag->year())); + } + if(!tag->genre().isEmpty()) { + entry->setField(genre, TStringToQString(tag->genre()).stripWhiteSpace()); + } + + if(!tag->title().isEmpty()) { + int trackNum = tag->track(); + if(trackNum <= 0) { // try to figure out track number from file name + QFileInfo f(*it); + QString fileName = f.baseName(); + QString numString; + int i = 0; + const int len = fileName.length(); + while(fileName[i].isNumber() && i < len) { + i++; + } + if(i == 0) { // does not start with a number + i = len - 1; + while(i >= 0 && fileName[i].isNumber()) { + i--; + } + // file name ends with a number + if(i != len - 1) { + numString = fileName.mid(i + 1); + } + } else { + numString = fileName.mid(0, i); 
+ } + bool ok; + int number = numString.toInt(&ok); + if(ok) { + trackNum = number; + } + } + if(trackNum > 0) { + QString t = TStringToQString(tag->title()).stripWhiteSpace(); + t += "::" + a; + const int len = f.audioProperties()->length(); + if(len > 0) { + t += "::" + Tellico::minutes(len); + } + QString realTrack = disc > 1 ? track + QString::number(disc) : track; + entry->setField(realTrack, insertValue(entry->field(realTrack), t, trackNum)); + if(addFile) { + QString fileValue = *it; + if(addBitrate) { + fileValue += "::" + QString::number(f.audioProperties()->bitrate()); + } + entry->setField(file, insertValue(entry->field(file), fileValue, trackNum)); + } + } else { + myDebug() << *it << " contains no track number and track number cannot be determined, so the track is not imported." << endl; + } + } else { + myDebug() << *it << " has an empty title, so the track is not imported." << endl; + } + if(!tag->comment().stripWhiteSpace().isEmpty()) { + QString c = entry->field(comments); + if(!c.isEmpty()) { + c += QString::fromLatin1("<br/>"); + } + if(!tag->title().isEmpty()) { + c += QString::fromLatin1("<em>") + TStringToQString(tag->title()).stripWhiteSpace() + QString::fromLatin1("</em> - "); + } + c += TStringToQString(tag->comment().stripWhiteSpace()).stripWhiteSpace(); + entry->setField(comments, c); + } + + if(!exists) { + m_coll->addEntries(entry); + } + if(showProgress && j%stepSize == 0) { + ProgressManager::self()->setTotalSteps(this, files.count() + directoryFiles.count()); + ProgressManager::self()->setProgress(this, j); + kapp->processEvents(); + } + +/* kdDebug() << "-- TAG --" << endl; + kdDebug() << "title - \"" << tag->title().to8Bit() << "\"" << endl; + kdDebug() << "artist - \"" << tag->artist().to8Bit() << "\"" << endl; + kdDebug() << "album - \"" << tag->album().to8Bit() << "\"" << endl; + kdDebug() << "year - \"" << tag->year() << "\"" << endl; + kdDebug() << "comment - \"" << tag->comment().to8Bit() << "\"" << endl; + kdDebug() << 
"track - \"" << tag->track() << "\"" << endl; + kdDebug() << "genre - \"" << tag->genre().to8Bit() << "\"" << endl;*/ + } + + if(m_cancelled) { + m_coll = 0; + return 0; + } + + QTextStream ts; + QRegExp iconRx(QString::fromLatin1("Icon\\s*=\\s*(.*)")); + for(QStringList::ConstIterator it = directoryFiles.begin(); !m_cancelled && it != directoryFiles.end(); ++it, ++j) { + QFile file(*it); + if(!file.open(IO_ReadOnly)) { + continue; + } + ts.unsetDevice(); + ts.setDevice(&file); + for(QString line = ts.readLine(); !line.isNull(); line = ts.readLine()) { + if(!iconRx.exactMatch(line)) { + continue; + } + QDir thisDir(*it); + thisDir.cdUp(); + QFileInfo fi(thisDir, iconRx.cap(1)); + Data::EntryPtr entry = albumMap[thisDir.dirName()]; + if(!entry) { + continue; + } + KURL u; + u.setPath(fi.absFilePath()); + QString id = ImageFactory::addImage(u, true); + if(!id.isEmpty()) { + entry->setField(QString::fromLatin1("cover"), id); + } + break; + } + + if(showProgress && j%stepSize == 0) { + ProgressManager::self()->setProgress(this, j); + kapp->processEvents(); + } + } + + if(m_cancelled) { + m_coll = 0; + return 0; + } + + return m_coll; +#endif +} + +QWidget* AudioFileImporter::widget(QWidget* parent_, const char* name_) { + if(m_widget) { + return m_widget; + } + + m_widget = new QWidget(parent_, name_); + QVBoxLayout* l = new QVBoxLayout(m_widget); + + QVGroupBox* box = new QVGroupBox(i18n("Audio File Options"), m_widget); + + m_recursive = new QCheckBox(i18n("Recursive &folder search"), box); + QWhatsThis::add(m_recursive, i18n("If checked, folders are recursively searched for audio files.")); + // by default, make it checked + m_recursive->setChecked(true); + + m_addFilePath = new QCheckBox(i18n("Include file &location"), box); + QWhatsThis::add(m_addFilePath, i18n("If checked, the file names for each track are added to the entries.")); + m_addFilePath->setChecked(false); + connect(m_addFilePath, SIGNAL(toggled(bool)), SLOT(slotAddFileToggled(bool))); + + m_addBitrate 
= new QCheckBox(i18n("Include &bitrate"), box); + QWhatsThis::add(m_addBitrate, i18n("If checked, the bitrate for each track is added to the entries.")); + m_addBitrate->setChecked(false); + m_addBitrate->setEnabled(false); + + l->addWidget(box); + l->addStretch(1); + return m_widget; +} + +// pos_ is NOT zero-indexed! +QString AudioFileImporter::insertValue(const QString& str_, const QString& value_, uint pos_) { + QStringList list = Data::Field::split(str_, true); + for(uint i = list.count(); i < pos_; ++i) { + list += QString::null; + } + if(!list[pos_-1].isNull()) { + myDebug() << "AudioFileImporter::insertValue() - overwriting track " << pos_ << endl; + myDebug() << "*** Old value: " << list[pos_-1] << endl; + myDebug() << "*** New value: " << value_ << endl; + } + list[pos_-1] = value_; + return list.join(QString::fromLatin1("; ")); +} + +void AudioFileImporter::slotCancel() { + m_cancelled = true; +} + +void AudioFileImporter::slotAddFileToggled(bool on_) { + m_addBitrate->setEnabled(on_); + if(!on_) { + m_addBitrate->setChecked(false); + } +} + +int AudioFileImporter::discNumber(const TagLib::FileRef& ref_) const { + // default to 1 unless otherwise + int num = 1; +#ifdef HAVE_TAGLIB + QString disc; + if(TagLib::MPEG::File* file = dynamic_cast<TagLib::MPEG::File*>(ref_.file())) { + if(file->ID3v2Tag() && !file->ID3v2Tag()->frameListMap()["TPOS"].isEmpty()) { + disc = TStringToQString(file->ID3v2Tag()->frameListMap()["TPOS"].front()->toString()).stripWhiteSpace(); + } + } else if(TagLib::Ogg::Vorbis::File* file = dynamic_cast<TagLib::Ogg::Vorbis::File*>(ref_.file())) { + if(file->tag() && !file->tag()->fieldListMap()["DISCNUMBER"].isEmpty()) { + disc = TStringToQString(file->tag()->fieldListMap()["DISCNUMBER"].front()).stripWhiteSpace(); + } + } else if(TagLib::FLAC::File* file = dynamic_cast<TagLib::FLAC::File*>(ref_.file())) { + if(file->xiphComment() && !file->xiphComment()->fieldListMap()["DISCNUMBER"].isEmpty()) { + disc = 
/**
 * The AudioFileImporter class takes care of importing audio files.
 * The files below the directory given by the URL are read with TagLib and
 * grouped into one entry per album.
 *
 * @author Robby Stephenson
 */
class AudioFileImporter : public Importer {
Q_OBJECT

public:
  /**
   * @param url The directory to scan for audio files
   */
  AudioFileImporter(const KURL& url);

  /**
   * Scans the audio files and builds the collection. The result is cached.
   *
   * @return The music collection, or 0 if the import was cancelled or
   *         TagLib support is not compiled in
   */
  virtual Data::CollPtr collection();
  /**
   * Creates the option widget on the first call, with check boxes for the
   * recursive search, the file location and the bitrate; later calls return
   * the same widget.
   *
   * @param parent The parent of the widget
   * @param name The widget name
   * @return The option widget
   */
  virtual QWidget* widget(QWidget* parent, const char* name=0);
  /**
   * @param type A collection type value
   * @return True only for Data::Collection::Album
   */
  virtual bool canImport(int type) const;

public slots:
  /**
   * Sets the cancelled flag, which is checked by the loops in collection().
   */
  void slotCancel();
  /**
   * Enables the bitrate check box only while the file location one is
   * checked, and unchecks it when the file location is switched off.
   */
  void slotAddFileToggled(bool on);

private:
  // Stores value as item number pos (counting from 1, not 0) of the
  // delimited list str and returns the new list.
  static QString insertValue(const QString& str, const QString& value, uint pos);

  // Reads the disc number from the ID3v2 TPOS frame or from the Vorbis/FLAC
  // DISCNUMBER field; returns 1 when there is none.
  int discNumber(const TagLib::FileRef& file) const;

  Data::CollPtr m_coll;
  QWidget* m_widget;
  // the check boxes are created in widget(); the constructor does not
  // initialise these three pointers
  QCheckBox* m_recursive;
  QCheckBox* m_addFilePath;
  QCheckBox* m_addBitrate;
  bool m_cancelled : 1;
};
--> +<!-- The first string element should be the correct representation --> +<!-- Most of these key mappings were taken, with permission, from the --> +<!-- CharacterConversion.plist file in the Bibdesk program by Michael McCracken --> +<keymap version="1.0"> + <key char="Å"> + <string>{\AA}</string> + </key> + <key char="À"> + <string>{\`A}</string> + <string>\`{A}</string> + </key> + <key char="Â"> + <string>{\^A}</string> + <string>\^{A}</string> + </key> + <key char="Á"> + <string>{\'A}</string> + <string>\'{A}</string> + </key> + <key char="Ã"> + <string>{\~A}</string> + <string>\~{A}</string> + </key> + <key char="Ä"> + <string>{\"A}</string> + <string>\"{A}</string> + </key> + <key char="Æ"> + <string>{\AE}</string> + </key> + <key char="Ø"> + <string>{\O}</string> + </key> + <key char="à"> + <string>{\`a}</string> + <string>\`{a}</string> + </key> + <key char="á"> + <string>{\'a}</string> + <string>\'{a}</string> + </key> + <key char="â"> + <string>{\^a}</string> + <string>\^{a}</string> + </key> + <key char="ã"> + <string>{\~a}</string> + <string>\~{a}</string> + </key> + <key char="ä"> + <string>{\"a}</string> + <string>\"{a}</string> + </key> + <key char="å"> + <string>{\aa}</string> + </key> + <key char="æ"> + <string>{\ae}</string> + </key> + <key char="Ç"> + <string>{\c C}</string> + <string>\c{C}</string> + </key> + <key char="Č"> + <string>{\u C}</string> + <string>\u{C}</string> + </key> + <key char="Č"> + <string>{\v C}</string> + <string>\v{C}</string> + </key> + <key char="ç"> + <string>{\c c}</string> + <string>\c{c}</string> + </key> + <key char="ć"> + <string>{\'c}</string> + <string>\'{c}</string> + </key> + <key char="č"> + <string>{\v c}</string> + <string>\v{c}</string> + </key> + <key char="È"> + <string>{\`E}</string> + <string>\`{E}</string> + </key> + <key char="Ê"> + <string>{\^E}</string> + <string>\^{E}</string> + </key> + <key char="É"> + <string>{\'E}</string> + <string>\'{E}</string> + </key> + <key char="Ë"> + 
<string>{\"E}</string> + <string>\"{E}</string> + </key> + <key char="è"> + <string>{\`e}</string> + <string>\`{e}</string> + </key> + <key char="é"> + <string>{\'e}</string> + <string>\'{e}</string> + </key> + <key char="ê"> + <string>{\^e}</string> + <string>\^{e}</string> + </key> + <key char="ë"> + <string>{\"e}</string> + <string>\"{e}</string> + </key> + <key char="Î"> + <string>{\^I}</string> + <string>\^{I}</string> + </key> + <key char="Í"> + <string>{\'I}</string> + <string>\'{I}</string> + </key> + <key char="Ï"> + <string>{\"I}</string> + <string>\"{I}</string> + </key> + <key char="ì"> + <string>{\`{\i}}</string> + <string>\`{\i}</string> + </key> + <key char="í"> + <string>{\'{\i}}</string> + <string>\'{\i}</string> + </key> + <key char="î"> + <string>{\^{\i}}</string> + <string>\^{\i}</string> + </key> + <key char="ï"> + <string>{\"{\i}}</string> + <string>\"{\i}</string> + </key> + <key char="Ñ"> + <string>{\~N}</string> + <string>\~{N}</string> + </key> + <key char="ñ"> + <string>{\~n}</string> + <string>\~{n}</string> + </key> + <key char="Ó"> + <string>{\'O}</string> + <string>\'{O}</string> + </key> + <key char="Ô"> + <string>{\^O}</string> + <string>\^{O}</string> + </key> + <key char="Ø"> + <string>{\O}</string> + </key> + <key char="Ö"> + <string>{\"O}</string> + <string>\"{O}</string> + </key> + <key char="Œ"> + <string>{\OE}</string> + </key> + <key char="ò"> + <string>{\`o}</string> + <string>\`{o}</string> + </key> + <key char="ó"> + <string>{\'o}</string> + <string>\'{o}</string> + </key> + <key char="ô"> + <string>{\^o}</string> + <string>\^{o}</string> + </key> + <key char="õ"> + <string>{\~o}</string> + <string>\~{o}</string> + </key> + <key char="ö"> + <string>{\"o}</string> + <string>\"{o}</string> + </key> + <key char="œ"> + <string>{\oe}</string> + </key> + <key char="ø"> + <string>{\o}</string> + </key> + <key char="ş"> + <string>{\c s}</string> + <string>\c{s}</string> + </key> + <key char="š"> + <string>{\v s}</string> + 
<string>\v{s}</string> + </key> + <key char="Ţ"> + <string>{\c T}</string> + <string>\c{T}</string> + </key> + <key char="ţ"> + <string>{\c t}</string> + <string>\c{t}</string> + </key> + <key char="Ú"> + <string>{\'U}</string> + <string>\'{U}</string> + </key> + <key char="Û"> + <string>{\^U}</string> + <string>\^{U}</string> + </key> + <key char="Ü"> + <string>{\"U}</string> + <string>\"{U}</string> + </key> + <key char="ù"> + <string>{\`u}</string> + <string>\`{u}</string> + </key> + <key char="ú"> + <string>{\'u}</string> + <string>\'{u}</string> + </key> + <key char="û"> + <string>{\^u}</string> + <string>\^{u}</string> + </key> + <key char="ü"> + <string>{\"u}</string> + <string>\"{u}</string> + </key> + <key char="Ÿ"> + <string>{\"Y}</string> + <string>\"{Y}</string> + </key> + <key char="ÿ"> + <string>{\"y}</string> + <string>\"{y}</string> + </key> + <key char="Ž"> + <string>{\v Z}</string> + <string>\v{Z}</string> + </key> + <key char="ž"> + <string>{\v z}</string> + <string>\v{z}</string> + </key> + <key char="ß"> + <string>{\ss}</string> + </key> + <key char="£"> + <string>\pounds</string> + </key> + <key char="±"> + <string>$\pm$</string> + </key> + <key char="–"> + <string>--</string> + </key> + <key char="—"> + <string>---</string> + </key> + <key char="•"> + <string>*</string> + </key> + <key char="…"> + <string>{\ldots}</string> + </key> + <key char="§"> + <string>{\S}</string> + </key> + <key char="©"> + <string>{\copyright}</string> + </key> + <key char="®"> + <string>{\textregistered}</string> + </key> + <key char="™"> + <string>{\texttrademark}</string> + </key> + <key char="°"> + <string>$^{\circ}$</string> + </key> + <key char="%"> + <string>\%</string> + </key> +</keymap> diff --git a/src/translators/bibtexexporter.cpp b/src/translators/bibtexexporter.cpp new file mode 100644 index 0000000..2706ac8 --- /dev/null +++ b/src/translators/bibtexexporter.cpp @@ -0,0 +1,326 @@ 
+/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + + +#include "bibtexexporter.h" +#include "bibtexhandler.h" +#include "../document.h" +#include "../collections/bibtexcollection.h" +#include "../latin1literal.h" +#include "../filehandler.h" +#include "../stringset.h" +#include "../tellico_debug.h" + +#include <config.h> + +#include <klocale.h> +#include <kdebug.h> +#include <kconfig.h> +#include <kcombobox.h> + +#include <qregexp.h> +#include <qcheckbox.h> +#include <qlayout.h> +#include <qgroupbox.h> +#include <qwhatsthis.h> +#include <qlabel.h> +#include <qhbox.h> + +using Tellico::Export::BibtexExporter; + +BibtexExporter::BibtexExporter() : Tellico::Export::Exporter(), + m_expandMacros(false), + m_packageURL(true), + m_skipEmptyKeys(false), + m_widget(0) { +} + +QString BibtexExporter::formatString() const { + return i18n("Bibtex"); +} + +QString BibtexExporter::fileFilter() const { + return i18n("*.bib|Bibtex Files (*.bib)") + QChar('\n') + i18n("*|All Files"); +} + +bool BibtexExporter::exec() { + Data::CollPtr c = collection(); + if(!c || c->type() != Data::Collection::Bibtex) { + return false; + } + const Data::BibtexCollection* coll = static_cast<const Data::BibtexCollection*>(c.data()); + +// there are some special attributes +// the entry-type specifies the entry type - book, inproceedings, whatever + QString typeField; +// the key specifies the cite-key + QString keyField; +// the crossref bibtex field can 
reference another entry + QString crossRefField; + bool hasCrossRefs = false; + + const QString bibtex = QString::fromLatin1("bibtex"); +// keep a list of all the 'ordinary' fields to iterate through later + Data::FieldVec fields; + Data::FieldVec vec = coll->fields(); + for(Data::FieldVec::Iterator it = vec.begin(); it != vec.end(); ++it) { + QString bibtexField = it->property(bibtex); + if(bibtexField == Latin1Literal("entry-type")) { + typeField = it->name(); + } else if(bibtexField == Latin1Literal("key")) { + keyField = it->name(); + } else if(bibtexField == Latin1Literal("crossref")) { + fields.append(it); // still output crossref field + crossRefField = it->name(); + hasCrossRefs = true; + } else if(!bibtexField.isEmpty()) { + fields.append(it); + } + } + + if(typeField.isEmpty() || keyField.isEmpty()) { + kdWarning() << "BibtexExporter::exec() - the collection must have fields defining " + "the entry-type and the key of the entry" << endl; + return false; + } + if(fields.isEmpty()) { + kdWarning() << "BibtexExporter::exec() - no bibtex field mapping exists in the collection." 
<< endl; + return false; + } + + QString text = QString::fromLatin1("@comment{Generated by Tellico ") + + QString::fromLatin1(VERSION) + + QString::fromLatin1("}\n\n"); + + if(!coll->preamble().isEmpty()) { + text += QString::fromLatin1("@preamble{") + coll->preamble() + QString::fromLatin1("}\n\n"); + } + + const QStringList macros = coll->macroList().keys(); + if(!m_expandMacros) { + QMap<QString, QString>::ConstIterator macroIt; + for(macroIt = coll->macroList().constBegin(); macroIt != coll->macroList().constEnd(); ++macroIt) { + if(!macroIt.data().isEmpty()) { + text += QString::fromLatin1("@string{") + + macroIt.key() + + QString::fromLatin1("=") + + BibtexHandler::exportText(macroIt.data(), macros) + + QString::fromLatin1("}\n\n"); + } + } + } + + // if anything is crossref'd, we have to do an initial scan through the + // whole collection first + StringSet crossRefKeys; + if(hasCrossRefs) { + for(Data::EntryVec::ConstIterator entryIt = entries().begin(); entryIt != entries().end(); ++entryIt) { + crossRefKeys.add(entryIt->field(crossRefField)); + } + } + + + StringSet usedKeys; + Data::ConstEntryVec crossRefs; + QString type, key, newKey, value; + for(Data::EntryVec::ConstIterator entryIt = entries().begin(); entryIt != entries().end(); ++entryIt) { + type = entryIt->field(typeField); + if(type.isEmpty()) { + kdWarning() << "BibtexExporter::text() - the entry for '" << entryIt->title() + << "' has no entry-type, skipping it!" << endl; + continue; + } + + key = entryIt->field(keyField); + if(key.isEmpty()) { + if(m_skipEmptyKeys) { + continue; + } + key = BibtexHandler::bibtexKey(entryIt.data()); + } else { + // check crossrefs, only counts for non-empty keys + // if this entry is crossref'd, add it to the list, and skip it + if(hasCrossRefs && crossRefKeys.has(key)) { + crossRefs.append(entryIt.data()); + continue; + } + } + + newKey = key; + char c = 'a'; + while(usedKeys.has(newKey)) { + // duplicate found! 
+ newKey = key + c; + ++c; + } + key = newKey; + usedKeys.add(key); + + writeEntryText(text, fields, *entryIt, type, key); + } + + // now write out crossrefs + for(Data::ConstEntryVec::Iterator entryIt = crossRefs.begin(); entryIt != crossRefs.end(); ++entryIt) { + // no need to check type + + key = entryIt->field(keyField); + newKey = key; + char c = 'a'; + while(usedKeys.has(newKey)) { + // duplicate found! + newKey = key + c; + ++c; + } + key = newKey; + usedKeys.add(key); + + writeEntryText(text, fields, *entryIt, entryIt->field(typeField), key); + } + + return FileHandler::writeTextURL(url(), text, options() & ExportUTF8, options() & Export::ExportForce); +} + +QWidget* BibtexExporter::widget(QWidget* parent_, const char* name_/*=0*/) { + if(m_widget && m_widget->parent() == parent_) { + return m_widget; + } + + m_widget = new QWidget(parent_, name_); + QVBoxLayout* l = new QVBoxLayout(m_widget); + + QGroupBox* box = new QGroupBox(1, Qt::Horizontal, i18n("Bibtex Options"), m_widget); + l->addWidget(box); + + m_checkExpandMacros = new QCheckBox(i18n("Expand string macros"), box); + m_checkExpandMacros->setChecked(m_expandMacros); + QWhatsThis::add(m_checkExpandMacros, i18n("If checked, the string macros will be expanded and no " + "@string{} entries will be written.")); + + m_checkPackageURL = new QCheckBox(i18n("Use URL package"), box); + m_checkPackageURL->setChecked(m_packageURL); + QWhatsThis::add(m_checkPackageURL, i18n("If checked, any URL fields will be wrapped in a " + "\\url declaration.")); + + m_checkSkipEmpty = new QCheckBox(i18n("Skip entries with empty citation keys"), box); + m_checkSkipEmpty->setChecked(m_skipEmptyKeys); + QWhatsThis::add(m_checkSkipEmpty, i18n("If checked, any entries without a bibtex citation key " + "will be skipped.")); + + QHBox* hbox = new QHBox(box); + QLabel* l1 = new QLabel(i18n("Bibtex quotation style:") + ' ', hbox); // add a space for astheticss + m_cbBibtexStyle = new KComboBox(hbox); + 
m_cbBibtexStyle->insertItem(i18n("Braces")); + m_cbBibtexStyle->insertItem(i18n("Quotes")); + QString whats = i18n("<qt>The quotation style used when exporting bibtex. All field values will " + " be escaped with either braces or quotation marks.</qt>"); + QWhatsThis::add(l1, whats); + QWhatsThis::add(m_cbBibtexStyle, whats); + if(BibtexHandler::s_quoteStyle == BibtexHandler::BRACES) { + m_cbBibtexStyle->setCurrentItem(i18n("Braces")); + } else { + m_cbBibtexStyle->setCurrentItem(i18n("Quotes")); + } + + l->addStretch(1); + return m_widget; +} + +void BibtexExporter::readOptions(KConfig* config_) { + KConfigGroup group(config_, QString::fromLatin1("ExportOptions - %1").arg(formatString())); + m_expandMacros = group.readBoolEntry("Expand Macros", m_expandMacros); + m_packageURL = group.readBoolEntry("URL Package", m_packageURL); + m_skipEmptyKeys = group.readBoolEntry("Skip Empty Keys", m_skipEmptyKeys); + + if(group.readBoolEntry("Use Braces", true)) { + BibtexHandler::s_quoteStyle = BibtexHandler::BRACES; + } else { + BibtexHandler::s_quoteStyle = BibtexHandler::QUOTES; + } +} + +void BibtexExporter::saveOptions(KConfig* config_) { + KConfigGroup group(config_, QString::fromLatin1("ExportOptions - %1").arg(formatString())); + m_expandMacros = m_checkExpandMacros->isChecked(); + group.writeEntry("Expand Macros", m_expandMacros); + m_packageURL = m_checkPackageURL->isChecked(); + group.writeEntry("URL Package", m_packageURL); + m_skipEmptyKeys = m_checkSkipEmpty->isChecked(); + group.writeEntry("Skip Empty Keys", m_skipEmptyKeys); + + bool useBraces = m_cbBibtexStyle->currentText() == i18n("Braces"); + group.writeEntry("Use Braces", useBraces); + if(useBraces) { + BibtexHandler::s_quoteStyle = BibtexHandler::BRACES; + } else { + BibtexHandler::s_quoteStyle = BibtexHandler::QUOTES; + } +} + +void BibtexExporter::writeEntryText(QString& text_, const Data::FieldVec& fields_, const Data::Entry& entry_, + const QString& type_, const QString& key_) { + const QStringList 
macros = static_cast<const Data::BibtexCollection*>(Data::Document::self()->collection().data())->macroList().keys(); + const QString bibtex = QString::fromLatin1("bibtex"); + const QString bibtexSep = QString::fromLatin1("bibtex-separator"); + + text_ += '@' + type_ + '{' + key_; + + QString value; + Data::FieldVec::ConstIterator fIt, end = fields_.constEnd(); + bool format = options() & Export::ExportFormatted; + for(fIt = fields_.constBegin(); fIt != end; ++fIt) { + value = entry_.field(fIt->name(), format); + if(value.isEmpty()) { + continue; + } + + // If the entry is formatted as a name and allows multiple values + // insert "and" in between them (e.g. author and editor) + if(fIt->formatFlag() == Data::Field::FormatName + && fIt->flags() & Data::Field::AllowMultiple) { + value.replace(Data::Field::delimiter(), QString::fromLatin1(" and ")); + } else if(fIt->flags() & Data::Field::AllowMultiple) { + QString bibsep = fIt->property(bibtexSep); + if(!bibsep.isEmpty()) { + value.replace(Data::Field::delimiter(), bibsep); + } + } else if(fIt->type() == Data::Field::Para) { + // strip HTML from bibtex export + QRegExp stripHTML(QString::fromLatin1("<.*>"), true); + stripHTML.setMinimal(true); + value.remove(stripHTML); + } else if(fIt->property(bibtex) == Latin1Literal("pages")) { + QRegExp rx(QString::fromLatin1("(\\d)-(\\d)")); + for(int pos = rx.search(value); pos > -1; pos = rx.search(value, pos+2)) { + value.replace(pos, 3, rx.cap(1)+"--"+rx.cap(2)); + } + } + + if(m_packageURL && fIt->type() == Data::Field::URL) { + bool b = BibtexHandler::s_quoteStyle == BibtexHandler::BRACES; + value = (b ? QChar('{') : QChar('"')) + + QString::fromLatin1("\\url{") + BibtexHandler::exportText(value, macros) + QChar('}') + + (b ? 
QChar('}') : QChar('"')); + } else if(fIt->type() != Data::Field::Number) { + // numbers aren't escaped, nor will they have macros + // if m_expandMacros is true, then macros is empty, so this is ok even then + value = BibtexHandler::exportText(value, macros); + } + text_ += QString::fromLatin1(",\n ") + + fIt->property(bibtex) + + QString::fromLatin1(" = ") + + value; + } + text_ += QString::fromLatin1("\n}\n\n"); +} + +#include "bibtexexporter.moc" diff --git a/src/translators/bibtexexporter.h b/src/translators/bibtexexporter.h new file mode 100644 index 0000000..dccfde8 --- /dev/null +++ b/src/translators/bibtexexporter.h @@ -0,0 +1,63 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef BIBTEXEXPORTER_H +#define BIBTEXEXPORTER_H + +class QCheckBox; +class KComboBox; + +#include "exporter.h" + +namespace Tellico { + namespace Export { + +/** + * The Bibtex exporter shows a list of possible Bibtex fields next to a combobox of all + * the current attributes in the collection. I had thought about the reverse - having a list + * of all the attributes, with comboboxes for each Bibtex field, but I think this way is more obvious. 
+ * + * @author Robby Stephenson + */ +class BibtexExporter : public Exporter { +Q_OBJECT + +public: + BibtexExporter(); + + virtual bool exec(); + virtual QString formatString() const; + virtual QString fileFilter() const; + + virtual QWidget* widget(QWidget* parent, const char* name=0); + virtual void readOptions(KConfig*); + virtual void saveOptions(KConfig*); + +private: + void writeEntryText(QString& text, const Data::FieldVec& field, const Data::Entry& entry, + const QString& type, const QString& key); + + bool m_expandMacros; + bool m_packageURL; + bool m_skipEmptyKeys; + + QWidget* m_widget; + QCheckBox* m_checkExpandMacros; + QCheckBox* m_checkPackageURL; + QCheckBox* m_checkSkipEmpty; + KComboBox* m_cbBibtexStyle; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/bibtexhandler.cpp b/src/translators/bibtexhandler.cpp new file mode 100644 index 0000000..8c88e43 --- /dev/null +++ b/src/translators/bibtexhandler.cpp @@ -0,0 +1,319 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "bibtexhandler.h" +#include "../collections/bibtexcollection.h" +#include "../entry.h" +#include "../field.h" +#include "../collection.h" +#include "../document.h" +#include "../filehandler.h" +#include "../latin1literal.h" +#include "../tellico_debug.h" + +#include <kstandarddirs.h> +#include <kurl.h> +#include <kstringhandler.h> +#include <klocale.h> + +#include <qstring.h> +#include 
<qstringlist.h> +#include <qregexp.h> +#include <qdom.h> + +// don't add braces around capital letters by default +#define TELLICO_BIBTEX_BRACES 0 + +using Tellico::BibtexHandler; + +BibtexHandler::StringListMap* BibtexHandler::s_utf8LatexMap = 0; +BibtexHandler::QuoteStyle BibtexHandler::s_quoteStyle = BibtexHandler::BRACES; +const QRegExp BibtexHandler::s_badKeyChars(QString::fromLatin1("[^0-9a-zA-Z-]")); + +QStringList BibtexHandler::bibtexKeys(const Data::EntryVec& entries_) { + QStringList keys; + for(Data::EntryVec::ConstIterator it = entries_.begin(); it != entries_.end(); ++it) { + QString s = bibtexKey(it.data()); + if(!s.isEmpty()) { + keys << s; + } + } + return keys; +} + +QString BibtexHandler::bibtexKey(Data::ConstEntryPtr entry_) { + if(!entry_ || !entry_->collection() || entry_->collection()->type() != Data::Collection::Bibtex) { + return QString::null; + } + + const Data::BibtexCollection* c = static_cast<const Data::BibtexCollection*>(entry_->collection().data()); + Data::FieldPtr f = c->fieldByBibtexName(QString::fromLatin1("key")); + if(f) { + QString key = entry_->field(f->name()); + if(!key.isEmpty()) { + return key; + } + } + + QString author; + Data::FieldPtr authorField = c->fieldByBibtexName(QString::fromLatin1("author")); + if(authorField) { + if(authorField->flags() & Data::Field::AllowMultiple) { + // grab first author only; + QString tmp = entry_->field(authorField->name()); + author = tmp.section(';', 0, 0); + } else { + author = entry_->field(authorField->name()); + } + } + + Data::FieldPtr titleField = c->fieldByBibtexName(QString::fromLatin1("title")); + QString title; + if(titleField) { + title = entry_->field(titleField->name()); + } + + Data::FieldPtr yearField = c->fieldByBibtexName(QString::fromLatin1("year")); + QString year; + if(yearField) { + year = entry_->field(yearField->name()); + } + if(year.isEmpty()) { + year = entry_->field(QString::fromLatin1("pub_year")); + if(year.isEmpty()) { + year = 
entry_->field(QString::fromLatin1("cr_year")); + } + } + year = year.section(';', 0, 0); + + return bibtexKey(author, title, year); +} + +QString BibtexHandler::bibtexKey(const QString& author_, const QString& title_, const QString& year_) { + QString key; + // if no comma, take the last word + if(!author_.isEmpty()) { + if(author_.find(',') == -1) { + key += author_.section(' ', -1).lower() + '-'; + } else { + // if there is a comma, take the string up to the first comma + key += author_.section(',', 0, 0).lower() + '-'; + } + } + QStringList words = QStringList::split(' ', title_); + for(QStringList::ConstIterator it = words.begin(); it != words.end(); ++it) { + key += (*it).left(1).lower(); + } + key += year_; + // bibtex key may only contain [0-9a-zA-Z-] + return key.replace(s_badKeyChars, QString::null); +} + +void BibtexHandler::loadTranslationMaps() { + QString mapfile = locate("appdata", QString::fromLatin1("bibtex-translation.xml")); + if(mapfile.isEmpty()) { + return; + } + + s_utf8LatexMap = new StringListMap(); + + KURL u; + u.setPath(mapfile); + // no namespace processing + QDomDocument dom = FileHandler::readXMLFile(u, false); + + QDomNodeList keyList = dom.elementsByTagName(QString::fromLatin1("key")); + + for(unsigned i = 0; i < keyList.count(); ++i) { + QDomNodeList strList = keyList.item(i).toElement().elementsByTagName(QString::fromLatin1("string")); + // the strList might have more than one node since there are multiple ways + // to represent a character in LaTex. 
+ QString s = keyList.item(i).toElement().attribute(QString::fromLatin1("char")); + for(unsigned j = 0; j < strList.count(); ++j) { + (*s_utf8LatexMap)[s].append(strList.item(j).toElement().text()); +// kdDebug() << "BibtexHandler::loadTranslationMaps - " +// << s << " = " << strList.item(j).toElement().text() << endl; + } + } +} + +QString BibtexHandler::importText(char* text_) { + if(!s_utf8LatexMap) { + loadTranslationMaps(); + } + + QString str = QString::fromUtf8(text_); + for(StringListMap::Iterator it = s_utf8LatexMap->begin(); it != s_utf8LatexMap->end(); ++it) { + for(QStringList::Iterator sit = it.data().begin(); sit != it.data().end(); ++sit) { + str.replace(*sit, it.key()); + } + } + + // now replace capitalized letters, such as {X} + // but since we don't want to turn "... X" into "... {X}" later when exporting + // we need to lower-case any capitalized text after the first letter that is + // NOT contained in braces + + QRegExp rx(QString::fromLatin1("\\{([A-Z]+)\\}")); + rx.setMinimal(true); + str.replace(rx, QString::fromLatin1("\\1")); + + return str; +} + +QString BibtexHandler::exportText(const QString& text_, const QStringList& macros_) { + if(!s_utf8LatexMap) { + loadTranslationMaps(); + } + + QChar lquote, rquote; + switch(s_quoteStyle) { + case BRACES: + lquote = '{'; + rquote = '}'; + break; + case QUOTES: + lquote = '"'; + rquote = '"'; + break; + } + + QString text = text_; + + for(StringListMap::Iterator it = s_utf8LatexMap->begin(); it != s_utf8LatexMap->end(); ++it) { + text.replace(it.key(), it.data()[0]); + } + + if(macros_.isEmpty()) { + return lquote + addBraces(text) + rquote; + } + +// Now, split the text by the character '#', and examine each token to see if it is in +// the macro list. If it is not, then add left-quote and right-quote around it. If it is, don't +// change it. 
Then, in case '#' occurs in a non-macro string, replace any occurrences of '}#{' with '#' + +// list of new tokens + QStringList list; + +// first, split the text + QStringList tokens = QStringList::split('#', text, true); + for(QStringList::Iterator it = tokens.begin(); it != tokens.end(); ++it) { + // check to see if token is a macro + if(macros_.findIndex((*it).stripWhiteSpace()) == -1) { + // the token is NOT a macro, add braces around whole words and also around capitals + list << lquote + addBraces(*it) + rquote; + } else { + list << *it; + } + } + + const QChar octo = '#'; + text = list.join(octo); + text.replace(QString(rquote)+octo+lquote, octo); + + return text; +} + +bool BibtexHandler::setFieldValue(Data::EntryPtr entry_, const QString& bibtexField_, const QString& value_) { + Data::BibtexCollection* c = static_cast<Data::BibtexCollection*>(entry_->collection().data()); + Data::FieldPtr field = c->fieldByBibtexName(bibtexField_); + if(!field) { + // it was the case that the default bibliography did not have a bibtex property for keywords + // so a "keywords" field would get created in the imported collection + // but the existing collection had a field "keyword" so the values would not get imported + // here, check to see if the current collection has a field with the same bibtex name and + // use it instead of creating a new one + Data::BibtexCollection* existingColl = Data::Document::self()->collection()->type() == Data::Collection::Bibtex + ? static_cast<Data::BibtexCollection*>(Data::Document::self()->collection().data()) + : 0; + Data::FieldPtr existingField = existingColl ? 
existingColl->fieldByBibtexName(bibtexField_) : 0; + if(existingField) { + field = new Data::Field(*existingField); + } else if(value_.length() < 100) { + // arbitrarily say if the value has more than 100 chars, then it's a paragraph + QString vlower = value_.lower(); + // special case, try to detect URLs + // In qt 3.1, QString::startsWith() is always case-sensitive + if(bibtexField_ == Latin1Literal("url") + || vlower.startsWith(QString::fromLatin1("http")) // may also be https + || vlower.startsWith(QString::fromLatin1("ftp:/")) + || vlower.startsWith(QString::fromLatin1("file:/")) + || vlower.startsWith(QString::fromLatin1("/"))) { // assume this indicates a local path + myDebug() << "BibtexHandler::setFieldValue() - creating a URL field for " << bibtexField_ << endl; + field = new Data::Field(bibtexField_, KStringHandler::capwords(bibtexField_), Data::Field::URL); + } else { + field = new Data::Field(bibtexField_, KStringHandler::capwords(bibtexField_), Data::Field::Line); + } + field->setCategory(i18n("Unknown")); + } else { + field = new Data::Field(bibtexField_, KStringHandler::capwords(bibtexField_), Data::Field::Para); + } + field->setProperty(QString::fromLatin1("bibtex"), bibtexField_); + c->addField(field); + } + // special case keywords, replace commas with semi-colons so they get separated + QString value = value_; + if(field->property(QString::fromLatin1("bibtex")).startsWith(QString::fromLatin1("keyword"))) { + value.replace(',', ';'); + // special case refbase bibtex export, with multiple keywords fields + QString oValue = entry_->field(field); + if(!oValue.isEmpty()) { + value = oValue + "; " + value; + } + } + return entry_->setField(field, value); +} + +QString& BibtexHandler::cleanText(QString& text_) { + // FIXME: need to improve this for removing all Latex entities +// QRegExp rx(QString::fromLatin1("(?=[^\\\\])\\\\.+\\{")); + QRegExp rx(QString::fromLatin1("\\\\.+\\{")); + rx.setMinimal(true); + text_.replace(rx, QString::null); + 
text_.replace(QRegExp(QString::fromLatin1("[{}]")), QString::null); + text_.replace('~', ' '); + return text_; +} + +// add braces around capital letters +QString& BibtexHandler::addBraces(QString& text) { +#if !TELLICO_BIBTEX_BRACES + return text; +#else + int inside = 0; + uint l = text.length(); + // start at first letter, but skip if only the first is capitalized + for(uint i = 0; i < l; ++i) { + const QChar c = text.at(i); + if(inside == 0 && c >= 'A' && c <= 'Z') { + uint j = i+1; + while(text.at(j) >= 'A' && text.at(j) <= 'Z' && j < l) { + ++j; + } + if(i == 0 && j == 1) { + continue; // no need to do anything to first letter + } + text.insert(i, '{'); + // now j should be incremented + text.insert(j+1, '}'); + i = j+1; + l += 2; // the length changed + } else if(c == '{') { + ++inside; + } else if(c == '}') { + --inside; + } + } + return text; +#endif +} diff --git a/src/translators/bibtexhandler.h b/src/translators/bibtexhandler.h new file mode 100644 index 0000000..87d8bf0 --- /dev/null +++ b/src/translators/bibtexhandler.h @@ -0,0 +1,60 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef BIBTEXHANDLER_H +#define BIBTEXHANDLER_H + +class QString; +class QStringList; +class QRegExp; + +#include "../datavectors.h" + +#include <qmap.h> + +namespace Tellico { + +/** + * @author Robby Stephenson + */ +class BibtexHandler { +public: + enum QuoteStyle { BRACES=0, QUOTES=1 }; + static QStringList 
bibtexKeys(const Data::EntryVec& entries); + static QString bibtexKey(Data::ConstEntryPtr entry); + static QString importText(char* text); + static QString exportText(const QString& text, const QStringList& macros); + static bool setFieldValue(Data::EntryPtr entry, const QString& bibtexField, const QString& value); + /** + * Strips the text of all vestiges of LaTeX. + * + * @param text A reference to the text + * @return A reference to the text + */ + static QString& cleanText(QString& text); + + static QuoteStyle s_quoteStyle; + +private: + typedef QMap<QString, QStringList> StringListMap; + + static QString bibtexKey(const QString& author, const QString& title, const QString& year); + static void loadTranslationMaps(); + static QString& addBraces(QString& string); + + static StringListMap* s_utf8LatexMap; + static const QRegExp s_badKeyChars; +}; + +} // end namespace +#endif diff --git a/src/translators/bibteximporter.cpp b/src/translators/bibteximporter.cpp new file mode 100644 index 0000000..2e514d3 --- /dev/null +++ b/src/translators/bibteximporter.cpp @@ -0,0 +1,312 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "bibteximporter.h" +#include "bibtexhandler.h" +#include "../collections/bibtexcollection.h" +#include "../entry.h" +#include "../latin1literal.h" +#include "../progressmanager.h" +#include "../filehandler.h" +#include "../tellico_debug.h" + +#include <kapplication.h> +#include 
<kconfig.h> + +#include <qptrlist.h> +#include <qregexp.h> +#include <qlayout.h> +#include <qvbuttongroup.h> +#include <qradiobutton.h> +#include <qwhatsthis.h> +#include <qtextcodec.h> + +using Tellico::Import::BibtexImporter; + +BibtexImporter::BibtexImporter(const KURL::List& urls_) : Importer(urls_) + , m_coll(0), m_widget(0), m_readUTF8(0), m_readLocale(0), m_cancelled(false) { + bt_initialize(); +} + +BibtexImporter::BibtexImporter(const QString& text_) : Importer(text_) + , m_coll(0), m_widget(0), m_readUTF8(0), m_readLocale(0), m_cancelled(false) { + bt_initialize(); +} + +BibtexImporter::~BibtexImporter() { + bt_cleanup(); + if(m_readUTF8) { + KConfigGroup config(kapp->config(), "Import Options"); + config.writeEntry("Bibtex UTF8", m_readUTF8->isChecked()); + } +} + +bool BibtexImporter::canImport(int type) const { + return type == Data::Collection::Bibtex; +} + +Tellico::Data::CollPtr BibtexImporter::collection() { + if(m_coll) { + return m_coll; + } + + ProgressItem& item = ProgressManager::self()->newProgressItem(this, progressLabel(), true); + item.setTotalSteps(urls().count() * 100); + connect(&item, SIGNAL(signalCancelled(ProgressItem*)), SLOT(slotCancel())); + ProgressItem::Done done(this); + + bool useUTF8 = m_widget && m_readUTF8->isChecked(); + + m_coll = new Data::BibtexCollection(true); + + int count = 0; + // might be importing text only + if(!text().isEmpty()) { + QString text = this->text(); + Data::CollPtr coll = readCollection(text, count); + if(!coll || coll->entryCount() == 0) { + setStatusMessage(i18n("No valid bibtex entries were found")); + } else { + m_coll->addEntries(coll->entries()); + } + } + + KURL::List urls = this->urls(); + for(KURL::List::ConstIterator it = urls.begin(); it != urls.end(); ++it, ++count) { + if(m_cancelled) { + return 0; + } + if(!(*it).isValid()) { + continue; + } + QString text = FileHandler::readTextFile(*it, false, useUTF8); + if(text.isEmpty()) { + continue; + } + Data::CollPtr coll = 
readCollection(text, count); + if(!coll || coll->entryCount() == 0) { + setStatusMessage(i18n("No valid bibtex entries were found in file - %1").arg(url().fileName())); + continue; + } + m_coll->addEntries(coll->entries()); + } + + if(m_cancelled) { + return 0; + } + + return m_coll; +} + +Tellico::Data::CollPtr BibtexImporter::readCollection(const QString& text, int n) { + if(text.isEmpty()) { + myDebug() << "BibtexImporter::readCollection() - no text" << endl; + return 0; + } + Data::CollPtr ptr = new Data::BibtexCollection(true); + Data::BibtexCollection* c = static_cast<Data::BibtexCollection*>(ptr.data()); + + parseText(text); // populates m_nodes + if(m_cancelled) { + return 0; + } + + if(m_nodes.isEmpty()) { + return 0; + } + + QString str; + const uint count = m_nodes.count(); + const uint stepSize = QMAX(s_stepSize, count/100); + const bool showProgress = options() & ImportProgress; + + uint j = 0; + for(ASTListIterator it(m_nodes); !m_cancelled && it.current(); ++it, ++j) { + // if we're parsing a macro string, comment or preamble, skip it for now + if(bt_entry_metatype(it.current()) == BTE_PREAMBLE) { + char* preamble = bt_get_text(it.current()); + if(preamble) { + c->setPreamble(QString::fromUtf8(preamble)); + } + continue; + } + + if(bt_entry_metatype(it.current()) == BTE_MACRODEF) { + char* macro; + (void) bt_next_field(it.current(), 0, ¯o); + // FIXME: replace macros within macro definitions! 
+ // lookup lowercase macro in map + c->addMacro(m_macros[QString::fromUtf8(macro)], QString::fromUtf8(bt_macro_text(macro, 0, 0))); + continue; + } + + if(bt_entry_metatype(it.current()) == BTE_COMMENT) { + continue; + } + + // now we're parsing a regular entry + Data::EntryPtr entry = new Data::Entry(ptr); + + str = QString::fromUtf8(bt_entry_type(it.current())); +// kdDebug() << "entry type: " << str << endl; + // text is automatically put into lower-case by btparse + BibtexHandler::setFieldValue(entry, QString::fromLatin1("entry-type"), str); + + str = QString::fromUtf8(bt_entry_key(it.current())); +// kdDebug() << "entry key: " << str << endl; + BibtexHandler::setFieldValue(entry, QString::fromLatin1("key"), str); + + char* name; + AST* field = 0; + while((field = bt_next_field(it.current(), field, &name))) { +// kdDebug() << "\tfound: " << name << endl; +// str = QString::fromLatin1(bt_get_text(field)); + str.truncate(0); + AST* value = 0; + bt_nodetype type; + char* svalue; + bool end_macro = false; + while((value = bt_next_value(field, value, &type, &svalue))) { + switch(type) { + case BTAST_STRING: + case BTAST_NUMBER: + str += BibtexHandler::importText(svalue).simplifyWhiteSpace(); + end_macro = false; + break; + case BTAST_MACRO: + str += QString::fromUtf8(svalue) + '#'; + end_macro = true; + break; + default: + break; + } + } + if(end_macro) { + // remove last character '#' + str.truncate(str.length() - 1); + } + QString fieldName = QString::fromUtf8(name); + if(fieldName == Latin1Literal("author") || fieldName == Latin1Literal("editor")) { + str.replace(QRegExp(QString::fromLatin1("\\sand\\s")), QString::fromLatin1("; ")); + } + BibtexHandler::setFieldValue(entry, fieldName, str); + } + + ptr->addEntries(entry); + + if(showProgress && j%stepSize == 0) { + ProgressManager::self()->setProgress(this, n*100 + 100*j/count); + kapp->processEvents(); + } + } + + if(m_cancelled) { + ptr = 0; + } + + // clean-up + for(ASTListIterator it(m_nodes); it.current(); 
++it) { + bt_free_ast(it.current()); + } + + return ptr; +} + +void BibtexImporter::parseText(const QString& text) { + m_nodes.clear(); + m_macros.clear(); + + ushort bt_options = 0; // ushort is defined in btparse.h + boolean ok; // boolean is defined in btparse.h as an int + + // for regular nodes (entries), do NOT convert numbers to strings, do NOT expand macros + bt_set_stringopts(BTE_REGULAR, 0); + bt_set_stringopts(BTE_MACRODEF, 0); +// bt_set_stringopts(BTE_PREAMBLE, BTO_CONVERT | BTO_EXPAND); + + QString entry; + QRegExp rx(QString::fromLatin1("[{}]")); + QRegExp macroName(QString::fromLatin1("@string\\s*\\{\\s*(.*)="), false /*case sensitive*/); + macroName.setMinimal(true); + + bool needsCleanup = false; + int brace = 0; + int startpos = 0; + int pos = text.find(rx, 0); + while(pos > 0 && !m_cancelled) { + if(text[pos] == '{') { + ++brace; + } else if(text[pos] == '}' && brace > 0) { + --brace; + } + if(brace == 0) { + entry = text.mid(startpos, pos-startpos+1).stripWhiteSpace(); + // All the downstream text processing on the AST node will assume utf-8 + AST* node = bt_parse_entry_s(const_cast<char*>(entry.utf8().data()), + const_cast<char*>(url().fileName().local8Bit().data()), + 0, bt_options, &ok); + if(ok && node) { + if(bt_entry_metatype(node) == BTE_MACRODEF && macroName.search(entry) > -1) { + char* macro; + (void) bt_next_field(node, 0, ¯o); + m_macros.insert(QString::fromUtf8(macro), macroName.cap(1).stripWhiteSpace()); + } + m_nodes.append(node); + needsCleanup = true; + } + startpos = pos+1; + } + pos = text.find(rx, pos+1); + } + if(needsCleanup) { + // clean up some structures + bt_parse_entry_s(0, 0, 1, 0, 0); + } +} + +void BibtexImporter::slotCancel() { + m_cancelled = true; +} + +QWidget* BibtexImporter::widget(QWidget* parent_, const char* name_/*=0*/) { + if(m_widget) { + return m_widget; + } + + m_widget = new QWidget(parent_, name_); + QVBoxLayout* l = new QVBoxLayout(m_widget); + + QButtonGroup* box = new QVButtonGroup(i18n("Bibtex 
Options"), m_widget); + m_readUTF8 = new QRadioButton(i18n("Use Unicode (UTF-8) encoding"), box); + QWhatsThis::add(m_readUTF8, i18n("Read the imported file in Unicode (UTF-8).")); + QString localStr = i18n("Use user locale (%1) encoding").arg( + QString::fromLatin1(QTextCodec::codecForLocale()->name())); + m_readLocale = new QRadioButton(localStr, box); + m_readLocale->setChecked(true); + QWhatsThis::add(m_readLocale, i18n("Read the imported file in the local encoding.")); + + KConfigGroup config(kapp->config(), "Import Options"); + bool useUTF8 = config.readBoolEntry("Bibtex UTF8", false); + if(useUTF8) { + m_readUTF8->setChecked(true); + } else { + m_readLocale->setChecked(true); + } + + l->addWidget(box); + l->addStretch(1); + return m_widget; +} + + +#include "bibteximporter.moc" diff --git a/src/translators/bibteximporter.h b/src/translators/bibteximporter.h new file mode 100644 index 0000000..c17195b --- /dev/null +++ b/src/translators/bibteximporter.h @@ -0,0 +1,90 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef BIBTEXIMPORTER_H +#define BIBTEXIMPORTER_H + +#include <config.h> +#include "importer.h" +#include "../datavectors.h" + +extern "C" { +#ifdef HAVE_LIBBTPARSE +#include <btparse.h> +#else +#include "btparse/btparse.h" +} +#endif + +#include <qptrlist.h> +#include <qmap.h> + +class QRadioButton; + +namespace Tellico { + namespace Import { + +/** + * Bibtex files are used for bibliographies 
within LaTex. The btparse library is used to + * parse the text and generate a @ref BibtexCollection. + * + * @author Robby Stephenson + */ +class BibtexImporter : public Importer { +Q_OBJECT + +public: + /** + * Initializes the btparse library + * + * @param url The url of the bibtex file + */ + BibtexImporter(const KURL::List& urls); + BibtexImporter(const QString& text); + /* + * Some cleanup is done for the btparse library + */ + virtual ~BibtexImporter(); + + /** + * Returns a pointer to a @ref BibtexCollection created on the stack. All entries + * in the bibtex file are added, including any preamble, all macro strings, and each entry. + * + * @return A pointer to a @ref BibtexCollection, or 0 if none can be created. + */ + virtual Data::CollPtr collection(); + virtual QWidget* widget(QWidget* parent, const char* name=0); + virtual bool canImport(int type) const; + +public slots: + void slotCancel(); + +private: + Data::CollPtr readCollection(const QString& text, int n); + void parseText(const QString& text); + + typedef QPtrList<AST> ASTList; + typedef QPtrListIterator<AST> ASTListIterator; + ASTList m_nodes; + QMap<QString, QString> m_macros; + + Data::CollPtr m_coll; + QWidget* m_widget; + QRadioButton* m_readUTF8; + QRadioButton* m_readLocale; + bool m_cancelled : 1; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/bibtexmlexporter.cpp b/src/translators/bibtexmlexporter.cpp new file mode 100644 index 0000000..4a0a4d3 --- /dev/null +++ b/src/translators/bibtexmlexporter.cpp @@ -0,0 +1,182 @@ +/************************************************************************* + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the 
GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include <config.h> + +#include "bibtexmlexporter.h" +#include "bibtexhandler.h" +#include "../document.h" +#include "../collections/bibtexcollection.h" +#include "../latin1literal.h" +#include "../filehandler.h" +#include "tellico_xml.h" +#include "../stringset.h" + +#include <klocale.h> +#include <kdebug.h> + +#include <qvbox.h> +#include <qdom.h> +#include <qregexp.h> +#include <qtextcodec.h> + +using Tellico::Export::BibtexmlExporter; + +QString BibtexmlExporter::formatString() const { + return i18n("Bibtexml"); +} + +QString BibtexmlExporter::fileFilter() const { + return i18n("*.xml|Bibtexml Files (*.xml)") + QChar('\n') + i18n("*|All Files"); +} + +bool BibtexmlExporter::exec() { + Data::CollPtr c = collection(); + if(!c || c->type() != Data::Collection::Bibtex) { + return false; + } + const Data::BibtexCollection* coll = static_cast<const Data::BibtexCollection*>(c.data()); + +// there are some special fields +// the entry-type specifies the entry type - book, inproceedings, whatever + QString typeField; +// the key specifies the cite-key + QString keyField; + + const QString bibtex = QString::fromLatin1("bibtex"); +// keep a list of all the 'ordinary' fields to iterate through later + Data::FieldVec fields; + Data::FieldVec vec = coll->fields(); + for(Data::FieldVec::Iterator it = vec.begin(); it != vec.end(); ++it) { + QString bibtexField = it->property(bibtex); + if(bibtexField == Latin1Literal("entry-type")) { + typeField = it->name(); + } else if(bibtexField == Latin1Literal("key")) { + keyField = it->name(); + } else if(!bibtexField.isEmpty()) { + fields.append(it); + } + } + + QDomImplementation impl; + QDomDocumentType doctype = impl.createDocumentType(QString::fromLatin1("file"), + QString::null, + XML::dtdBibtexml); + //default namespace + const QString& ns = XML::nsBibtexml; + + 
QDomDocument dom = impl.createDocument(ns, QString::fromLatin1("file"), doctype); + + // root element + QDomElement root = dom.documentElement(); + + QString encodeStr = QString::fromLatin1("version=\"1.0\" encoding=\""); + if(options() & Export::ExportUTF8) { + encodeStr += QString::fromLatin1("UTF-8"); + } else { + encodeStr += QString::fromLatin1(QTextCodec::codecForLocale()->mimeName()); + } + encodeStr += '"'; + + // createDocument creates a root node, insert the processing instruction before it + dom.insertBefore(dom.createProcessingInstruction(QString::fromLatin1("xml"), encodeStr), root); + QString comment = QString::fromLatin1("Generated by Tellico ") + QString::fromLatin1(VERSION); + dom.insertBefore(dom.createComment(comment), root); + + Data::ConstFieldPtr field; + Data::FieldVec::ConstIterator fIt, end = fields.constEnd(); + bool format = options() & Export::ExportFormatted; + + StringSet usedKeys; + QString type, key, newKey, value, elemName, parElemName; + QDomElement btElem, entryElem, parentElem, fieldElem; + for(Data::EntryVec::ConstIterator entryIt = entries().begin(); entryIt != entries().end(); ++entryIt) { + key = entryIt->field(keyField); + if(key.isEmpty()) { + key = BibtexHandler::bibtexKey(entryIt.data()); + } + QString newKey = key; + char c = 'a'; + while(usedKeys.has(newKey)) { + // duplicate found! + newKey = key + c; + ++c; + } + key = newKey; + usedKeys.add(key); + + btElem = dom.createElement(QString::fromLatin1("entry")); + btElem.setAttribute(QString::fromLatin1("id"), key); + root.appendChild(btElem); + + type = entryIt->field(typeField); + if(type.isEmpty()) { + kdWarning() << "BibtexmlExporter::exec() - the entry for '" << entryIt->title() + << "' has no entry-type, skipping it!" 
<< endl; + continue; + } + + entryElem = dom.createElement(type); + btElem.appendChild(entryElem); + + // now iterate over attributes + for(fIt = fields.constBegin(); fIt != end; ++fIt) { + field = fIt.data(); + value = entryIt->field(field->name(), format); + if(value.isEmpty()) { + continue; + } + +/* Bibtexml has special container elements for titles, authors, editors, and keywords + I'm going to ignore the titlelist element for right now. All authors are contained in + an authorlist element, editors in an editorlist element, and keywords are in a + keywords element, and themselves as a keyword. Also, Bibtexml can format names + similar to docbook, with first, middle, last, etc elements. I'm going to ignore that + for now, too.*/ + elemName = field->property(bibtex); + // split text for author, editor, and keywords + if(elemName == Latin1Literal("author") || + elemName == Latin1Literal("editor") || + elemName == Latin1Literal("keywords")) { + if(elemName == Latin1Literal("author")) { + parElemName = QString::fromLatin1("authorlist"); + } else if(elemName == Latin1Literal("editor")) { + parElemName = QString::fromLatin1("editorlist"); + } else { // keywords + parElemName = QString::fromLatin1("keywords"); + elemName = QString::fromLatin1("keyword"); + } + + parentElem = dom.createElement(parElemName); + const QStringList values = entryIt->fields(field->name(), false); + for(QStringList::ConstIterator it = values.begin(); it != values.end(); ++it) { + fieldElem = dom.createElement(elemName); + fieldElem.appendChild(dom.createTextNode(*it)); + parentElem.appendChild(fieldElem); + } + if(parentElem.hasChildNodes()) { + entryElem.appendChild(parentElem); + } + } else { + fieldElem = dom.createElement(elemName); + fieldElem.appendChild(dom.createTextNode(value)); + entryElem.appendChild(fieldElem); + } + } + } + + return FileHandler::writeTextURL(url(), dom.toString(), + options() & ExportUTF8, options() & Export::ExportForce); +} + +#include "bibtexmlexporter.moc" 
/**
 * Exporter that serialises the entries being exported as a Bibtexml
 * XML document.
 *
 * The exporter has no user-configurable options, so widget() never
 * creates a configuration widget.
 *
 * @author Robby Stephenson
 */
class BibtexmlExporter : public Exporter {
Q_OBJECT

public:
  BibtexmlExporter() : Exporter() {}

  /**
   * Builds the Bibtexml DOM document (one "entry" element per exported
   * entry) and writes it to url().
   *
   * @return the result of FileHandler::writeTextURL() for the generated text
   */
  virtual bool exec();
  virtual QString formatString() const;
  virtual QString fileFilter() const;

  // no options
  virtual QWidget* widget(QWidget*, const char*) { return 0; }
};
published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "bibtexmlimporter.h" +#include "tellico_xml.h" +#include "bibtexhandler.h" +#include "../collections/bibtexcollection.h" +#include "../field.h" +#include "../entry.h" +#include "../latin1literal.h" +#include "../tellico_strings.h" +#include "../progressmanager.h" +#include "../tellico_debug.h" + +#include <kapplication.h> + +using Tellico::Import::BibtexmlImporter; + +bool BibtexmlImporter::canImport(int type) const { + return type == Data::Collection::Bibtex; +} + +Tellico::Data::CollPtr BibtexmlImporter::collection() { + if(!m_coll) { + loadDomDocument(); + } + return m_coll; +} + +void BibtexmlImporter::loadDomDocument() { + QDomElement root = domDocument().documentElement(); + if(root.isNull() || root.localName() != Latin1Literal("file")) { + setStatusMessage(i18n(errorLoad).arg(url().fileName())); + return; + } + + const QString& ns = XML::nsBibtexml; + m_coll = new Data::BibtexCollection(true); + + QDomNodeList entryelems = root.elementsByTagNameNS(ns, QString::fromLatin1("entry")); +// kdDebug() << "BibtexmlImporter::loadDomDocument - found " << entryelems.count() << " entries" << endl; + + const uint count = entryelems.count(); + const uint stepSize = QMAX(s_stepSize, count/100); + const bool showProgress = options() & ImportProgress; + + ProgressItem& item = ProgressManager::self()->newProgressItem(this, progressLabel(), true); + item.setTotalSteps(count); + connect(&item, SIGNAL(signalCancelled(ProgressItem*)), SLOT(slotCancel())); + ProgressItem::Done done(this); + + for(uint j = 0; !m_cancelled && j < entryelems.count(); ++j) { + readEntry(entryelems.item(j)); + + if(showProgress && j%stepSize == 0) { + ProgressManager::self()->setProgress(this, j); + kapp->processEvents(); + } + } // end entry loop +} + +void BibtexmlImporter::readEntry(const QDomNode& entryNode_) { + QDomNode node = 
const_cast<QDomNode&>(entryNode_); + + Data::EntryPtr entry = new Data::Entry(m_coll); + +/* The Bibtexml format looks like + <entry id="..."> + <book> + <authorlist> + <author>...</author> + </authorlist> + <publisher>...</publisher> */ + + QString type = node.firstChild().toElement().tagName(); + entry->setField(QString::fromLatin1("entry-type"), type); + QString id = node.toElement().attribute(QString::fromLatin1("id")); + entry->setField(QString::fromLatin1("bibtex-key"), id); + + QString name, value; + // field values are first child of first child of entry node + for(QDomNode n = node.firstChild().firstChild(); !n.isNull(); n = n.nextSibling()) { + // n could be something like authorlist, with multiple authors, or just + // a plain element with a single text child... + // second case first + if(n.firstChild().isText()) { + name = n.toElement().tagName(); + value = n.toElement().text(); + } else { + // is either titlelist, authorlist, editorlist, or keywords + QString parName = n.toElement().tagName(); + if(parName == Latin1Literal("titlelist")) { + for(QDomNode n2 = node.firstChild(); !n2.isNull(); n2 = n2.nextSibling()) { + name = n2.toElement().tagName(); + value = n2.toElement().text(); + if(!name.isEmpty() && !value.isEmpty()) { + BibtexHandler::setFieldValue(entry, name, value.simplifyWhiteSpace()); + } + } + name.truncate(0); + value.truncate(0); + } else { + name = n.firstChild().toElement().tagName(); + if(name == Latin1Literal("keyword")) { + name = QString::fromLatin1("keywords"); + } + value.truncate(0); + for(QDomNode n2 = n.firstChild(); !n2.isNull(); n2 = n2.nextSibling()) { + // n2 could have first, middle, lastname elements... 
+ if(name == Latin1Literal("person")) { + QStringList names; + names << QString::fromLatin1("initials") << QString::fromLatin1("first") + << QString::fromLatin1("middle") << QString::fromLatin1("prelast") + << QString::fromLatin1("last") << QString::fromLatin1("lineage"); + for(QStringList::ConstIterator it = names.begin(); it != names.end(); ++it) { + QDomNodeList list = n2.toElement().elementsByTagName(*it); + if(list.count() > 1) { + value += list.item(0).toElement().text(); + } + if(*it != names.last()) { + value += QString::fromLatin1(" "); + } + } + } + for(QDomNode n3 = n2.firstChild(); !n3.isNull(); n3 = n3.nextSibling()) { + if(n3.isElement()) { + value += n3.toElement().text(); + } else if(n3.isText()) { + value += n3.toText().data(); + } + if(n3 != n2.lastChild()) { + value += QString::fromLatin1(" "); + } + } + if(n2 != n.lastChild()) { + value += QString::fromLatin1("; "); + } + } + } + } + if(!name.isEmpty() && !value.isEmpty()) { + BibtexHandler::setFieldValue(entry, name, value.simplifyWhiteSpace()); + } + } + + m_coll->addEntries(entry); +} + +void BibtexmlImporter::slotCancel() { + m_cancelled = true; +} + +#include "bibtexmlimporter.moc" diff --git a/src/translators/bibtexmlimporter.h b/src/translators/bibtexmlimporter.h new file mode 100644 index 0000000..826ea30 --- /dev/null +++ b/src/translators/bibtexmlimporter.h @@ -0,0 +1,54 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef 
/**
 * Importer that builds a collection from a Bibtexml XML document.
 *
 * The document is parsed lazily on the first call to collection().
 *
 * @author Robby Stephenson
 */
class BibtexmlImporter : public XMLImporter {
Q_OBJECT

public:
  /**
   * @param url location of the Bibtexml document, handed to XMLImporter
   */
  BibtexmlImporter(const KURL& url) : Import::XMLImporter(url), m_coll(0), m_cancelled(false) {}

  /**
   * Returns the imported collection, loading the document if that has not
   * happened yet.
   */
  virtual Data::CollPtr collection();
  /**
   * @param type collection type being asked about
   * @return whether this importer can produce a collection of that type
   */
  virtual bool canImport(int type) const;

public slots:
  /** Marks the import as cancelled. */
  void slotCancel();

private:
  // walks the DOM document and fills m_coll
  void loadDomDocument();
  // converts one "entry" node into an entry of m_coll
  void readEntry(const QDomNode& entryNode);

  // result of the import; null until loadDomDocument() created it
  Data::CollPtr m_coll;
  // set by slotCancel()
  bool m_cancelled : 1;
};
(end)############ + +KDE_OPTIONS = noautodist diff --git a/src/translators/btparse/antlr.h b/src/translators/btparse/antlr.h new file mode 100644 index 0000000..f52aba6 --- /dev/null +++ b/src/translators/btparse/antlr.h @@ -0,0 +1,561 @@ +/* antlr.h + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ +#ifndef ANTLR_H +#define ANTLR_H + +#include "btconfig.h" + +/* + * Define all of the stack setup and manipulation of $i, #i variables. + * + * Notes: + * The type 'Attrib' must be defined before entry into this .h file. 
/*
 * Snapshot of the parser state. A variable of this type is declared by
 * zzGUESS_BLOCK and handed to zzsave_antlr_state() / zzrestore_antlr_state()
 * by the zzGUESS / zzGUESS_DONE macros.
 *
 * The members presumably mirror the zz-prefixed globals of the same name
 * (zzasp, zzast_sp, zztoken, ...); the save/restore code is not part of this
 * header - TODO confirm. Which members exist depends on the same
 * configuration macros that select the corresponding globals.
 */
typedef struct _zzantlr_state {
#ifdef ZZCAN_GUESS
    /* only present when guessing (syntactic predicates) is enabled */
    zzjmp_buf guess_start;
    int guessing;
#endif
    int asp;
    int ast_sp;
#ifdef ZZINF_LOOK
    /* only present with infinite lookahead */
    int inf_lap;    /* not sure we need to save this one */
    int inf_labase;
    int inf_last;
#endif
#ifdef DEMAND_LOOK
    int dirty;
#endif

#ifdef LL_K
    /* k-token lookahead: one token/text slot per lookahead position */
    int tokenLA[LL_K];
    char textLA[LL_K][ZZLEXBUFSIZE];
    int lap;
    int labase;
#else
    /* single-token lookahead */
    int token;
    char text[ZZLEXBUFSIZE];
#endif
    } zzantlr_state;
stack overflow %s(%d)!\n"; \ + Attrib zzaStack[ZZA_STACKSIZE]; DemandLookData \ + InfLookData \ + zzGuessData + +#else + +#define ANTLR_INFO \ + Attrib zzempty_attr(void) {static Attrib a; return a;} \ + Attrib zzconstr_attr(int _tok, char *_text)\ + {Attrib a; zzcr_attr((&a),_tok,_text); return a;} \ + int zzasp=ZZA_STACKSIZE; \ + char zzStackOvfMsg[]="fatal: attrib/AST stack overflow %s(%d)!\n"; \ + Attrib zzaStack[ZZA_STACKSIZE]; DemandLookData \ + InfLookData \ + zzGuessData + +#endif /* LL_k */ + + +#ifdef ZZINF_LOOK + +#ifdef LL_K +#ifdef DEMAND_LOOK +#define zzPrimeLookAhead {zzdirty=LL_K; zzlap = zzlabase = 0;} +#else +#define zzPrimeLookAhead {zzlap = zzlabase = 0; zzfill_inf_look();\ + {int _i; for(_i=1;_i<=LL_K; _i++) \ + {zzCONSUME;} zzlap = zzlabase = 0;}} +#endif + +#else /* LL_K */ + +#ifdef DEMAND_LOOK +#define zzPrimeLookAhead zzfill_inf_look(); zzdirty=1 +#else +#define zzPrimeLookAhead zzfill_inf_look(); inf_zzgettok + +#endif +#endif /* LL_K */ + +#else /* ZZINF_LOOK */ + +#ifdef LL_K +#ifdef DEMAND_LOOK +#define zzPrimeLookAhead {zzdirty=LL_K; zzlap = zzlabase = 0;} +#else +#define zzPrimeLookAhead {int _i; zzlap = 0; for(_i=1;_i<=LL_K; _i++) \ + {zzCONSUME;} zzlap = 0;} +#endif + +#else + +#ifdef DEMAND_LOOK +#define zzPrimeLookAhead zzdirty=1 +#else +#define zzPrimeLookAhead zzgettok() +#endif +#endif /* LL_K */ + +#endif /* ZZINF_LOOK */ + + +#ifdef LL_K +#define zzenterANTLRs(s) \ + zzlextext = &(zztextLA[0][0]); zzrdstr( s ); zzPrimeLookAhead; +#define zzenterANTLRf(f) \ + zzlextext = &(zztextLA[0][0]); zzrdfunc( f ); zzPrimeLookAhead; +#define zzenterANTLR(f) \ + zzlextext = &(zztextLA[0][0]); zzrdstream( f ); zzPrimeLookAhead; +#ifdef ZZINF_LOOK +#define zzleaveANTLR(f) free(zzinf_text_buffer); free(zzinf_text); free(zzinf_tokens); free(zzinf_line); +#define zzleaveANTLRf(f) free(zzinf_text_buffer); free(zzinf_text); free(zzinf_tokens); free(zzinf_line); +#define zzleaveANTLRs(f) free(zzinf_text_buffer); free(zzinf_text); 
free(zzinf_tokens); free(zzinf_line); +#else +#define zzleaveANTLR(f) +#define zzleaveANTLRf(f) +#define zzleaveANTLRs(f) +#endif + +#else + +#define zzenterANTLRs(s) \ + {static char zztoktext[ZZLEXBUFSIZE]; \ + zzlextext = zztoktext; zzrdstr( s ); zzPrimeLookAhead;} +#define zzenterANTLRf(f) \ + {static char zztoktext[ZZLEXBUFSIZE]; \ + zzlextext = zztoktext; zzrdfunc( f ); zzPrimeLookAhead;} +#define zzenterANTLR(f) \ + {static char zztoktext[ZZLEXBUFSIZE]; \ + zzlextext = zztoktext; zzrdstream( f ); zzPrimeLookAhead;} +#ifdef ZZINF_LOOK +#define zzleaveANTLR(f) free(zzinf_text_buffer); free(zzinf_text); free(zzinf_tokens); free(zzinf_line); +#define zzleaveANTLRf(f) free(zzinf_text_buffer); free(zzinf_text); free(zzinf_tokens); free(zzinf_line); +#define zzleaveANTLRs(f) free(zzinf_text_buffer); free(zzinf_text); free(zzinf_tokens); free(zzinf_line); +#else +#define zzleaveANTLR(f) +#define zzleaveANTLRf(f) +#define zzleaveANTLRs(f) +#endif + +#endif + +#define ANTLR(st, f) zzbufsize = ZZLEXBUFSIZE; \ + zzenterANTLR(f); \ + st; ++zzasp; \ + zzleaveANTLR(f); + +#define ANTLRm(st, f, _m) zzbufsize = ZZLEXBUFSIZE; \ + zzmode(_m); \ + zzenterANTLR(f); \ + st; ++zzasp; \ + zzleaveANTLR(f); + +#define ANTLRf(st, f) zzbufsize = ZZLEXBUFSIZE; \ + zzenterANTLRf(f); \ + st; ++zzasp; \ + zzleaveANTLRf(f); + +#define ANTLRs(st, s) zzbufsize = ZZLEXBUFSIZE; \ + zzenterANTLRs(s); \ + st; ++zzasp; \ + zzleaveANTLRs(s); + +#ifdef LL_K +#define zztext (&(zztextLA[zzlap][0])) +#else +#define zztext zzlextext +#endif + + + /* A r g u m e n t A c c e s s */ + +#define zzaCur (zzaStack[zzasp]) +#define zzaRet (*zzaRetPtr) +#define zzaArg(v,n) zzaStack[v-n] +#define zzMakeAttr { zzNON_GUESS_MODE {zzOvfChk; --zzasp; zzcr_attr(&(zzaStack[zzasp]),LA(1),LATEXT(1));}} +#ifdef zzdef0 +#define zzMake0 { zzOvfChk; --zzasp; zzdef0(&(zzaStack[zzasp]));} +#else +#define zzMake0 { zzOvfChk; --zzasp;} +#endif +#define zzaPush(_v) { zzOvfChk; zzaStack[--zzasp] = _v;} +#ifndef zzd_attr +#define 
zzREL(t) zzasp=(t); /* Restore state of stack */ +#else +#define zzREL(t) for (; zzasp<(t); zzasp++) \ + { zzd_attr(&(zzaStack[zzasp])); } +#endif + +#define zzsetmatch(_es) \ + if ( !_zzsetmatch(_es, &zzBadText, &zzMissText, &zzMissTok, &zzBadTok, &zzMissSet) ) goto fail; +#define zzsetmatch_wsig(_es, handler) \ + if ( !_zzsetmatch_wsig(_es) ) {_signal=MismatchedToken; goto handler;} + +extern int _zzsetmatch(SetWordType *, char **, char **, int *, int *, SetWordType **); +extern int _zzsetmatch_wsig(SetWordType *); + +#define zzmatch(_t) \ + if ( !_zzmatch(_t, &zzBadText, &zzMissText, &zzMissTok, &zzBadTok, &zzMissSet) ) goto fail; +#define zzmatch_wsig(_t,handler) \ + if ( !_zzmatch_wsig(_t) ) {_signal=MismatchedToken; goto handler;} + +extern int _zzmatch(int, const char **, const char **, int *, int *, SetWordType **); +extern int _zzmatch_wsig(int); + +#define zzmatch_wdfltsig(_t,_f) \ + if ( !_zzmatch_wdfltsig(_t,_f) ) _signal=MismatchedToken; +#define zzsetmatch_wdfltsig(tw,tt,wf) \ + if ( !_zzsetmatch_wdfltsig(tw,tt,wf) ) _signal=MismatchedToken; + +extern int _zzmatch_wdfltsig(int, SetWordType *); +extern int _zzsetmatch_wdfltsig(SetWordType *tokensWanted, + int tokenTypeOfSet, + SetWordType *whatFollows); + +#ifdef GENAST +#define zzRULE Attrib *zzaRetPtr = &(zzaStack[zzasp-1]); \ + SetWordType *zzMissSet=NULL; int zzMissTok=0; \ + int zzBadTok=0; const char *zzBadText=""; \ + int zzErrk=1; \ + const char *zzMissText=""; zzASTVars +#else +#define zzRULE Attrib *zzaRetPtr = &(zzaStack[zzasp-1]); \ + int zzBadTok=0; const char *zzBadText=""; \ + int zzErrk=1; \ + SetWordType *zzMissSet=NULL; int zzMissTok=0; const char *zzMissText="" +#endif + +#ifdef GENAST +#define zzBLOCK(i) int i = zzasp - 1; int zztsp = zzast_sp +#define zzEXIT(i) zzREL(i); zzastREL; zzNON_GUESS_MODE { zzastPush(*_root); } +#define zzLOOP(i) zzREL(i); zzastREL +#else +#define zzBLOCK(i) int i = zzasp - 1 +#define zzEXIT(i) zzREL(i) +#define zzLOOP(i) zzREL(i) +#endif + +#ifdef LL_K + 
+#ifdef DEMAND_LOOK +#define LOOK(_k) {int i,stop=_k-(LL_K-zzdirty); for (i=1; i<=stop; i++) \ + zzCONSUME;} +#define zzCONSUME {zzgettok(); zzdirty--; \ + zzlap = (zzlap+1)&(LL_K-1); \ + zzlextext = &(zztextLA[zzlap][0]);} +#else +#ifdef ZZINF_LOOK +#define zzCONSUME {inf_zzgettok; \ + zzlap = (zzlap+1)&(LL_K-1); \ + zzlextext = &(zztextLA[zzlap][0]); \ + } +#else +#define zzCONSUME {zzgettok(); \ + zzlap = (zzlap+1)&(LL_K-1); \ + zzlextext = &(zztextLA[zzlap][0]);} +#endif /* ZZINF_LOOK */ +#endif /* DEMAND_LOOK */ + +#else /* LL_K */ + +#ifdef DEMAND_LOOK +#define LOOK(_k) if ( zzdirty) zzCONSUME; +#ifdef ZZINF_LOOK +#define zzCONSUME inf_zzgettok; zzdirty=0; +#else +#define zzCONSUME zzgettok(); zzdirty=0; +#endif /* ZZINF_LOOK */ + +#else /* DEMAND_LOOK */ + +#ifdef ZZINF_LOOK +#define zzCONSUME inf_zzgettok +#else +#define zzCONSUME zzgettok(); +#endif + +#endif /* DEMAND_LOOK */ + +#endif /* LL_K */ + +#ifdef LL_K +#define NLA zztokenLA[zzlap&(LL_K-1)] /* --> next LA */ +#define NLATEXT zztextLA[zzlap&(LL_K-1)] /* --> next text of LA */ +#ifdef DEMAND_LOOK +#define LA(i) zztokenLA[(zzlabase+(i)-1)&(LL_K-1)] +#define LATEXT(i) (&(zztextLA[(zzlabase+(i)-1)&(LL_K-1)][0])) +#else +#define LA(i) zztokenLA[(zzlap+(i)-1)&(LL_K-1)] +#define LATEXT(i) (&(zztextLA[(zzlap+(i)-1)&(LL_K-1)][0])) +#endif +#else +#define NLA zztoken +#define NLATEXT zztext +#define LA(i) zztoken +#define LATEXT(i) zztext +#endif + + + /* S t a n d a r d S i g n a l s */ + +#define NoSignal 0 +#define MismatchedToken 1 +#define NoViableAlt 2 +#define NoSemViableAlt 3 + + + /* F u n c t i o n T r a c i n g */ + +#ifndef zzTRACEIN +#define zzTRACEIN(r) fprintf(stderr, "enter rule \"%s\"\n", r); +#endif +#ifndef zzTRACEOUT +#define zzTRACEOUT(r) fprintf(stderr, "exit rule \"%s\"\n", r); +#endif + +#ifdef ZZWCHAR_T +#define zzchar_t unsigned wchar_t +#else +#define zzchar_t unsigned char +#endif + + /* E x t e r n D e f s */ + +extern Attrib zzempty_attr(void); +extern Attrib zzconstr_attr(int, 
char *); +extern void zzsyn(const char *, int, char *, SetWordType *, int, int, const char *); +extern int zzset_el(unsigned, SetWordType *); +extern int zzset_deg(SetWordType *); +extern void zzedecode(SetWordType *); +extern void zzFAIL(int k, ...); +extern void zzresynch(SetWordType *, SetWordType); +extern void zzsave_antlr_state(zzantlr_state *); +extern void zzrestore_antlr_state(zzantlr_state *); +extern void zzfill_inf_look(void); +#ifdef EXCEPTION_HANDLING +extern void zzdflthandlers(int, int *); +#endif + + /* G l o b a l V a r i a b l e s */ + +/* Define a parser; user should do a "#parser myname" in their grammar file */ +/*extern struct pccts_parser zzparser;*/ + +extern const char *zztokens[]; +#ifdef LL_K +extern int zztokenLA[]; +extern char zztextLA[][ZZLEXBUFSIZE]; +extern int zzlap; +extern int zzlabase; +#else +extern int zztoken; +#endif + +extern char zzStackOvfMsg[]; +extern int zzasp; +extern Attrib zzaStack[]; +#ifdef ZZINF_LOOK +extern int *zzinf_tokens; +extern char **zzinf_text; +extern char *zzinf_text_buffer; +extern int *zzinf_line; +extern int zzinf_labase; +extern int zzinf_last; +#endif +#ifdef DEMAND_LOOK +extern int zzdirty; +#endif +#ifdef ZZCAN_GUESS +extern int zzguessing; +extern zzjmp_buf zzguess_start; +#endif + +/* Define global veriables that refer to values exported by the scanner. + * These declarations duplicate those in dlgdef.h, but are needed + * if ANTLR is not to generate a .dlg file (-gx); PS, this is a hack. 
+ */ +extern zzchar_t *zzlextext; /* text of most recently matched token */ +extern int zzbufsize; /* how long zzlextext is */ + +#endif diff --git a/src/translators/btparse/ast.c b/src/translators/btparse/ast.c new file mode 100644 index 0000000..d433f79 --- /dev/null +++ b/src/translators/btparse/ast.c @@ -0,0 +1,227 @@ +/* Abstract syntax tree manipulation functions + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. 
+ * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ +#include <stdarg.h> +#include <stdio.h> + +#include "ast.h" +#include "attrib.h" +#include "antlr.h" + +/* ensure that tree manipulation variables are current after a rule + * reference + */ +void +zzlink(AST **_root, AST **_sibling, AST **_tail) +{ + if ( *_sibling == NULL ) return; + if ( *_root == NULL ) *_root = *_sibling; + else if ( *_root != *_sibling ) (*_root)->down = *_sibling; + if ( *_tail==NULL ) *_tail = *_sibling; + while ( (*_tail)->right != NULL ) *_tail = (*_tail)->right; +} + +AST * +zzastnew(void) +{ + AST *p = (AST *) calloc(1, sizeof(AST)); + if ( p == NULL ) fprintf(stderr,"%s(%d): cannot allocate AST node\n",__FILE__,__LINE__); + return p; +} + +/* add a child node to the current sibling list */ +void +zzsubchild(AST **_root, AST **_sibling, AST **_tail) +{ + AST *n; + zzNON_GUESS_MODE { + n = zzastnew(); +#ifdef DEMAND_LOOK + zzcr_ast(n, &(zzaCur), LA(0), LATEXT(0)); +#else + zzcr_ast(n, &(zzaCur), LA(1), LATEXT(1)); +#endif + zzastPush( n ); + if ( *_tail != NULL ) (*_tail)->right = n; + else { + *_sibling = n; + if ( *_root != NULL ) (*_root)->down = *_sibling; + } + *_tail = n; + if ( *_root == NULL ) *_root = *_sibling; + } +} + +/* make a new AST node. Make the newly-created + * node the root for the current sibling list. If a root node already + * exists, make the newly-created node the root of the current root. 
+ */ +void +zzsubroot(AST **_root, AST **_sibling, AST **_tail) +{ + AST *n; + zzNON_GUESS_MODE { + n = zzastnew(); +#ifdef DEMAND_LOOK + zzcr_ast(n, &(zzaCur), LA(0), LATEXT(0)); +#else + zzcr_ast(n, &(zzaCur), LA(1), LATEXT(1)); +#endif + zzastPush( n ); + if ( *_root != NULL ) + if ( (*_root)->down == *_sibling ) *_sibling = *_tail = *_root; + *_root = n; + (*_root)->down = *_sibling; + } +} + +/* Apply function to root then each sibling + * example: print tree in child-sibling LISP-format (AST has token field) + * + * void show(tree) + * AST *tree; + * { + * if ( tree == NULL ) return; + * printf(" %s", zztokens[tree->token]); + * } + * + * void before() { printf(" ("); } + * void after() { printf(" )"); } + * + * LISPdump() { zzpre_ast(tree, show, before, after); } + * + */ +void +zzpre_ast( + AST *tree, + void (*func)(AST *), /* apply this to each tree node */ + void (*before)(AST *), /* apply this to root of subtree before preordering it */ + void (*after)(AST *)) /* apply this to root of subtree after preordering it */ +{ + while ( tree!= NULL ) + { + if ( tree->down != NULL ) (*before)(tree); + (*func)(tree); + zzpre_ast(tree->down, func, before, after); + if ( tree->down != NULL ) (*after)(tree); + tree = tree->right; + } +} + +/* free all AST nodes in tree; apply func to each before freeing */ +void +zzfree_ast(AST *tree) +{ + if ( tree == NULL ) return; + zzfree_ast( tree->down ); + zzfree_ast( tree->right ); + zztfree( tree ); +} + +/* build a tree (root child1 child2 ... NULL) + * If root is NULL, simply make the children siblings and return ptr + * to 1st sibling (child1). If root is not single node, return NULL. + * + * Siblings that are actually siblins lists themselves are handled + * correctly. For example #( NULL, #( NULL, A, B, C), D) results + * in the tree ( NULL A B C D ). + * + * Requires at least two parameters with the last one being NULL. If + * both are NULL, return NULL. + */ +AST *zztmake(AST *rt, ...) 
+{ + va_list ap; + register AST *child, *sibling=NULL, *tail, *w; + AST *root; + + va_start(ap, rt); + root = rt; + + if ( root != NULL ) + if ( root->down != NULL ) return NULL; + child = va_arg(ap, AST *); + while ( child != NULL ) + { + for (w=child; w->right!=NULL; w=w->right) {;} /* find end of child */ + if ( sibling == NULL ) {sibling = child; tail = w;} + else {tail->right = child; tail = w;} + child = va_arg(ap, AST *); + } + if ( root==NULL ) root = sibling; + else root->down = sibling; + va_end(ap); + return root; +} + +/* tree duplicate */ +AST * +zzdup_ast(AST *t) +{ + AST *u; + + if ( t == NULL ) return NULL; + u = zzastnew(); + *u = *t; +#ifdef zzAST_DOUBLE + u->up = NULL; /* set by calling invocation */ + u->left = NULL; +#endif + u->right = zzdup_ast(t->right); + u->down = zzdup_ast(t->down); +#ifdef zzAST_DOUBLE + if ( u->right!=NULL ) u->right->left = u; + if ( u->down!=NULL ) u->down->up = u; +#endif + return u; +} + +void +zztfree(AST *t) +{ +#ifdef zzd_ast + zzd_ast( t ); +#endif + free( t ); +} + +#ifdef zzAST_DOUBLE +/* + * Set the 'up', and 'left' pointers of all nodes in 't'. + * Initial call is double_link(your_tree, NULL, NULL). + */ +void +zzdouble_link(AST *t, AST *left, AST *up) +{ + if ( t==NULL ) return; + t->left = left; + t->up = up; + zzdouble_link(t->down, NULL, t); + zzdouble_link(t->right, t, up); +} +#endif diff --git a/src/translators/btparse/ast.h b/src/translators/btparse/ast.h new file mode 100644 index 0000000..59622ec --- /dev/null +++ b/src/translators/btparse/ast.h @@ -0,0 +1,99 @@ +/* Abstract syntax tree + * + * Macros, definitions + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. 
/*
 * Default AST node type. Nodes form a child-sibling tree: 'down' points to
 * the first child, 'right' to the next sibling. With zzAST_DOUBLE the
 * reverse links 'left' and 'up' are present as well. AST_FIELDS lets the
 * user append their own members; it defaults to nothing.
 *
 * When USER_DEFINED_AST is set the user supplies the AST type and is
 * expected to include AST_REQUIRED_FIELDS in it.
 */
#ifndef USER_DEFINED_AST
#ifndef AST_FIELDS
#define AST_FIELDS
#endif

typedef struct _ast {
    struct _ast *right, *down;
#ifdef zzAST_DOUBLE
    struct _ast *left, *up;
#endif
    AST_FIELDS
} AST;

#else

#ifdef zzAST_DOUBLE
#define AST_REQUIRED_FIELDS   struct _ast *right, *down, *left, *up;
#else
#define AST_REQUIRED_FIELDS   struct _ast *right, *down;
#endif

#endif
zzastStack[--zzast_sp] = p; +#define zzastDPush --zzast_sp +#define zzastMARK zztsp=zzast_sp; /* Save state of stack */ +#define zzastREL zzast_sp=zztsp; /* Return state of stack */ +#define zzrm_ast {zzfree_ast(*_root); _tail = _sibling = (*_root)=NULL;} + +extern int zzast_sp; +extern AST *zzastStack[]; + +void zzlink(AST **, AST **, AST **); +void zzsubchild(AST **, AST **, AST **); +void zzsubroot(AST **, AST **, AST **); +void zzpre_ast(AST *, void (*)(), void (*)(), void (*)()); +void zzfree_ast(AST *); +AST *zztmake(AST *, ...); +AST *zzdup_ast(AST *); +void zztfree(AST *); +void zzdouble_link(AST *, AST *, AST *); +AST *zzastnew(void); +#endif diff --git a/src/translators/btparse/attrib.h b/src/translators/btparse/attrib.h new file mode 100644 index 0000000..6a3cecf --- /dev/null +++ b/src/translators/btparse/attrib.h @@ -0,0 +1,35 @@ +/* ------------------------------------------------------------------------ +@NAME : attrib.h +@DESCRIPTION: Definition of the Attrib type needed by the PCCTS- + generated parser. +@CREATED : Summer 1996, Greg Ward +@MODIFIED : +@VERSION : $Id: attrib.h,v 1.3 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ + +#ifndef ATTRIB_H +#define ATTRIB_H + +/* + * Defining Attrib this way (as opposed to making it a pointer to a struct) + * avoid the expense of allocating/deallocating a structure for each token; + * this way, PCCTS statically allocates the whole stack once and that's + * it. (Of course, the stack is four times bigger than it would have been + * otherwise.) 
+ */ + +typedef struct { + int line; + int offset; + int token; + char *text; +} Attrib; + +#endif /* ATTRIB_H */ diff --git a/src/translators/btparse/bibtex.c b/src/translators/btparse/bibtex.c new file mode 100644 index 0000000..c922803 --- /dev/null +++ b/src/translators/btparse/bibtex.c @@ -0,0 +1,312 @@ +/* + * A n t l r T r a n s l a t i o n H e a d e r + * + * Terence Parr, Will Cohen, and Hank Dietz: 1989-1994 + * Purdue University Electrical Engineering + * With AHPCRC, University of Minnesota + * ANTLR Version 1.33 + */ +#include <stdio.h> +#define ANTLR_VERSION 133 + +#define ZZCOL +#define USER_ZZSYN + +#include "btconfig.h" +#include "btparse.h" +#include "attrib.h" +#include "lex_auxiliary.h" +#include "error.h" +#include "parse_auxiliary.h" +/*#include "my_dmalloc.h"*/ + +extern char * InputFilename; /* for zzcr_ast call in pccts/ast.c */ +#define GENAST + +#include "ast.h" + +#define zzSET_SIZE 4 +#include "antlr.h" +#include "tokens.h" +#include "dlgdef.h" +#include "mode.h" +#ifndef PURIFY +#define PURIFY(r,s) +#endif +#include "ast.c" +zzASTgvars + +ANTLR_INFO + +void +bibfile(AST**_root) +{ + zzRULE; + zzBLOCK(zztasp1); + zzMake0; + { + AST *last; (*_root) = NULL; + { + zzBLOCK(zztasp2); + zzMake0; + { + while ( (LA(1)==AT) ) { + _ast = NULL; entry(&_ast); + /* a little creative forestry... 
*/ + if ((*_root) == NULL) + (*_root) = zzastArg(1); + else + last->right = zzastArg(1); + last = zzastArg(1); + zzLOOP(zztasp2); + } + zzEXIT(zztasp2); + } + } + zzEXIT(zztasp1); + return; +fail: + zzEXIT(zztasp1); + zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); + zzresynch(setwd1, 0x1); + } +} + +void +entry(AST**_root) +{ + zzRULE; + zzBLOCK(zztasp1); + zzMake0; + { + bt_metatype metatype; + zzmatch(AT); zzCONSUME; + zzmatch(NAME); zzsubroot(_root, &_sibling, &_tail); + + metatype = entry_metatype(); + zzastArg(1)->nodetype = BTAST_ENTRY; + zzastArg(1)->metatype = metatype; + zzCONSUME; + + body(zzSTR, metatype ); zzlink(_root, &_sibling, &_tail); + zzEXIT(zztasp1); + return; +fail: + zzEXIT(zztasp1); + zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); + zzresynch(setwd1, 0x2); + } +} + +void +body(AST**_root, bt_metatype metatype ) +{ + zzRULE; + zzBLOCK(zztasp1); + zzMake0; + { + if ( (LA(1)==STRING) ) { + if (!(metatype == BTE_COMMENT )) {zzfailed_pred(" metatype == BTE_COMMENT ");} + zzmatch(STRING); zzsubchild(_root, &_sibling, &_tail); + zzastArg(1)->nodetype = BTAST_STRING; + zzCONSUME; + + } + else { + if ( (LA(1)==ENTRY_OPEN) ) { + zzmatch(ENTRY_OPEN); zzCONSUME; + contents(zzSTR, metatype ); zzlink(_root, &_sibling, &_tail); + zzmatch(ENTRY_CLOSE); zzCONSUME; + } + else {zzFAIL(1,zzerr1,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;} + } + zzEXIT(zztasp1); + return; +fail: + zzEXIT(zztasp1); + zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); + zzresynch(setwd1, 0x4); + } +} + +void +contents(AST**_root, bt_metatype metatype ) +{ + zzRULE; + zzBLOCK(zztasp1); + zzMake0; + { + if ( (setwd1[LA(1)]&0x8)&&(metatype == BTE_REGULAR /* || metatype == BTE_MODIFY */ ) ) { + if (!(metatype == BTE_REGULAR /* || metatype == BTE_MODIFY */ )) {zzfailed_pred(" metatype == BTE_REGULAR /* || metatype == BTE_MODIFY */ ");} + { + 
zzBLOCK(zztasp2); + zzMake0; + { + if ( (LA(1)==NAME) ) { + zzmatch(NAME); zzsubchild(_root, &_sibling, &_tail); zzCONSUME; + } + else { + if ( (LA(1)==NUMBER) ) { + zzmatch(NUMBER); zzsubchild(_root, &_sibling, &_tail); zzCONSUME; + } + else {zzFAIL(1,zzerr2,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;} + } + zzEXIT(zztasp2); + } + } + zzastArg(1)->nodetype = BTAST_KEY; + zzmatch(COMMA); zzCONSUME; + fields(zzSTR); zzlink(_root, &_sibling, &_tail); + } + else { + if ( (setwd1[LA(1)]&0x10)&&(metatype == BTE_MACRODEF ) ) { + if (!(metatype == BTE_MACRODEF )) {zzfailed_pred(" metatype == BTE_MACRODEF ");} + fields(zzSTR); zzlink(_root, &_sibling, &_tail); + } + else { + if ( (setwd1[LA(1)]&0x20)&&(metatype == BTE_PREAMBLE ) ) { + if (!(metatype == BTE_PREAMBLE )) {zzfailed_pred(" metatype == BTE_PREAMBLE ");} + value(zzSTR); zzlink(_root, &_sibling, &_tail); + } + else {zzFAIL(1,zzerr3,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;} + } + } + zzEXIT(zztasp1); + return; +fail: + zzEXIT(zztasp1); + zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); + zzresynch(setwd1, 0x40); + } +} + +void +fields(AST**_root) +{ + zzRULE; + zzBLOCK(zztasp1); + zzMake0; + { + if ( (LA(1)==NAME) ) { + field(zzSTR); zzlink(_root, &_sibling, &_tail); + { + zzBLOCK(zztasp2); + zzMake0; + { + if ( (LA(1)==COMMA) ) { + zzmatch(COMMA); zzCONSUME; + fields(zzSTR); zzlink(_root, &_sibling, &_tail); + } + zzEXIT(zztasp2); + } + } + } + else { + if ( (LA(1)==ENTRY_CLOSE) ) { + } + else {zzFAIL(1,zzerr4,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;} + } + zzEXIT(zztasp1); + return; +fail: + zzEXIT(zztasp1); + zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); + zzresynch(setwd1, 0x80); + } +} + +void +field(AST**_root) +{ + zzRULE; + zzBLOCK(zztasp1); + zzMake0; + { + zzmatch(NAME); zzsubroot(_root, &_sibling, &_tail); + zzastArg(1)->nodetype = BTAST_FIELD; 
check_field_name (zzastArg(1)); + zzCONSUME; + + zzmatch(EQUALS); zzCONSUME; + value(zzSTR); zzlink(_root, &_sibling, &_tail); + +#if DEBUG > 1 + printf ("field: fieldname = %p (%s)\n" + " first val = %p (%s)\n", + zzastArg(1)->text, zzastArg(1)->text, zzastArg(2)->text, zzastArg(2)->text); +#endif + zzEXIT(zztasp1); + return; +fail: + zzEXIT(zztasp1); + zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); + zzresynch(setwd2, 0x1); + } +} + +void +value(AST**_root) +{ + zzRULE; + zzBLOCK(zztasp1); + zzMake0; + { + simple_value(zzSTR); zzlink(_root, &_sibling, &_tail); + { + zzBLOCK(zztasp2); + zzMake0; + { + while ( (LA(1)==HASH) ) { + zzmatch(HASH); zzCONSUME; + simple_value(zzSTR); zzlink(_root, &_sibling, &_tail); + zzLOOP(zztasp2); + } + zzEXIT(zztasp2); + } + } + zzEXIT(zztasp1); + return; +fail: + zzEXIT(zztasp1); + zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); + zzresynch(setwd2, 0x2); + } +} + +void +simple_value(AST**_root) +{ + zzRULE; + zzBLOCK(zztasp1); + zzMake0; + { + if ( (LA(1)==STRING) ) { + zzmatch(STRING); zzsubchild(_root, &_sibling, &_tail); + zzastArg(1)->nodetype = BTAST_STRING; + zzCONSUME; + + } + else { + if ( (LA(1)==NUMBER) ) { + zzmatch(NUMBER); zzsubchild(_root, &_sibling, &_tail); + zzastArg(1)->nodetype = BTAST_NUMBER; + zzCONSUME; + + } + else { + if ( (LA(1)==NAME) ) { + zzmatch(NAME); zzsubchild(_root, &_sibling, &_tail); + zzastArg(1)->nodetype = BTAST_MACRO; + zzCONSUME; + + } + else {zzFAIL(1,zzerr5,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;} + } + } + zzEXIT(zztasp1); + return; +fail: + zzEXIT(zztasp1); + zzsyn(zzMissText, zzBadTok, (ANTLRChar *)"", zzMissSet, zzMissTok, zzErrk, zzBadText); + zzresynch(setwd2, 0x4); + } +} diff --git a/src/translators/btparse/bibtex_ast.c b/src/translators/btparse/bibtex_ast.c new file mode 100644 index 0000000..354cefb --- /dev/null +++ b/src/translators/btparse/bibtex_ast.c @@ -0,0 +1,63 @@ +/* 
------------------------------------------------------------------------ +@NAME : bibtex_ast.c +@DESCRIPTION: Data and functions for internal display/manipulation of AST + nodes. (Stuff for external consumption, and for processing + whole trees, is to be found in traversal.c.) +@GLOBALS : +@CREATED : 1997/08/12, Greg Ward +@MODIFIED : +@VERSION : $Id: bibtex_ast.c,v 1.6 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include "btparse.h" +#include "prototypes.h" +/*#include "my_dmalloc.h"*/ + + +const char *nodetype_names[] = /* printable names, indexed by a node's nodetype value */ +{ + "bogus", "entry", "key", "field", "string", "number", "macro" +}; + + +static void dump (AST *root, int depth) /* print root and its right-siblings at indent 2*depth, recursing into each node's children at depth+1 */ +{ + AST *cur; + + if (root == NULL) + { + printf ("[empty]\n"); + return; + } + + cur = root; + while (cur != NULL) + { + printf ("%*s[%s]: ", 2*depth, "", nodetype_names[cur->nodetype]); + if (cur->text != NULL) + printf ("(%s)\n", cur->text); + else + printf ("(null)\n"); + + if (cur->down != NULL) + dump (cur->down, depth+1); + cur = cur->right; + } +} + + +void dump_ast (char *msg, AST *root) /* print the optional msg verbatim, then the whole tree under root, then a blank line */ +{ + if (msg != NULL) + printf ("%s", msg); /* msg is data: never let it act as the format string */ + dump (root, 0); + printf ("\n"); +} diff --git a/src/translators/btparse/bt_debug.h b/src/translators/btparse/bt_debug.h new file mode 100644 index 0000000..913ae1a --- /dev/null +++ b/src/translators/btparse/bt_debug.h @@ -0,0 +1,38 @@ +/* ------------------------------------------------------------------------ +@NAME : bt_debug.h +@DESCRIPTION: Defines various macros needed for compile-time selection + of debugging code. 
+@GLOBALS : +@CREATED : +@MODIFIED : +@VERSION : $Id: bt_debug.h,v 1.2 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ + +#ifndef BT_DEBUG_H +#define BT_DEBUG_H + +/* + * DEBUG is the debug level -- an integer, defaults to 0 + * DBG_ACTION is a macro to conditionally execute a bit of code -- + * must have compiled with DEBUG true, and the debug level + * must be >= `level' (the macro argument) + */ + +#ifndef DEBUG +# define DEBUG 0 +#endif + +#if DEBUG +# define DBG_ACTION(level,action) if (DEBUG >= level) { action; } +#else +# define DBG_ACTION(level,action) +#endif + +#endif /* BT_DEBUG_H */ diff --git a/src/translators/btparse/btconfig.h b/src/translators/btparse/btconfig.h new file mode 100644 index 0000000..7405825 --- /dev/null +++ b/src/translators/btparse/btconfig.h @@ -0,0 +1,220 @@ +#ifndef BTPARSE_CONFIG_H +#define BTPARSE_CONFIG_H +/* + * config.h (for ANTLR, DLG, and SORCERER) + * + * This is a simple configuration file that doesn't have config stuff + * in it, but it's a start. + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. 
By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * Used by PCCTS 1.33 (SORCERER 1.00B11 and up) + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +/* This file knows about the following ``environments'' + UNIX (default) + DOS (use #define PC) + MAC (use #define MPW; has a few things for THINK C, Metrowerks) + */ + +/* +* Define PC32 if in a 32-bit PC environment (e.g. extended DOS or Win32). +* The macros tested here are defined by Watcom, Microsoft, Borland, +* and djgpp, respectively, when they are used as 32-bit compilers. +* Users of these compilers *must* be sure to define PC in their +* makefiles for this to work correctly. 
+*/ +#ifdef PC +# if (defined(__WATCOM__) || defined(_WIN32) || defined(__WIN32__) || \ + defined(__GNUC__) || defined(__GNUG__)) +# ifndef PC32 +# define PC32 +# endif +# endif +#endif + +#ifdef PC +#define ATOKEN_H "AToken.h" +#define ATOKPTR_H "ATokPtr.h" +#define ATOKPTR_C "ATokPtr.cpp" +#define ATOKENBUFFER_H "ATokBuf.h" +#define ATOKENBUFFER_C "ATokBuf.cpp" +#define ATOKENSTREAM_H "ATokStr.h" +#define APARSER_H "AParser.h" +#define APARSER_C "AParser.cpp" +#define ASTBASE_H "ASTBase.h" +#define ASTBASE_C "ASTBase.cpp" +#define PCCTSAST_C "PCCTSAST.cpp" +#define LIST_C "List.cpp" +#define DLEXERBASE_H "DLexBase.h" +#define DLEXERBASE_C "DLexBase.cpp" +#define DLEXER_C "DLexer.cpp" +#define STREESUPPORT_C "STreeSup.C" +#else +#define ATOKEN_H "AToken.h" +#define ATOKPTR_H "ATokPtr.h" +#define ATOKPTR_C "ATokPtr.cpp" +#define ATOKENBUFFER_H "ATokenBuffer.h" +#define ATOKENBUFFER_C "ATokenBuffer.cpp" +#define ATOKENSTREAM_H "ATokenStream.h" +#define APARSER_H "AParser.h" +#define APARSER_C "AParser.cpp" +#define ASTBASE_H "ASTBase.h" +#define ASTBASE_C "ASTBase.cpp" +#define PCCTSAST_C "PCCTSAST.cpp" +#define LIST_C "List.cpp" +#define DLEXERBASE_H "DLexerBase.h" +#define DLEXERBASE_C "DLexerBase.cpp" +#define DLEXER_C "DLexer.cpp" +#define STREESUPPORT_C "STreeSupport.cpp" +#endif + +/* SORCERER Stuff */ +#ifdef PC +#define STPARSER_H "STreePar.h" +#define STPARSER_C "STreePar.C" +#else +#define STPARSER_H "STreeParser.h" +#define STPARSER_C "STreeParser.cpp" +#endif + +#ifdef MPW +#define CPP_FILE_SUFFIX ".cp" +#define CPP_FILE_SUFFIX_NO_DOT "cp" +#define OBJ_FILE_SUFFIX ".o" +#else +#ifdef PC +#define CPP_FILE_SUFFIX ".cpp" +#define CPP_FILE_SUFFIX_NO_DOT "cpp" +#define OBJ_FILE_SUFFIX ".obj" +#else +#define CPP_FILE_SUFFIX ".cpp" +#define CPP_FILE_SUFFIX_NO_DOT "cpp" +#define OBJ_FILE_SUFFIX ".o" +#endif +#endif + +/* User may redefine how line information looks */ +#define LineInfoFormatStr "# %d \"%s\"\n" + +#ifdef MPW /* Macintosh Programmer's Workshop */ 
+#define ErrHdr "File \"%s\"; Line %d #" +#else +#define ErrHdr "%s, line %d:" +#endif + + +/* must assume old K&R cpp here, can't use #if defined(..)... */ + +#ifdef MPW +#define TopDirectory ":" +#define DirectorySymbol ":" +#define OutputDirectoryOption "Directory where all output files should go (default=\":\")" +#else +#ifdef PC +#define TopDirectory "." +#define DirectorySymbol "\\" +#define OutputDirectoryOption "Directory where all output files should go (default=\".\")" +#else +#define TopDirectory "." +#define DirectorySymbol "/" +#define OutputDirectoryOption "Directory where all output files should go (default=\".\")" +#endif +#endif + +#ifdef MPW + +/* Make sure we have prototypes for all functions under MPW */ + +#include <string.h> +#include <stdlib.h> +#include <CursorCtl.h> +#ifdef __cplusplus +extern "C" { +#endif +extern void fsetfileinfo (char *filename, unsigned long newcreator, unsigned long newtype); +#ifdef __cplusplus +} +#endif + +/* File creators for various popular development environments */ + +#define MAC_FILE_CREATOR 'MPS ' /* MPW Text files */ +#if 0 +#define MAC_FILE_CREATOR 'KAHL' /* THINK C/Symantec C++ Text files */ +#endif +#if 0 +#define MAC_FILE_CREATOR 'MMCC' /* Metrowerks C/C++ Text files */ +#endif + +#endif + +#ifdef MPW +#define DAWDLE SpinCursor(1) +#else +#define DAWDLE +#endif + + +/* + * useless definitions of special_inits() and special_fopen_actions() + * deleted -- GPW 1997/09/06 + */ + +/* Define usable bits for set.c stuff */ +#define BytesPerWord sizeof(unsigned) +#define WORDSIZE (sizeof(unsigned)*8) +#define LogWordSize (WORDSIZE==16?4:5) + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +#ifdef VAXC +#define PCCTS_EXIT_SUCCESS 1 +#define PCCTS_EXIT_FAILURE 0 +#define zzDIE return 0; +#define zzDONE return 1; + +#else /* !VAXC */ + +#define PCCTS_EXIT_SUCCESS 0 +#define PCCTS_EXIT_FAILURE 1 +#define zzDIE return 1; +#define zzDONE return 0; + +#endif + +#ifdef USER_ZZMODE_STACK 
+# ifndef ZZSTACK_MAX_MODE +# define ZZSTACK_MAX_MODE 32 +# endif +# define ZZMAXSTK (ZZSTACK_MAX_MODE * 2) +#endif + +#endif diff --git a/src/translators/btparse/btparse.h b/src/translators/btparse/btparse.h new file mode 100644 index 0000000..841d3ee --- /dev/null +++ b/src/translators/btparse/btparse.h @@ -0,0 +1,378 @@ +/* ------------------------------------------------------------------------ +@NAME : btparse.h +@DESCRIPTION: Declarations and types for users of the btparse library. + + (Actually, btparse.h is generated from btparse.h.in by + the `configure' script, in order to automatically determine + the appropriate values of HAVE_USHORT and HAVE_BOOLEAN.) +@GLOBALS : +@CALLS : +@CREATED : 1997/01/19, Greg Ward +@MODIFIED : +@VERSION : $Id: btparse.h.in,v 1.35 1999/12/28 18:23:17 greg Exp $ +@COPYRIGHT : Copyright (c) 1996-97 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ +#ifndef BTPARSE_H +#define BTPARSE_H + +#include <sys/types.h> /* probably supplies 'ushort' */ +#include <stdio.h> + +#include "config.h" /* not btparse's config.h but Tellico's */ + +/* + * Here we attempt to define HAVE_USHORT if a typdef for `ushort' appears + * in <sys/types.h>. The detective work is actually done by the + * `configure' script, so if compilation fails because of duplicate + * definitions of `ushort', that's a bug in `configure' -- please tell me + * about it! + */ + +#ifndef HAVE_USHORT +# define HAVE_USHORT 0 +#endif + +#if ! HAVE_USHORT /* needed for various bitmaps */ +typedef unsigned short ushort; +#endif + + +/* Likewise for boolean. */ + +#ifndef HAVE_BOOLEAN +# define HAVE_BOOLEAN 0 +#endif + +#if ! 
HAVE_BOOLEAN +typedef int boolean; +#endif + +#ifndef TRUE +# define TRUE 1 +# define FALSE 0 +#endif + +#ifndef HAVE_STRLWR +# define HAVE_STRLWR 0 +#endif + +#ifndef HAVE_STRUPR +# define HAVE_STRUPR 0 +#endif + + +/* Parsing (and post-processing) options */ + +#define BTO_CONVERT 1 /* convert numbers to strings? */ +#define BTO_EXPAND 2 /* expand macros? */ +#define BTO_PASTE 4 /* paste substrings together? */ +#define BTO_COLLAPSE 8 /* collapse whitespace? */ + +#define BTO_NOSTORE 16 + +#define BTO_FULL (BTO_CONVERT | BTO_EXPAND | BTO_PASTE | BTO_COLLAPSE) +#define BTO_MACRO (BTO_CONVERT | BTO_EXPAND | BTO_PASTE) +#define BTO_MINIMAL 0 + +#define BTO_STRINGMASK (BTO_CONVERT | BTO_EXPAND | BTO_PASTE | BTO_COLLAPSE) + +#define BT_VALID_NAMEPARTS "fvlj" +#define BT_MAX_NAMEPARTS 4 + +typedef enum +{ + BTE_UNKNOWN, + BTE_REGULAR, + BTE_COMMENT, + BTE_PREAMBLE, + BTE_MACRODEF +/* + BTE_ALIAS, + BTE_MODIFY +*/ +} bt_metatype; + +#define NUM_METATYPES ((int) BTE_MACRODEF + 1) + +typedef enum +{ + BTAST_BOGUS, /* to detect uninitialized nodes */ + BTAST_ENTRY, + BTAST_KEY, + BTAST_FIELD, + BTAST_STRING, + BTAST_NUMBER, + BTAST_MACRO +} bt_nodetype; + +typedef enum +{ + BTN_FIRST, BTN_VON, BTN_LAST, BTN_JR, BTN_NONE +} bt_namepart; + +typedef enum +{ + BTJ_MAYTIE, /* "discretionary" tie between words */ + BTJ_SPACE, /* force a space between words */ + BTJ_FORCETIE, /* force a tie (~ in TeX) */ + BTJ_NOTHING /* nothing between words */ +} bt_joinmethod; + + +#define USER_DEFINED_AST 1 + +#define zzcr_ast(ast,attr,tok,txt) \ +{ \ + (ast)->filename = InputFilename; \ + (ast)->line = (attr)->line; \ + (ast)->offset = (attr)->offset; \ + (ast)->text = strdup ((attr)->text); \ +} + +#define zzd_ast(ast) \ +/* printf ("zzd_ast: free'ing ast node with string %p (%s)\n", \ + (ast)->text, (ast)->text); */ \ + if ((ast)->text != NULL) free ((ast)->text); + + +#ifdef USER_DEFINED_AST +typedef struct _ast +{ + struct _ast *right, *down; + char * filename; + int line; + int offset; 
+ bt_nodetype nodetype; + bt_metatype metatype; + char * text; +} AST; +#endif /* USER_DEFINED_AST */ + + +typedef struct +{ + /* + * `string' is the string that has been split; items[0] ... + * items[num_items-1] are pointers into `string', or NULL for empty + * substrings. Note that `string' is actually a copy of the string + * passed in to bt_split_list() with NULs inserted between substrings. + */ + + char * string; + int num_items; + char ** items; +} bt_stringlist; + + +typedef struct +{ + bt_stringlist * tokens; /* flat list of all tokens in name */ + char ** parts[BT_MAX_NAMEPARTS]; /* each elt. is list of pointers */ + /* into `tokens->string' */ + int part_len[BT_MAX_NAMEPARTS]; /* length in tokens */ +} bt_name; + + +typedef struct tex_tree_s +{ + char * start; + int len; + struct tex_tree_s + * child, + * next; +} bt_tex_tree; + + +typedef struct +{ + /* These determine the order (and presence) of parts in the name. */ + int num_parts; + bt_namepart parts[BT_MAX_NAMEPARTS]; + + /* + * These lists are always in the order of the bt_namepart enum -- *not* + * dependent on the particular order of parts the user specified! (This + * will make it a bit harder if I ever allow more than one occurrence of + * a part in a format; since I don't allow that, I'm not [yet] worried + * about it!) 
+ */ + const char * pre_part[BT_MAX_NAMEPARTS]; + char * post_part[BT_MAX_NAMEPARTS]; + char * pre_token[BT_MAX_NAMEPARTS]; + const char * post_token[BT_MAX_NAMEPARTS]; + boolean abbrev[BT_MAX_NAMEPARTS]; + bt_joinmethod join_tokens[BT_MAX_NAMEPARTS]; + bt_joinmethod join_part[BT_MAX_NAMEPARTS]; +} bt_name_format; + + +typedef enum +{ + BTERR_NOTIFY, /* notification about next action */ + BTERR_CONTENT, /* warning about the content of a record */ + BTERR_LEXWARN, /* warning in lexical analysis */ + BTERR_USAGEWARN, /* warning about library usage */ + BTERR_LEXERR, /* error in lexical analysis */ + BTERR_SYNTAX, /* error in parser */ + BTERR_USAGEERR, /* fatal error in library usage */ + BTERR_INTERNAL /* my fault */ +} bt_errclass; + +typedef enum +{ + BTACT_NONE, /* do nothing on error */ + BTACT_CRASH, /* call exit(1) */ + BTACT_ABORT /* call abort() */ +} bt_erraction; + +typedef struct +{ + bt_errclass errclass; + char * filename; + int line; + const char * item_desc; + int item; + char * message; +} bt_error; + +typedef void (*bt_err_handler) (bt_error *); + + +#if defined(__cplusplus__) || defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/* Function prototypes */ + +/* + * First, we might need a prototype for strdup() (because the zzcr_ast + * macro uses it, and that macro is used in pccts/ast.c -- which I don't + * want to modify if I can help it, because it's someone else's code). + * This is to accomodate AIX, where including <string.h> apparently doesn't + * declare strdup() (reported by Reiner Schlotte + * <schlotte@geo.palmod.uni-bremen.de>), and compiling bibtex.c (which + * includes pccts/ast.c) crashes because of this (yes, yes, I know it + * should just be a warning -- I don't know what's going on there!). 
+ * + * Unfortunately, this duplicates code in bt_config.h -- I can't include + * bt_config.h here, because this header must be freestanding; I don't want + * to include bt_config.h in pccts/ast.c, because I don't want to touch the + * PCCTS code if I can help it; but I don't want every source file that + * uses strdup() to have to include btparse.h. Hence the duplication. + * Yuck. + */ +#ifndef HAVE_STRDUP_DECL +# define HAVE_STRDUP_DECL 0 +#endif +#if !HAVE_STRDUP_DECL +extern char *strdup (const char *s); +#endif + + +/* init.c */ +void bt_initialize (void); +void bt_free_ast (AST *ast); +void bt_cleanup (void); + +/* input.c */ +void bt_set_stringopts (bt_metatype metatype, ushort options); +AST * bt_parse_entry_s (char * entry_text, + char * filename, + int line, + ushort options, + boolean * status); +AST * bt_parse_entry (FILE * infile, + char * filename, + ushort options, + boolean * status); +AST * bt_parse_file (char * filename, + ushort options, + boolean * overall_status); + +/* postprocess.c */ +void bt_postprocess_string (char * s, ushort options); +char * bt_postprocess_value (AST * value, ushort options, boolean replace); +char * bt_postprocess_field (AST * field, ushort options, boolean replace); +void bt_postprocess_entry (AST * entry, ushort options); + +/* error.c */ +void bt_reset_error_counts (void); +int bt_get_error_count (bt_errclass errclass); +int * bt_get_error_counts (int *counts); +ushort bt_error_status (int *saved_counts); + +/* macros.c */ +void bt_add_macro_value (AST *assignment, ushort options); +void bt_add_macro_text (char * macro, char * text, char * filename, int line); +void bt_delete_macro (char * macro); +void bt_delete_all_macros (void); +int bt_macro_length (char *macro); +char * bt_macro_text (char * macro, char * filename, int line); + +/* traversal.c */ +AST *bt_next_entry (AST *entry_list, AST *prev_entry); +bt_metatype bt_entry_metatype (AST *entry); +char *bt_entry_type (AST *entry); +char *bt_entry_key (AST 
*entry); +AST *bt_next_field (AST *entry, AST *prev, char **name); +AST *bt_next_macro (AST *entry, AST *prev, char **name); +AST *bt_next_value (AST *head, + AST *prev, + bt_nodetype *nodetype, + char **text); +char *bt_get_text (AST *node); + +/* modify.c */ +void bt_set_text (AST * node, char * new_text); +void bt_entry_set_key (AST * entry, char * new_key); + +/* names.c */ +bt_stringlist * bt_split_list (char * string, + char * delim, + char * filename, + int line, + char * description); +void bt_free_list (bt_stringlist *list); +bt_name * bt_split_name (char * name, + char * filename, + int line, + int name_num); +void bt_free_name (bt_name * name); + +/* tex_tree.c */ +bt_tex_tree * bt_build_tex_tree (char * string); +void bt_free_tex_tree (bt_tex_tree **top); +void bt_dump_tex_tree (bt_tex_tree *node, int depth, FILE *stream); +char * bt_flatten_tex_tree (bt_tex_tree *top); + +/* string_util.c */ +void bt_purify_string (char * string, ushort options); +void bt_change_case (char transform, char * string, ushort options); + +/* format_name.c */ +bt_name_format * bt_create_name_format (char * parts, boolean abbrev_first); +void bt_free_name_format (bt_name_format * format); +void bt_set_format_text (bt_name_format * format, + bt_namepart part, + char * pre_part, + char * post_part, + char * pre_token, + char * post_token); +void bt_set_format_options (bt_name_format * format, + bt_namepart part, + boolean abbrev, + bt_joinmethod join_tokens, + bt_joinmethod join_part); +char * bt_format_name (bt_name * name, bt_name_format * format); + +#if defined(__cplusplus__) || defined(__cplusplus) || defined(c_plusplus) +} +#endif + +#endif /* BTPARSE_H */ diff --git a/src/translators/btparse/dlgauto.h b/src/translators/btparse/dlgauto.h new file mode 100644 index 0000000..efcc3b2 --- /dev/null +++ b/src/translators/btparse/dlgauto.h @@ -0,0 +1,408 @@ +/* dlgauto.h automaton + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction 
Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Will Cohen and Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +#ifndef ZZDEFAUTO_H +#define ZZDEFAUTO_H + +zzchar_t *zzlextext; /* text of most recently matched token */ +zzchar_t *zzbegexpr; /* beginning of last reg expr recogn. */ +zzchar_t *zzendexpr; /* end of last reg expr recogn. 
*/ +int zzbufsize; /* number of characters in zzlextext */ +int zzbegcol = 0; /* column that first character of token is in*/ +int zzendcol = 0; /* column that last character of token is in */ +int zzline = 1; /* line current token is on */ +int zzreal_line=1; /* line of 1st portion of token that is not skipped */ +int zzchar; /* character to determine next state */ +int zzbufovf; /* indicates that buffer too small for text */ +int zzcharfull = 0; +static zzchar_t *zznextpos;/* points to next available position in zzlextext*/ +static int zzclass; + +void zzerrstd(const char *); +void (*zzerr)(const char *)=zzerrstd;/* pointer to error reporting function */ +extern int zzerr_in(void); + +static FILE *zzstream_in=0; +static int (*zzfunc_in)() = zzerr_in; +static zzchar_t *zzstr_in=0; + +#ifdef USER_ZZMODE_STACK +int zzauto = 0; +#else +static int zzauto = 0; +#endif +static int zzadd_erase; +static char zzebuf[70]; + +#ifdef ZZCOL +#define ZZINC (++zzendcol) +#else +#define ZZINC +#endif + + +#define ZZGETC_STREAM {zzchar = getc(zzstream_in); zzclass = ZZSHIFT(zzchar);} +#define ZZGETC_FUNC {zzchar = (*zzfunc_in)(); zzclass = ZZSHIFT(zzchar);} +#define ZZGETC_STR { \ + if (*zzstr_in){ \ + zzchar = *zzstr_in; \ + ++zzstr_in; \ + }else{ \ + zzchar = EOF; \ + } \ + zzclass = ZZSHIFT(zzchar); \ +} + +#define ZZNEWSTATE (newstate = dfa[state][zzclass]) + +#ifndef ZZCOPY +#define ZZCOPY \ + /* Truncate matching buffer to size (not an error) */ \ + if (zznextpos < lastpos){ \ + *(zznextpos++) = zzchar; \ + }else{ \ + zzbufovf = 1; \ + } +#endif + +void +zzrdstream( FILE *f ) +{ + /* make sure that it is really set to something, otherwise just + leave it be. + */ + if (f){ + /* make sure that there is always someplace to get input + before closing zzstream_in + */ + zzline = 1; + zzstream_in = f; + zzfunc_in = NULL; + zzstr_in = 0; + zzcharfull = 0; + } +} + +void +zzrdfunc( int (*f)() ) +{ + /* make sure that it is really set to something, otherwise just + leave it be. 
+ */ + if (f){ + /* make sure that there is always someplace to get input + before closing zzstream_in + */ + zzline = 1; + zzstream_in = NULL; + zzfunc_in = f; + zzstr_in = 0; + zzcharfull = 0; + } +} + + +void +zzrdstr( zzchar_t *s ) +{ + /* make sure that it is really set to something, otherwise just + leave it be. + */ + if (s){ + /* make sure that there is always someplace to get input + before closing zzstream_in + */ + zzline = 1; + zzstream_in = NULL; + zzfunc_in = 0; + zzstr_in = s; + zzcharfull = 0; + } +} + + +void +zzclose_stream() +{ +} + +/* saves dlg state, but not what feeds dlg (such as file position) */ +void +zzsave_dlg_state(struct zzdlg_state *state) +{ + state->stream = zzstream_in; + state->func_ptr = zzfunc_in; + state->str = zzstr_in; + state->auto_num = zzauto; + state->add_erase = zzadd_erase; + state->lookc = zzchar; + state->char_full = zzcharfull; + state->begcol = zzbegcol; + state->endcol = zzendcol; + state->line = zzline; + state->lextext = zzlextext; + state->begexpr = zzbegexpr; + state->endexpr = zzendexpr; + state->bufsize = zzbufsize; + state->bufovf = zzbufovf; + state->nextpos = zznextpos; + state->class_num = zzclass; +} + +void +zzrestore_dlg_state(struct zzdlg_state *state) +{ + zzstream_in = state->stream; + zzfunc_in = state->func_ptr; + zzstr_in = state->str; + zzauto = state->auto_num; + zzadd_erase = state->add_erase; + zzchar = state->lookc; + zzcharfull = state->char_full; + zzbegcol = state->begcol; + zzendcol = state->endcol; + zzline = state->line; + zzlextext = state->lextext; + zzbegexpr = state->begexpr; + zzendexpr = state->endexpr; + zzbufsize = state->bufsize; + zzbufovf = state->bufovf; + zznextpos = state->nextpos; + zzclass = state->class_num; +} + +void +zzmode( int m ) +{ + /* points to base of dfa table */ + if (m<MAX_MODE){ + zzauto = m; + /* have to redo class since using different compression */ + zzclass = ZZSHIFT(zzchar); + }else{ + sprintf(zzebuf,"Invalid automaton mode = %d ",m); + 
zzerr(zzebuf); + } +} + +/* erase what is currently in the buffer, and get a new reg. expr */ +void +zzskip() +{ + zzadd_erase = 1; +} + +/* don't erase what is in the zzlextext buffer, add on to it */ +void +zzmore() +{ + zzadd_erase = 2; +} + +/* substitute c for the reg. expr last matched and is in the buffer */ +void +zzreplchar(zzchar_t c) +{ + /* can't allow overwriting null at end of string */ + if (zzbegexpr < &zzlextext[zzbufsize-1]){ + *zzbegexpr = c; + *(zzbegexpr+1) = '\0'; + } + zzendexpr = zzbegexpr; + zznextpos = zzbegexpr + 1; +} + +/* replace the string s for the reg. expr last matched and in the buffer; + * a NULL s is treated as an empty replacement. Sets zzbufovf when s had + * to be truncated to fit in zzlextext. */ +void +zzreplstr(register zzchar_t *s) +{ + register zzchar_t *l= &zzlextext[zzbufsize -1]; + + zznextpos = zzbegexpr; + if (s){ + while ((zznextpos <= l) && (*(zznextpos++) = *(s++))!=0){ + /* empty */ + } + /* correct for NULL at end of string */ + zznextpos--; + } + /* only look back at the last copied char when s was actually walked */ + if ((zznextpos <= l) && (!s || *(--s) == 0)){ + zzbufovf = 0; + }else{ + zzbufovf = 1; + } + *(zznextpos) = '\0'; + zzendexpr = zznextpos - 1; +} + +void +zzgettok() +{ + register int state, newstate; + /* last space reserved for the null char */ + zzchar_t *lastpos; /* GPW 1997/09/05 (removed 'register' */ + +skip: + zzreal_line = zzline; + zzbufovf = 0; + lastpos = &zzlextext[zzbufsize-1]; + zznextpos = zzlextext; + zzbegcol = zzendcol+1; +more: + zzbegexpr = zznextpos; +#ifdef ZZINTERACTIVE + /* interactive version of automaton */ + /* if there is something in zzchar, process it */ + state = newstate = dfa_base[zzauto]; + if (zzcharfull){ + ZZINC; + ZZCOPY; + ZZNEWSTATE; + } + if (zzstr_in) + while (zzalternatives[newstate]){ + state = newstate; + ZZGETC_STR; + ZZINC; + ZZCOPY; + ZZNEWSTATE; + } + else if (zzstream_in) + while (zzalternatives[newstate]){ + state = newstate; + ZZGETC_STREAM; + ZZINC; + ZZCOPY; + ZZNEWSTATE; + } + else if (zzfunc_in) + while (zzalternatives[newstate]){ + state = newstate; + ZZGETC_FUNC; + ZZINC; + ZZCOPY; + ZZNEWSTATE; + } + /* figure out if last 
character really part of token */ + if ((state != dfa_base[zzauto]) && (newstate == DfaStates)){ + zzcharfull = 1; + --zznextpos; + }else{ + zzcharfull = 0; + state = newstate; + } + *(zznextpos) = '\0'; + /* Able to transition out of start state to some non err state?*/ + if ( state == dfa_base[zzauto] ){ + /* make sure doesn't get stuck */ + zzadvance(); + } +#else + /* non-interactive version of automaton */ + if (!zzcharfull) + zzadvance(); + else + ZZINC; + state = dfa_base[zzauto]; + if (zzstr_in) + while (ZZNEWSTATE != DfaStates){ + state = newstate; + ZZCOPY; + ZZGETC_STR; + ZZINC; + } + else if (zzstream_in) + while (ZZNEWSTATE != DfaStates){ + state = newstate; + ZZCOPY; + ZZGETC_STREAM; + ZZINC; + } + else if (zzfunc_in) + while (ZZNEWSTATE != DfaStates){ + state = newstate; + ZZCOPY; + ZZGETC_FUNC; + ZZINC; + } + zzcharfull = 1; + if ( state == dfa_base[zzauto] ){ + if (zznextpos < lastpos){ + *(zznextpos++) = zzchar; + }else{ + zzbufovf = 1; + } + *zznextpos = '\0'; + /* make sure doesn't get stuck */ + zzadvance(); + }else{ + *zznextpos = '\0'; + } +#endif +#ifdef ZZCOL + zzendcol -= zzcharfull; +#endif + zzendexpr = zznextpos -1; + zzadd_erase = 0; + (*actions[accepts[state]])(); + switch (zzadd_erase) { + case 1: goto skip; + case 2: goto more; + } +} + +void +zzadvance() +{ + if (zzstream_in) { ZZGETC_STREAM; zzcharfull = 1; ZZINC;} + if (zzfunc_in) { ZZGETC_FUNC; zzcharfull = 1; ZZINC;} + if (zzstr_in) { ZZGETC_STR; zzcharfull = 1; ZZINC;} + if (!(zzstream_in || zzfunc_in || zzstr_in)){ + zzerr_in(); + } +} + +void +zzerrstd(const char *s) +{ + fprintf(stderr, + "%s near line %d (text was '%s')\n", + ((s == NULL) ? 
"Lexical error" : s), + zzline,zzlextext); +} + +int +zzerr_in() +{ + fprintf(stderr,"No input stream, function, or string\n"); + /* return eof to get out gracefully */ + return EOF; +} + +#endif diff --git a/src/translators/btparse/dlgdef.h b/src/translators/btparse/dlgdef.h new file mode 100644 index 0000000..ded2c31 --- /dev/null +++ b/src/translators/btparse/dlgdef.h @@ -0,0 +1,97 @@ +/* dlgdef.h + * Things in scanner produced by dlg that should be visible to the outside + * world + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. 
+ * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +#ifndef ZZDLGDEF_H +#define ZZDLGDEF_H + +#include "btconfig.h" + +#ifndef zzchar_t +#ifdef ZZWCHAR_T +#define zzchar_t unsigned wchar_t +#else +#define zzchar_t unsigned char +#endif +#endif + +struct zzdlg_state { + FILE *stream; + int (*func_ptr)(); + zzchar_t *str; + int auto_num; + int add_erase; + int lookc; + int char_full; + int begcol, endcol; + int line; + zzchar_t *lextext, *begexpr, *endexpr; + int bufsize; + int bufovf; + zzchar_t *nextpos; + int class_num; +}; + +extern zzchar_t *zzlextext; /* text of most recently matched token */ +extern zzchar_t *zzbegexpr; /* beginning of last reg expr recogn. */ +extern zzchar_t *zzendexpr; /* end of last reg expr recogn. */ +extern int zzbufsize; /* how long zzlextext is */ +extern int zzbegcol; /* column that first character of token is in*/ +extern int zzendcol; /* column that last character of token is in */ +extern int zzline; /* line current token is on */ +extern int zzreal_line; /* line of 1st portion of token that is not skipped */ +extern int zzchar; /* character to determine next state */ +extern int zzbufovf; /* indicates that buffer too small for text */ +extern void (*zzerr)(const char *);/* pointer to error reporting function */ + +#ifdef USER_ZZMODE_STACK +extern int zzauto; +#endif + +extern void zzadvance(void); +extern void zzskip(void); /* erase zzlextext, look for another token */ +extern void zzmore(void); /* keep zzlextext, look for another token */ +extern void zzmode(int k); /* switch to automaton 'k' */ +extern void zzrdstream(FILE *);/* what stream to read from */ +extern void zzclose_stream(void);/* close the current input stream */ +extern void zzrdfunc(int (*)());/* what function to get char from */ +extern void zzrdstr( zzchar_t * ); +extern void zzgettok(void); /* get next token */ +extern void zzreplchar(zzchar_t c);/* replace 
last recognized reg. expr. with + a character */ +extern void zzreplstr(zzchar_t *s);/* replace last recognized reg. expr. with + a string */ +extern void zzsave_dlg_state(struct zzdlg_state *); +extern void zzrestore_dlg_state(struct zzdlg_state *); +extern int zzerr_in(void); +extern void zzerrstd(const char *); +extern void zzerraction(); + +#endif diff --git a/src/translators/btparse/err.c b/src/translators/btparse/err.c new file mode 100644 index 0000000..f143048 --- /dev/null +++ b/src/translators/btparse/err.c @@ -0,0 +1,75 @@ +/* + * A n t l r S e t s / E r r o r F i l e H e a d e r + * + * Generated from: bibtex.g + * + * Terence Parr, Russell Quong, Will Cohen, and Hank Dietz: 1989-1995 + * Parr Research Corporation + * with Purdue University Electrical Engineering + * With AHPCRC, University of Minnesota + * ANTLR Version 1.33 + */ + +#include <stdio.h> +#define ANTLR_VERSION 133 + +#define ZZCOL +#define USER_ZZSYN + +#include "btconfig.h" +#include "btparse.h" +#include "attrib.h" +#include "lex_auxiliary.h" +#include "error.h" +/*#include "my_dmalloc.h"*/ + +extern char * InputFilename; /* for zzcr_ast call in pccts/ast.c */ +#define zzSET_SIZE 4 +#include "antlr.h" +#include "ast.h" +#include "tokens.h" +#include "dlgdef.h" +#include "err.h" + +const ANTLRChar *zztokens[27]={ + /* 00 */ "Invalid", + /* 01 */ "@", + /* 02 */ "AT", + /* 03 */ "\\n", + /* 04 */ "COMMENT", + /* 05 */ "[\\ \\r\\t]+", + /* 06 */ "~[\\@\\n\\ \\r\\t]+", + /* 07 */ "\\n", + /* 08 */ "[\\ \\r\\t]+", + /* 09 */ "NUMBER", + /* 10 */ "NAME", + /* 11 */ "LBRACE", + /* 12 */ "RBRACE", + /* 13 */ "ENTRY_OPEN", + /* 14 */ "ENTRY_CLOSE", + /* 15 */ "EQUALS", + /* 16 */ "HASH", + /* 17 */ "COMMA", + /* 18 */ "\"", + /* 19 */ "\\n~[\\n\\{\\}\\(\\)\"\\]*", + /* 20 */ "[\\r\\t]", + /* 21 */ "\\{", + /* 22 */ "\\}", + /* 23 */ "\\(", + /* 24 */ "\\)", + /* 25 */ "STRING", + /* 26 */ "~[\\n\\{\\}\\(\\)\"]+" +}; +SetWordType zzerr1[4] = {0x0,0x20,0x0,0x2}; +SetWordType zzerr2[4] = 
{0x0,0x6,0x0,0x0}; +SetWordType zzerr3[4] = {0x0,0x46,0x0,0x2}; +SetWordType zzerr4[4] = {0x0,0x44,0x0,0x0}; +SetWordType setwd1[27] = {0x0,0x7,0x6,0x0,0x0,0x0,0x0, + 0x0,0x0,0x28,0x38,0x0,0x0,0x0,0xd0, + 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, + 0x0,0x0,0x20,0x0}; +SetWordType zzerr5[4] = {0x0,0x6,0x0,0x2}; +SetWordType setwd2[27] = {0x0,0x0,0x0,0x0,0x0,0x0,0x0, + 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7, + 0x0,0x4,0x7,0x0,0x0,0x0,0x0,0x0, + 0x0,0x0,0x0,0x0}; diff --git a/src/translators/btparse/err.h b/src/translators/btparse/err.h new file mode 100644 index 0000000..d16615d --- /dev/null +++ b/src/translators/btparse/err.h @@ -0,0 +1,700 @@ +/* + * err.h + * + * Standard error handling mechanism + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. 
+ * + * Has grown to hold all kinds of stuff (err.h is increasingly misnamed) + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +#ifndef ERR_H +#define ERR_H + +#include "btconfig.h" + +#include <string.h> +#include <stdarg.h> + +#ifdef DUM +/* Define usable bits per unsigned int word (used for set stuff) */ +#ifdef PC +#define BSETWORDSIZE 16 +#define BSETLOGWORDSIZE 4 +#else +#define BSETWORDSIZE 32 +#define BSETLOGWORDSIZE 5 +#endif +#endif + +#define BSETWORDSIZE 8 +#define BSETLOGWORDSIZE 3 /* SetWordType is 8bits */ + +#define BSETMODWORD(x) ((x) & (BSETWORDSIZE-1)) /* x % BSETWORDSIZE */ +#define BSETDIVWORD(x) ((x) >> BSETLOGWORDSIZE) /* x / BSETWORDSIZE */ + +/* This is not put into the global pccts_parser structure because it is + * hidden and does not need to be saved during a "save state" operation + */ +/* maximum of 32 bits/unsigned int and must be 8 bits/byte */ +static SetWordType bitmask[] = { + 0x00000001, 0x00000002, 0x00000004, 0x00000008, + 0x00000010, 0x00000020, 0x00000040, 0x00000080 +}; + +void +zzresynch(SetWordType *wd,SetWordType mask) +{ + static int consumed = 1; + + /* if you enter here without having consumed a token from last resynch + * force a token consumption. 
+ */ + if ( !consumed ) {zzCONSUME; return;} + + /* if current token is in resynch set, we've got what we wanted */ + if ( wd[LA(1)]&mask || LA(1) == zzEOF_TOKEN ) {consumed=0; return;} + + /* scan until we find something in the resynch set */ + while ( !(wd[LA(1)]&mask) && LA(1) != zzEOF_TOKEN ) {zzCONSUME;} + consumed=1; +} + +void +zzconsumeUntil(SetWordType *st) +{ + while ( !zzset_el(LA(1), st) ) { zzCONSUME; } +} + +void +zzconsumeUntilToken(int t) +{ + while ( LA(1)!=t ) { zzCONSUME; } +} + +/* input looks like: + * zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText) + * where the zzMiss stuff is set here to the token that did not match + * (and which set wasn't it a member of). + */ +void +zzFAIL(int k, ...) +{ +#ifdef LL_K + static char text[LL_K*ZZLEXBUFSIZE+1]; + SetWordType *f[LL_K]; +#else + static char text[ZZLEXBUFSIZE+1]; + SetWordType *f[1]; +#endif + SetWordType **miss_set; + char **miss_text; + int *bad_tok; + char **bad_text; + int *err_k; + int i; + va_list ap; +/* Removed because it shadows a parameter. gcc 3.4 complains. + I think removing it preserves the behavior of gcc 3.3 and previous. 
+ int k; +*/ + va_start(ap, k); + text[0] = '\0'; + for (i=1; i<=k; i++) /* collect all lookahead sets */ + { + f[i-1] = va_arg(ap, SetWordType *); + } + for (i=1; i<=k; i++) /* look for offending token */ + { + if ( i>1 ) strcat(text, " "); + strcat(text, LATEXT(i)); + if ( !zzset_el((unsigned)LA(i), f[i-1]) ) break; + } + miss_set = va_arg(ap, SetWordType **); + miss_text = va_arg(ap, char **); + bad_tok = va_arg(ap, int *); + bad_text = va_arg(ap, char **); + err_k = va_arg(ap, int *); + va_end(ap); /* all variadic arguments consumed; required before either return below */ + if ( i>k ) + { + /* bad; lookahead is permutation that cannot be matched, + * but, the ith token of lookahead is valid at the ith position + * (The old LL sub 1 (k) versus LL(k) parsing technique) + */ + *miss_set = NULL; + *miss_text = zzlextext; + *bad_tok = LA(1); + *bad_text = LATEXT(1); + *err_k = k; + return; + } +/* fprintf(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/ + *miss_set = f[i-1]; + *miss_text = text; + *bad_tok = LA(i); + *bad_text = LATEXT(i); + if ( i==1 ) *err_k = 1; + else *err_k = k; +} + +void +zzsave_antlr_state(zzantlr_state *buf) +{ +#ifdef LL_K + int i; +#endif + +#ifdef ZZCAN_GUESS + buf->guess_start = zzguess_start; + buf->guessing = zzguessing; +#endif + buf->asp = zzasp; +#ifdef GENAST + buf->ast_sp = zzast_sp; +#endif +#ifdef ZZINF_LOOK + buf->inf_labase = zzinf_labase; + buf->inf_last = zzinf_last; +#endif +#ifdef DEMAND_LOOK + buf->dirty = zzdirty; +#endif +#ifdef LL_K + for (i=0; i<LL_K; i++) buf->tokenLA[i] = zztokenLA[i]; + for (i=0; i<LL_K; i++) strcpy(buf->textLA[i], zztextLA[i]); + buf->lap = zzlap; + buf->labase = zzlabase; +#else + buf->token = zztoken; + strcpy(buf->text, zzlextext); +#endif +} + +void +zzrestore_antlr_state(zzantlr_state *buf) +{ +#ifdef LL_K + int i; +#endif + +#ifdef ZZCAN_GUESS + zzguess_start = buf->guess_start; + zzguessing = buf->guessing; +#endif + zzasp = buf->asp; +#ifdef GENAST + zzast_sp = buf->ast_sp; +#endif +#ifdef ZZINF_LOOK + zzinf_labase = buf->inf_labase; + zzinf_last = buf->inf_last; 
+#endif +#ifdef DEMAND_LOOK + zzdirty = buf->dirty; +#endif +#ifdef LL_K + for (i=0; i<LL_K; i++) zztokenLA[i] = buf->tokenLA[i]; + for (i=0; i<LL_K; i++) strcpy(zztextLA[i], buf->textLA[i]); + zzlap = buf->lap; + zzlabase = buf->labase; +#else + zztoken = buf->token; + strcpy(zzlextext, buf->text); +#endif +} + +void +zzedecode(SetWordType *a) +{ + register SetWordType *p = a; + register SetWordType *endp = &(p[zzSET_SIZE]); + register unsigned e = 0; + + if ( zzset_deg(a)>1 ) fprintf(stderr, " {"); + do { + register SetWordType t = *p; + register SetWordType *b = &(bitmask[0]); + do { + if ( t & *b ) fprintf(stderr, " %s", zztokens[e]); + e++; + } while (++b < &(bitmask[sizeof(SetWordType)*8])); + } while (++p < endp); + if ( zzset_deg(a)>1 ) fprintf(stderr, " }"); +} + +#ifndef USER_ZZSYN +/* standard error reporting function */ +void +zzsyn(char *text, int tok, char *egroup, SetWordType *eset, int etok, int k, char *bad_text) +{ + + fprintf(stderr, "line %d: syntax error at \"%s\"", zzline, (tok==zzEOF_TOKEN)?"EOF":bad_text); + if ( !etok && !eset ) {fprintf(stderr, "\n"); return;} + if ( k==1 ) fprintf(stderr, " missing"); + else + { + fprintf(stderr, "; \"%s\" not", bad_text); + if ( zzset_deg(eset)>1 ) fprintf(stderr, " in"); + } + if ( zzset_deg(eset)>0 ) zzedecode(eset); + else fprintf(stderr, " %s", zztokens[etok]); + if ( strlen(egroup) > 0 ) fprintf(stderr, " in %s", egroup); + fprintf(stderr, "\n"); +} +#endif + +/* is b an element of set p? */ +int +zzset_el(unsigned b, SetWordType *p) +{ + return( p[BSETDIVWORD(b)] & bitmask[BSETMODWORD(b)] ); +} + +int +zzset_deg(SetWordType *a) +{ + /* Fast compute degree of a set... the number + of elements present in the set. 
Assumes + that all word bits are used in the set + */ + register SetWordType *p = a; + register SetWordType *endp = &(a[zzSET_SIZE]); + register int degree = 0; + + if ( a == NULL ) return 0; + while ( p < endp ) + { + register SetWordType t = *p; + register SetWordType *b = &(bitmask[0]); + do { + if (t & *b) ++degree; + } while (++b < &(bitmask[sizeof(SetWordType)*8])); + p++; + } + + return(degree); +} + +#ifdef DEMAND_LOOK + +#ifdef LL_K +int +_zzmatch(int _t, char **zzBadText, char **zzMissText, + int *zzMissTok, int *zzBadTok, + SetWordType **zzMissSet) +{ + if ( zzdirty==LL_K ) { + zzCONSUME; + } + if ( LA(1)!=_t ) { + *zzBadText = *zzMissText=LATEXT(1); + *zzMissTok= _t; *zzBadTok=LA(1); + *zzMissSet=NULL; + return 0; + } + zzMakeAttr + zzdirty++; + zzlabase++; + return 1; +} + +int +_zzmatch_wsig(int _t) +{ + if ( zzdirty==LL_K ) { + zzCONSUME; + } + if ( LA(1)!=_t ) { + return 0; + } + zzMakeAttr + zzdirty++; + zzlabase++; + return 1; +} + +#else + +int +_zzmatch(int _t, char **zzBadText, char **zzMissText, + int *zzMissTok, int *zzBadTok, SetWordType **zzMissSet) +{ + if ( zzdirty ) {zzCONSUME;} + if ( LA(1)!=_t ) { + *zzBadText = *zzMissText=LATEXT(1); + *zzMissTok= _t; *zzBadTok=LA(1); + *zzMissSet=NULL; + return 0; + } + zzdirty = 1; + zzMakeAttr + return 1; +} + +int +_zzmatch_wsig(int _t) +{ + if ( zzdirty ) {zzCONSUME;} + if ( LA(1)!=_t ) { + return 0; + } + zzdirty = 1; + zzMakeAttr + return 1; +} + +#endif /*LL_K*/ + +#else + +int +_zzmatch(int _t, const char **zzBadText, const char **zzMissText, + int *zzMissTok, int *zzBadTok, + SetWordType **zzMissSet) +{ + if ( LA(1)!=_t ) { + *zzBadText = *zzMissText=LATEXT(1); + *zzMissTok= _t; *zzBadTok=LA(1); + *zzMissSet=NULL; + return 0; + } + zzMakeAttr + return 1; +} + +int +_zzmatch_wsig(int _t) +{ + if ( LA(1)!=_t ) return 0; + zzMakeAttr + return 1; +} + +#endif /*DEMAND_LOOK*/ + +#ifdef ZZINF_LOOK +void +_inf_zzgettok(void) +{ + if ( zzinf_labase >= zzinf_last ) + {NLA = zzEOF_TOKEN; 
strcpy(NLATEXT, "");} + else { + NLA = zzinf_tokens[zzinf_labase]; + zzline = zzinf_line[zzinf_labase]; /* wrong in 1.21 */ + strcpy(NLATEXT, zzinf_text[zzinf_labase]); + zzinf_labase++; + } +} +#endif + +#ifdef ZZINF_LOOK +/* allocate default size text,token and line arrays; + * then, read all of the input reallocing the arrays as needed. + * Once the number of total tokens is known, the LATEXT(i) array (zzinf_text) + * is allocated and it's pointers are set to the tokens in zzinf_text_buffer. + */ +void +zzfill_inf_look(void) +{ + int tok, line; + int zzinf_token_buffer_size = ZZINF_DEF_TOKEN_BUFFER_SIZE; + int zzinf_text_buffer_size = ZZINF_DEF_TEXT_BUFFER_SIZE; + int zzinf_text_buffer_index = 0; + int zzinf_lap = 0; + + /* allocate text/token buffers */ + zzinf_text_buffer = (char *) malloc(zzinf_text_buffer_size); + if ( zzinf_text_buffer == NULL ) + { + fprintf(stderr, "cannot allocate lookahead text buffer (%d bytes)\n", + zzinf_text_buffer_size); + exit(PCCTS_EXIT_FAILURE); + } + zzinf_tokens = (int *) calloc(zzinf_token_buffer_size,sizeof(int)); + if ( zzinf_tokens == NULL ) + { + fprintf(stderr, "cannot allocate token buffer (%d tokens)\n", + zzinf_token_buffer_size); + exit(PCCTS_EXIT_FAILURE); + } + zzinf_line = (int *) calloc(zzinf_token_buffer_size,sizeof(int)); + if ( zzinf_line == NULL ) + { + fprintf(stderr, "cannot allocate line buffer (%d ints)\n", + zzinf_token_buffer_size); + exit(PCCTS_EXIT_FAILURE); + } + + /* get tokens, copying text to text buffer */ + zzinf_text_buffer_index = 0; + do { + zzgettok(); + line = zzreal_line; + while ( zzinf_lap>=zzinf_token_buffer_size ) + { + zzinf_token_buffer_size += ZZINF_BUFFER_TOKEN_CHUNK_SIZE; + zzinf_tokens = (int *) realloc(zzinf_tokens, + zzinf_token_buffer_size*sizeof(int)); + if ( zzinf_tokens == NULL ) + { + fprintf(stderr, "cannot allocate lookahead token buffer (%d tokens)\n", + zzinf_token_buffer_size); + exit(PCCTS_EXIT_FAILURE); + } + zzinf_line = (int *) realloc(zzinf_line, + 
zzinf_token_buffer_size*sizeof(int)); + if ( zzinf_line == NULL ) + { + fprintf(stderr, "cannot allocate lookahead line buffer (%d ints)\n", + zzinf_token_buffer_size); + exit(PCCTS_EXIT_FAILURE); + } + + } + while ( (zzinf_text_buffer_index+strlen(NLATEXT)+1) >= zzinf_text_buffer_size ) + { + zzinf_text_buffer_size += ZZINF_BUFFER_TEXT_CHUNK_SIZE; + zzinf_text_buffer = (char *) realloc(zzinf_text_buffer, + zzinf_text_buffer_size); + if ( zzinf_text_buffer == NULL ) + { + fprintf(stderr, "cannot allocate lookahead text buffer (%d bytes)\n", + zzinf_text_buffer_size); + exit(PCCTS_EXIT_FAILURE); + } + } + /* record token and text and line of input symbol */ + tok = zzinf_tokens[zzinf_lap] = NLA; + strcpy(&zzinf_text_buffer[zzinf_text_buffer_index], NLATEXT); + zzinf_text_buffer_index += strlen(NLATEXT)+1; + zzinf_line[zzinf_lap] = line; + zzinf_lap++; + } while (tok!=zzEOF_TOKEN); + zzinf_labase = 0; + zzinf_last = zzinf_lap-1; + + /* allocate ptrs to text of ith token */ + zzinf_text = (char **) calloc(zzinf_last+1,sizeof(char *)); + if ( zzinf_text == NULL ) + { + fprintf(stderr, "cannot allocate lookahead text buffer (%d)\n", + zzinf_text_buffer_size); + exit(PCCTS_EXIT_FAILURE); + } + zzinf_text_buffer_index = 0; + zzinf_lap = 0; + /* set ptrs so that zzinf_text[i] is the text of the ith token found on input */ + while (zzinf_lap<=zzinf_last) + { + zzinf_text[zzinf_lap++] = &zzinf_text_buffer[zzinf_text_buffer_index]; + zzinf_text_buffer_index += strlen(&zzinf_text_buffer[zzinf_text_buffer_index])+1; + } +} +#endif + +int +_zzsetmatch(SetWordType *e, char **zzBadText, char **zzMissText, + int *zzMissTok, int *zzBadTok, + SetWordType **zzMissSet) +{ +#ifdef DEMAND_LOOK +#ifdef LL_K + if ( zzdirty==LL_K ) {zzCONSUME;} +#else + if ( zzdirty ) {zzCONSUME;} +#endif +#endif + if ( !zzset_el((unsigned)LA(1), e) ) { + *zzBadText = LATEXT(1); *zzMissText=NULL; + *zzMissTok= 0; *zzBadTok=LA(1); + *zzMissSet=e; + return 0; + } +#ifdef DEMAND_LOOK +#ifdef LL_K + zzdirty++; 
+#else + zzdirty = 1; +#endif +#endif + zzMakeAttr + return 1; +} + +int +_zzmatch_wdfltsig(int tokenWanted, SetWordType *whatFollows) +{ +#ifdef DEMAND_LOOK +#ifdef LL_K + if ( zzdirty==LL_K ) { + zzCONSUME; + } +#else + if ( zzdirty ) {zzCONSUME;} +#endif +#endif + + if ( LA(1)!=tokenWanted ) + { + fprintf(stderr, + "line %d: syntax error at \"%s\" missing %s\n", + zzline, + (LA(1)==zzEOF_TOKEN)?"<eof>":(char*)LATEXT(1), + zztokens[tokenWanted]); + zzconsumeUntil( whatFollows ); + return 0; + } + else { + zzMakeAttr +#ifdef DEMAND_LOOK +#ifdef LL_K + zzdirty++; + zzlabase++; +#else + zzdirty = 1; +#endif +#else +/* zzCONSUME; consume if not demand lookahead */ +#endif + return 1; + } +} + +int +_zzsetmatch_wdfltsig(SetWordType *tokensWanted, + int tokenTypeOfSet, + SetWordType *whatFollows) +{ +#ifdef DEMAND_LOOK +#ifdef LL_K + if ( zzdirty==LL_K ) {zzCONSUME;} +#else + if ( zzdirty ) {zzCONSUME;} +#endif +#endif + if ( !zzset_el((unsigned)LA(1), tokensWanted) ) + { + fprintf(stderr, + "line %d: syntax error at \"%s\" missing %s\n", + zzline, + (LA(1)==zzEOF_TOKEN)?"<eof>":(char*)LATEXT(1), + zztokens[tokenTypeOfSet]); + zzconsumeUntil( whatFollows ); + return 0; + } + else { + zzMakeAttr +#ifdef DEMAND_LOOK +#ifdef LL_K + zzdirty++; + zzlabase++; +#else + zzdirty = 1; +#endif +#else +/* zzCONSUME; consume if not demand lookahead */ +#endif + return 1; + } +} + +int +_zzsetmatch_wsig(SetWordType *e) +{ +#ifdef DEMAND_LOOK +#ifdef LL_K + if ( zzdirty==LL_K ) {zzCONSUME;} +#else + if ( zzdirty ) {zzCONSUME;} +#endif +#endif + if ( !zzset_el((unsigned)LA(1), e) ) return 0; +#ifdef DEMAND_LOOK +#ifdef LL_K + zzdirty++; +#else + zzdirty = 1; +#endif +#endif + zzMakeAttr + return 1; +} + +#ifdef USER_ZZMODE_STACK +static int zzmstk[ZZMAXSTK] = { -1 }; +static int zzmdep = 0; +static char zzmbuf[70]; + +void +zzmpush( int m ) +{ + if(zzmdep == ZZMAXSTK - 1) { + sprintf(zzmbuf, "Mode stack overflow "); + zzerr(zzmbuf); + } else { + zzmstk[zzmdep++] = zzauto; + zzmode(m); 
+ } +} + +void +zzmpop( void ) +{ + if(zzmdep == 0) + { sprintf(zzmbuf, "Mode stack underflow "); + zzerr(zzmbuf); + } + else + { zzmdep--; + zzmode(zzmstk[zzmdep]); + } +} + +void +zzsave_mode_stack( int modeStack[], int *modeLevel ) +{ + int i; + memcpy(modeStack, zzmstk, sizeof(zzmstk)); + *modeLevel = zzmdep; + zzmdep = 0; + + return; +} + +void +zzrestore_mode_stack( int modeStack[], int *modeLevel ) +{ + int i; + + memcpy(zzmstk, modeStack, sizeof(zzmstk)); + zzmdep = *modeLevel; + + return; +} +#endif /* USER_ZZMODE_STACK */ + +#endif /* ERR_H */ diff --git a/src/translators/btparse/error.c b/src/translators/btparse/error.c new file mode 100644 index 0000000..26f2fb2 --- /dev/null +++ b/src/translators/btparse/error.c @@ -0,0 +1,348 @@ +/* ------------------------------------------------------------------------ +@NAME : error.c +@DESCRIPTION: Anything relating to reporting or recording errors and + warnings. +@GLOBALS : errclass_names + err_actions + err_handlers + errclass_counts + error_buf +@CALLS : +@CREATED : 1996/08/28, Greg Ward +@MODIFIED : +@VERSION : $Id: error.c,v 2.5 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include "btparse.h" +#include "error.h" +/*#include "my_dmalloc.h"*/ + + +#define NUM_ERRCLASSES ((int) BTERR_INTERNAL + 1) + + +static const char *errclass_names[NUM_ERRCLASSES] = +{ + NULL, /* BTERR_NOTIFY */ + "warning", /* BTERR_CONTENT */ + "warning", /* BTERR_LEXWARN */ + "warning", /* BTERR_USAGEWARN */ + "error", /* BTERR_LEXERR */ + "syntax error", /* BTERR_SYNTAX */ + "fatal error", /* BTERR_USAGEERR */ + "internal error" /* BTERR_INTERNAL */ +}; + +static const bt_erraction err_actions[NUM_ERRCLASSES] = +{ + BTACT_NONE, /* BTERR_NOTIFY */ + BTACT_NONE, /* BTERR_CONTENT */ + BTACT_NONE, /* BTERR_LEXWARN */ + BTACT_NONE, /* BTERR_USAGEWARN */ + BTACT_NONE, /* BTERR_LEXERR */ + BTACT_NONE, /* BTERR_SYNTAX */ + BTACT_CRASH, /* BTERR_USAGEERR */ + BTACT_ABORT /* BTERR_INTERNAL */ +}; + +void print_error (bt_error *err); + +static bt_err_handler err_handlers[NUM_ERRCLASSES] = +{ + print_error, + print_error, + print_error, + print_error, + print_error, + print_error, + print_error, + print_error +}; + +static int errclass_counts[NUM_ERRCLASSES] = { 0, 0, 0, 0, 0, 0, 0, 0 }; +static char error_buf[MAX_ERROR+1]; + + +/* ---------------------------------------------------------------------- + * Error-handling functions. + */ + +void print_error (bt_error *err) +{ + const char * name; + boolean something_printed; + + something_printed = FALSE; + + if (err->filename) + { + fprintf (stderr, err->filename); + something_printed = TRUE; + } + if (err->line > 0) /* going to print a line number? */ + { + if (something_printed) + fprintf (stderr, ", "); + fprintf (stderr, "line %d", err->line); + something_printed = TRUE; + } + if (err->item_desc && err->item > 0) /* going to print an item number? 
*/ + { + if (something_printed) + fprintf (stderr, ", "); + fprintf (stderr, "%s %d", err->item_desc, err->item); + something_printed = TRUE; + } + + name = errclass_names[(int) err->errclass]; + if (name) + { + if (something_printed) + fprintf (stderr, ", "); + fprintf (stderr, name); + something_printed = TRUE; + } + + if (something_printed) + fprintf (stderr, ": "); + + fprintf (stderr, "%s\n", err->message); + +} /* print_error() */ + + + +/* ---------------------------------------------------------------------- + * Error-reporting functions: these are called anywhere in the library + * when we encounter an error. + */ + +void +report_error (bt_errclass errclass, + char * filename, + int line, + const char * item_desc, + int item, + const char * fmt, + va_list arglist) +{ + bt_error err; +#if !HAVE_VSNPRINTF + int msg_len; +#endif + + err.errclass = errclass; + err.filename = filename; + err.line = line; + err.item_desc = item_desc; + err.item = item; + + errclass_counts[(int) errclass]++; + + + /* + * Blech -- we're writing to a static buffer because there's no easy + * way to know how long the error message is going to be. (Short of + * reimplementing printf(), or maybe printf()'ing to a dummy file + * and using the return value -- ugh!) The GNU C library conveniently + * supplies vsnprintf(), which neatly solves this problem by truncating + * the output string if it gets too long. (I could check for this + * truncation if I wanted to, but I don't think it's necessary given the + * ample size of the message buffer.) For non-GNU systems, though, + * we're stuck with using vsprintf()'s return value. This can't be + * trusted on all systems -- thus there's a check for it in configure. + * Also, this won't necessarily trigger the internal_error() if we + * do overflow; it's conceivable that vsprintf() itself would crash. 
+ * At least doing it this way we avoid the possibility of vsprintf() + * silently corrupting some memory, and crashing unpredictably at some + * later point. + */ + +#if HAVE_VSNPRINTF + vsnprintf (error_buf, MAX_ERROR, fmt, arglist); +#else + msg_len = vsprintf (error_buf, fmt, arglist); + if (msg_len > MAX_ERROR) + internal_error ("static error message buffer overflowed"); +#endif + + err.message = error_buf; + if (err_handlers[errclass]) + (*err_handlers[errclass]) (&err); + + switch (err_actions[errclass]) + { + case BTACT_NONE: return; + case BTACT_CRASH: exit (1); + case BTACT_ABORT: abort (); + default: internal_error ("invalid error action %d for class %d (%s)", + (int) err_actions[errclass], + (int) errclass, errclass_names[errclass]); + } + +} /* report_error() */ + + +GEN_ERRFUNC (general_error, + (bt_errclass errclass, + char * filename, + int line, + const char * item_desc, + int item, + char * fmt, + ...), + errclass, filename, line, item_desc, item, fmt) + +GEN_ERRFUNC (error, + (bt_errclass errclass, + char * filename, + int line, + char * fmt, + ...), + errclass, filename, line, NULL, -1, fmt) + +GEN_ERRFUNC (ast_error, + (bt_errclass errclass, + AST * ast, + char * fmt, + ...), + errclass, ast->filename, ast->line, NULL, -1, fmt) + +GEN_ERRFUNC (notify, + (const char * fmt, ...), + BTERR_NOTIFY, NULL, -1, NULL, -1, fmt) + +GEN_ERRFUNC (usage_warning, + (const char * fmt, ...), + BTERR_USAGEWARN, NULL, -1, NULL, -1, fmt) + +GEN_ERRFUNC (usage_error, + (const char * fmt, ...), + BTERR_USAGEERR, NULL, -1, NULL, -1, fmt) + +GEN_ERRFUNC (internal_error, + (const char * fmt, ...), + BTERR_INTERNAL, NULL, -1, NULL, -1, fmt) + + +/* ====================================================================== + * Functions to be used outside of the library + */ + +/* ------------------------------------------------------------------------ +@NAME : bt_reset_error_counts() +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Resets all the error counters to zero. 
+@GLOBALS : +@CALLS : +@CREATED : 1997/01/08, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void bt_reset_error_counts (void) +{ + int i; + + for (i = 0; i < NUM_ERRCLASSES; i++) + errclass_counts[i] = 0; +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_get_error_count() +@INPUT : errclass +@OUTPUT : +@RETURNS : +@DESCRIPTION: Returns number of errors seen in the specified class. +@GLOBALS : errclass_counts +@CALLS : +@CREATED : +@MODIFIED : +-------------------------------------------------------------------------- */ +int bt_get_error_count (bt_errclass errclass) +{ + return errclass_counts[errclass]; +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_get_error_counts() +@INPUT : counts - pointer to an array big enough to hold all the counts + if NULL, the array will be allocated for you (and you + must free() it when done with it) +@OUTPUT : +@RETURNS : counts - either the passed-in pointer, or the newly- + allocated array if you pass in NULL +@DESCRIPTION: Returns a newly-allocated array with the number of errors + in each error class, indexed by the members of the + eclass_t enum. 
+@GLOBALS : errclass_counts +@CALLS : +@CREATED : 1997/01/06, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +int *bt_get_error_counts (int *counts) +{ + int i; + + if (counts == NULL) + counts = (int *) malloc (sizeof (int) * NUM_ERRCLASSES); + for (i = 0; i < NUM_ERRCLASSES; i++) + counts[i] = errclass_counts[i]; + + return counts; +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_error_status +@INPUT : saved_counts - an array of error counts as returned by + bt_get_error_counts, or NULL not to compare + to a previous checkpoint +@OUTPUT : +@RETURNS : +@DESCRIPTION: Computes a bitmap where a bit is set for each error class + that has more errors now than it used to have (or, if + saved_counts is NULL, the bit is set of there are have been + any errors in the corresponding error class). + + Eg. "x & (1<<E_SYNTAX)" (where x is returned by bt_error_status) + is true if there have been any syntax errors. +@GLOBALS : +@CALLS : +@CREATED : +@MODIFIED : +-------------------------------------------------------------------------- */ +ushort bt_error_status (int *saved_counts) +{ + int i; + ushort status; + + status = 0; + + if (saved_counts) + { + for (i = 0; i < NUM_ERRCLASSES; i++) + status |= ( (errclass_counts[i] > saved_counts[i]) << i); + } + else + { + for (i = 0; i < NUM_ERRCLASSES; i++) + status |= ( (errclass_counts[i] > 0) << i); + } + + return status; +} /* bt_error_status () */ diff --git a/src/translators/btparse/error.h b/src/translators/btparse/error.h new file mode 100644 index 0000000..aede151 --- /dev/null +++ b/src/translators/btparse/error.h @@ -0,0 +1,65 @@ +/* ------------------------------------------------------------------------ +@NAME : error.c +@DESCRIPTION: Prototypes for the error-generating functions (i.e. functions + defined in error.c, and meant only for use elswhere in the + library). 
+@CREATED : Summer 1996, Greg Ward +@MODIFIED : +@VERSION : $Id: error.h,v 1.11 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ + +#ifndef ERROR_H +#define ERROR_H + +#include <stdarg.h> +#include "btparse.h" /* for AST typedef */ + +#define MAX_ERROR 1024 + +#define ERRFUNC_BODY(class,filename,line,item_desc,item,format) \ +{ \ + va_list arglist; \ + \ + va_start (arglist, format); \ + report_error (class, filename, line, item_desc, item, format, arglist); \ + va_end (arglist); \ +} + +#define GEN_ERRFUNC(name,params,class,filename,line,item_desc,item,format) \ +void name params \ +ERRFUNC_BODY (class, filename, line, item_desc, item, format) + +#define GEN_PRIVATE_ERRFUNC(name,params, \ + class,filename,line,item_desc,item,format) \ +static GEN_ERRFUNC(name,params,class,filename,line,item_desc,item,format) + +/* + * Prototypes for functions exported by error.c but only used within + * the library -- functions that can be called by outsiders are declared + * in btparse.h. 
+ */ + +void print_error (bt_error *err); +void report_error (bt_errclass class, + char * filename, int line, const char * item_desc, int item, + const char * format, va_list arglist); + +void general_error (bt_errclass class, + char * filename, int line, const char * item_desc, int item, + char * format, ...); +void error (bt_errclass class, char * filename, int line, char * format, ...); +void ast_error (bt_errclass class, AST * ast, char * format, ...); + +void notify (const char *format,...); +void usage_warning (const char * format, ...); +void usage_error (const char * format, ...); +void internal_error (const char * format, ...); + +#endif diff --git a/src/translators/btparse/format_name.c b/src/translators/btparse/format_name.c new file mode 100644 index 0000000..d6c99ae --- /dev/null +++ b/src/translators/btparse/format_name.c @@ -0,0 +1,841 @@ +/* ------------------------------------------------------------------------ +@NAME : format_name.c +@DESCRIPTION: bt_format_name() and support functions: everything needed + to turn a bt_name structure (as returned by bt_split_name()) + back into a string according to a highly customizable format. +@GLOBALS : +@CREATED : +@MODIFIED : +@VERSION : $Id: format_name.c,v 1.12 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include "btparse.h" +#include "error.h" +/*#include "my_dmalloc.h"*/ +#include "bt_debug.h" + + +static char EmptyString[] = ""; + + +#if DEBUG +/* prototypes to shut "gcc -Wmissing-prototypes" up */ +void print_tokens (char *partname, char **tokens, int num_tokens); +void dump_name (bt_name * name); +void dump_format (bt_name_format * format); +#endif + + +/* ---------------------------------------------------------------------- + * Interface to create/customize bt_name_format structures + */ + +/* ------------------------------------------------------------------------ +@NAME : bt_create_name_format +@INPUT : parts - a string of letters (maximum four, from the set + f, v, l, j, with no repetition) denoting the order + and presence of name parts. Also used to determine + certain pre-part text strings. + abbrev_first - flag: should first names be abbreviated? +@OUTPUT : +@RETURNS : +@DESCRIPTION: Creates a bt_name_format structure, slightly customized + according to the caller's choice of token order and + whether to abbreviate the first name. Use + bt_free_name_format() to free the structure (and any sub- + structures that may be allocated here). Use + bt_set_format_text() and bt_set_format_options() for + further customization of the format structure; do not + fiddle its fields directly. + + Fills in the structures `parts' field according to `parts' + string: 'f' -> BTN_FIRST, and so on. + + Sets token join methods: inter-token join (within each part) + is set to BTJ_MAYTIE (a "discretionary tie") for all parts; + inter-part join is set to BTJ_SPACE, except for a 'von' + token immediately preceding a 'last' token; there, we have + a discretionary tie. + + Sets abbreviation flags: FALSE for everything except `first', + which follows `abbrev_first' argument. 
+ + Sets surrounding text (pre- and post-part, pre- and post- + token): empty string for everything, except: + - post-token for 'first' is "." if abbrev_first true + - if 'jr' immediately preceded by 'last': + pre-part for 'jr' is ", ", join for 'last' is nothing + - if 'first' immediately preceded by 'last' + pre-part for 'first' is ", " , join for 'last' is nothing + - if 'first' immediately preceded by 'jr' and 'jr' immediately + preceded by 'last': + pre-part for 'first' and 'jr' is ", " , + join for 'last' and 'jr' is nothing +@CREATED : 1997/11/02, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +bt_name_format * +bt_create_name_format (char * parts, boolean abbrev_first) +{ + int num_parts; + int num_valid_parts; + bt_name_format * + format; + int part_pos[BT_MAX_NAMEPARTS]; + int i; + + /* + * Check that the part list (a string with one letter -- f, v, l, or j + * -- for each part is valid: no longer than four characters, and no + * invalid characters. 
+ */ + + num_parts = strlen (parts); + num_valid_parts = strspn (parts, BT_VALID_NAMEPARTS); + if (num_parts > BT_MAX_NAMEPARTS) + { + usage_error ("bt_create_name_format: part list must have no more than " + "%d letters", BT_MAX_NAMEPARTS); + } + if (num_valid_parts != num_parts) + { + usage_error ("bt_create_name_format: bad part abbreviation \"%c\" " + "(must be one of \"%s\")", + parts[num_valid_parts], BT_VALID_NAMEPARTS); + } + + + /* User input is OK -- let's create the structure */ + + format = (bt_name_format *) malloc (sizeof (bt_name_format)); + format->num_parts = num_parts; + for (i = 0; i < num_parts; i++) + { + switch (parts[i]) + { + case 'f': format->parts[i] = BTN_FIRST; break; + case 'v': format->parts[i] = BTN_VON; break; + case 'l': format->parts[i] = BTN_LAST; break; + case 'j': format->parts[i] = BTN_JR; break; + default: internal_error ("bad part abbreviation \"%c\"", parts[i]); + } + part_pos[format->parts[i]] = i; + } + for (; i < BT_MAX_NAMEPARTS; i++) + { + format->parts[i] = BTN_NONE; + } + + + /* + * Set the token join methods: between tokens for all parts is a + * discretionary tie, and the join between parts is a space (except for + * 'von': if followed by 'last', we will have a discretionary tie). + */ + for (i = 0; i < num_parts; i++) + { + format->join_tokens[i] = BTJ_MAYTIE; + format->join_part[i] = BTJ_SPACE; + } + if (part_pos[BTN_VON] + 1 == part_pos[BTN_LAST]) + format->join_part[BTN_VON] = BTJ_MAYTIE; + + + /* + * Now the abbreviation flags: follow 'abbrev_first' flag for 'first', + * and FALSE for everything else. 
+ */ + format->abbrev[BTN_FIRST] = abbrev_first; + format->abbrev[BTN_VON] = FALSE; + format->abbrev[BTN_LAST] = FALSE; + format->abbrev[BTN_JR] = FALSE; + + + + /* + * Now fill in the "surrounding text" fields (pre- and post-part, pre- + * and post-token) -- start out with everything NULL (empty string), + * and then tweak it to handle abbreviated first names, 'jr' following + * 'last', and 'first' following 'last' or 'last' and 'jr'. In the + * last three cases, we put in some pre-part text (", "), and also + * set the join method for the *previous* part (jr or last) to + * BTJ_NOTHING, so we don't get extraneous space before the ", ". + */ + for (i = 0; i < BT_MAX_NAMEPARTS; i++) + { + format->pre_part[i] = EmptyString; + format->post_part[i] = EmptyString; + format->pre_token[i] = EmptyString; + format->post_token[i] = EmptyString; + } + + /* abbreviated first name: + * "Blow J" -> "Blow J.", or "J Blow" -> "J. Blow" + */ + if (abbrev_first) + { + format->post_token[BTN_FIRST] = "."; + } + /* 'jr' after 'last': "Joe Blow Jr." -> "Joe Blow, Jr." */ + if (part_pos[BTN_JR] == part_pos[BTN_LAST]+1) + { + format->pre_part[BTN_JR] = ", "; + format->join_part[BTN_LAST] = BTJ_NOTHING; + /* 'first' after 'last' and 'jr': "Blow, Jr. 
Joe"->"Blow, Jr., Joe" */ + if (part_pos[BTN_FIRST] == part_pos[BTN_JR]+1) + { + format->pre_part[BTN_FIRST] = ", "; + format->join_part[BTN_JR] = BTJ_NOTHING; + } + } + /* first after last: "Blow Joe" -> "Blow, Joe" */ + if (part_pos[BTN_FIRST] == part_pos[BTN_LAST]+1) + { + format->pre_part[BTN_FIRST] = ", "; + format->join_part[BTN_LAST] = BTJ_NOTHING; + } + + DBG_ACTION + (1, printf ("bt_create_name_format(): returning structure %p\n", format)) + + return format; + +} /* bt_create_name_format() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_free_name_format() +@INPUT : format - free()'d, so this is an invalid pointer after the call +@OUTPUT : +@RETURNS : +@DESCRIPTION: Frees a bt_name_format structure created by + bt_create_name_format(). +@CREATED : 1997/11/02, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_free_name_format (bt_name_format * format) +{ + free (format); +} + + + +/* ------------------------------------------------------------------------ +@NAME : bt_set_format_text +@INPUT : format - the format structure to update + part - which name-part to change the surrounding text for + pre_part - "pre-part" text, or NULL to leave alone + post_part - "post-part" text, or NULL to leave alone + pre_token - "pre-token" text, or NULL to leave alone + post_token - "post-token" text, or NULL to leave alone +@OUTPUT : format - pre_part, post_part, pre_token, post_token + arrays updated (only those with corresponding + non-NULL parameters are touched) +@RETURNS : +@DESCRIPTION: Sets the "surrounding text" for a particular name part in + a name format structure. 
+@CREATED : 1997/11/02, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_set_format_text (bt_name_format * format, + bt_namepart part, + char * pre_part, + char * post_part, + char * pre_token, + char * post_token) +{ + if (pre_part) format->pre_part[part] = pre_part; + if (post_part) format->post_part[part] = post_part; + if (pre_token) format->pre_token[part] = pre_token; + if (post_token) format->post_token[part] = post_token; +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_set_format_options() +@INPUT : format + part + abbrev + join_tokens + join_part +@OUTPUT : format - abbrev, join_tokens, join_part arrays all updated +@RETURNS : +@DESCRIPTION: Sets various formatting options for a particular name part in + a name format structure. +@CREATED : 1997/11/02, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_set_format_options (bt_name_format * format, + bt_namepart part, + boolean abbrev, + bt_joinmethod join_tokens, + bt_joinmethod join_part) +{ + format->abbrev[part] = abbrev; + format->join_tokens[part] = join_tokens; + format->join_part[part] = join_part; +} + + + +/* ---------------------------------------------------------------------- + * Functions for actually formatting a name (given a name and a name + * format structure). + */ + +/* ------------------------------------------------------------------------ +@NAME : count_virtual_char() +@INPUT : string + offset +@OUTPUT : vchar_count +@INOUT : depth + in_special +@RETURNS : +@DESCRIPTION: Munches a single physical character from a string, updating + the virtual character count, the depth, and an "in special + character" flag. + + The virtual character count is incremented by any character + not part of a special character, and also by the right-brace + that closes a special character. 
The depth is incremented by + a left brace, and decremented by a right brace. in_special + is set to TRUE when we encounter a left brace at depth zero + that is immediately followed by a backslash; it is set to + false when we encounter the end of the special character, + i.e. when in_special is TRUE and we hit a right brace that + brings us back to depth zero. + + *vchar_count and *depth should both be set to zero the first + time you call count_virtual_char() on a particular string, + and in_special should be set to FALSE. +@CALLS : +@CALLERS : string_length() + string_prefix() +@CREATED : 1997/11/03, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +count_virtual_char (char * string, + int offset, + int * vchar_count, + int * depth, + boolean * in_special) +{ + switch (string[offset]) + { + case '{': + { + /* start of a special char? */ + if (*depth == 0 && string[offset+1] == '\\') + *in_special = TRUE; + (*depth)++; + break; + } + case '}': + { + /* end of a special char? */ + if (*depth == 1 && *in_special) + { + *in_special = FALSE; + (*vchar_count)++; + } + (*depth)--; + break; + } + default: + { + /* anything else? (possibly inside a special char) */ + if (! *in_special) (*vchar_count)++; + } + } +} /* count_virtual_char () */ + + +/* this should probably be publicly available, documented, etc. */ +/* ------------------------------------------------------------------------ +@NAME : string_length() +@INPUT : string +@OUTPUT : +@RETURNS : "virtual length" of `string' +@DESCRIPTION: Counts the number of "virtual characters" in a string. A + virtual character is either an entire BibTeX special character, + or any character outside of a special character. + + Thus, "Hello" has virtual length 5, and so does + "H{\\'e}ll{\\\"o}". "{\\noop Hello there how are you?}" has + virtual length one. 
+@CALLS : count_virtual_char() +@CALLERS : format_name() +@CREATED : 1997/11/03, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static int +string_length (char * string) +{ + int length; + int depth; + boolean in_special; + int i; + + length = 0; + depth = 0; + in_special = FALSE; + + for (i = 0; string[i] != 0; i++) + { + count_virtual_char (string, i, &length, &depth, &in_special); + } + + return length; +} /* string_length() */ + + +/* ------------------------------------------------------------------------ +@NAME : string_prefix() +@INPUT : string + prefix_len +@OUTPUT : +@RETURNS : physical length of the prefix of `string' with a virtual length + of `prefix_len' +@DESCRIPTION: Counts the number of physical characters from the beginning + of `string' needed to extract a sub-string with virtual + length `prefix_len'. +@CALLS : count_virtual_char() +@CALLERS : format_name() +@CREATED : 1997/11/03, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static int +string_prefix (char * string, int prefix_len) +{ + int i; + int vchars_seen; + int depth; + boolean in_special; + + vchars_seen = 0; + depth = 0; + in_special = FALSE; + + for (i = 0; string[i] != 0; i++) + { + count_virtual_char (string, i, &vchars_seen, &depth, &in_special); + if (vchars_seen == prefix_len) + return i+1; + } + + return i; + +} /* string_prefix() */ + + +/* ------------------------------------------------------------------------ +@NAME : append_text() +@INOUT : string +@INPUT : offset + text + start + len +@OUTPUT : +@RETURNS : number of characters copied from text+start to string+offset +@DESCRIPTION: Copies at most `len' characters from text+start to + string+offset. (I don't use strcpy() or strncpy() for this + because I need to get the number of characters actually + copied.) 
+@CALLS : +@CALLERS : format_name() +@CREATED : 1997/11/03, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static int +append_text (char * string, + int offset, + const char * text, + int start, + int len) +{ + int i; + + if (text == NULL) return 0; /* no text -- none appended! */ + + for (i = 0; text[start+i] != 0; i++) + { + if (len > 0 && i == len) + break; /* exit loop without i++, right?!? */ + string[offset+i] = text[start+i]; + } /* for i */ + + return i; /* number of characters copied */ + +} /* append_text () */ + + +/* ------------------------------------------------------------------------ +@NAME : append_join +@INOUT : string +@INPUT : offset + method + should_tie +@OUTPUT : +@RETURNS : number of charactersa appended to string+offset (either 0 or 1) +@DESCRIPTION: Copies a "join character" ('~' or ' ') or nothing to + string+offset, according to the join method specified by + `method' and the `should_tie' flag. + + Specifically: if `method' is BTJ_SPACE, a space is appended + and 1 is returned; if `method' is BTJ_FORCETIE, a TeX "tie" + character ('~') is appended and 1 is returned. If `method' + is BTJ_NOTHING, `string' is unchanged and 0 is returned. If + `method' is BTJ_MAYTIE then either a tie (if should_tie is + true) or a space (otherwise) is appended, and 1 is returned. +@CALLS : +@CALLERS : format_name() +@CREATED : 1997/11/03, GPW +@MODIFIED : +@COMMENTS : This should allow "tie" strings other than TeX's '~' -- I + think this could be done by putting a "tie string" field in + the name format structure, and using it here. 
+-------------------------------------------------------------------------- */ +static int +append_join (char * string, + int offset, + bt_joinmethod method, + boolean should_tie) +{ + switch (method) + { + case BTJ_MAYTIE: /* a "discretionary tie" -- pay */ + { /* attention to should_tie */ + if (should_tie) + string[offset] = '~'; + else + string[offset] = ' '; + return 1; + } + case BTJ_SPACE: + { + string[offset] = ' '; + return 1; + } + case BTJ_FORCETIE: + { + string[offset] = '~'; + return 1; + } + case BTJ_NOTHING: + { + return 0; + } + default: + internal_error ("bad token join method %d", (int) method); + } + + return 0; /* can't happen -- just here to */ + /* keep gcc -Wall happy */ +} /* append_join () */ + + +#define STRLEN(s) (s == NULL) ? 0 : strlen (s) + +/* ------------------------------------------------------------------------ +@NAME : format_firstpass() +@INPUT : name + format +@OUTPUT : +@RETURNS : +@DESCRIPTION: Makes the first pass over a name for formatting, in order to + establish an upper bound on the length of the formatted name. 
+@CALLS : +@CALLERS : bt_format_name() +@CREATED : 1997/11/03, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static unsigned +format_firstpass (bt_name * name, + bt_name_format * format) +{ + int i; /* loop over parts */ + int j; /* loop over tokens */ + unsigned max_length; + bt_namepart part; + char ** tok; + int num_tok; + + max_length = 0; + + for (i = 0; i < format->num_parts; i++) + { + part = format->parts[i]; /* 'cause I'm a lazy typist */ + tok = name->parts[part]; + num_tok = name->part_len[part]; + + assert ((tok != NULL) == (num_tok > 0)); + if (tok) + { + max_length += STRLEN (format->pre_part[part]); + max_length += STRLEN (format->post_part[part]); + max_length += STRLEN (format->pre_token[part]) * num_tok; + max_length += STRLEN (format->post_token[part]) * num_tok; + max_length += num_tok + 1; /* one join char per token, plus */ + /* join char to next part */ + + /* + * We ignore abbreviation here -- just overestimates the maximum + * length, so no big deal. Also saves us the bother of computing + * the physical length of the prefix of virtual length 1. + */ + for (j = 0; j < num_tok; j++) + max_length += strlen (tok[j]); + } + + } /* for i (loop over parts) */ + + return max_length; + +} /* format_firstpass() */ + + +/* ------------------------------------------------------------------------ +@NAME : format_name() +@INPUT : format + tokens - token list (eg. from format_firstpass()) + num_tokens - token count list (eg. from format_firstpass()) +@OUTPUT : fname - filled in, must be preallocated by caller +@RETURNS : +@DESCRIPTION: Performs the second pass over a name and format, to actually + put the name into a single string according to `format'. 
+@CALLS : +@CALLERS : bt_format_name() +@CREATED : 1997/11/03, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +format_name (bt_name_format * format, + char *** tokens, + int * num_tokens, + char * fname) +{ + bt_namepart parts[BT_MAX_NAMEPARTS]; /* culled list from format */ + int num_parts; + + int offset; /* into fname */ + int i; /* loop over parts */ + int j; /* loop over tokens */ + bt_namepart part; + int prefix_len; + int token_len; /* "physical" length (characters) */ + int token_vlen; /* "virtual" length (special char */ + /* counts as one character) */ + boolean should_tie; + + /* + * Cull format->parts down by keeping only those parts that are actually + * present in the current name (keeps the main loop simpler: makes it + * easy to know if the "next part" is present or not, so we know whether + * to append a join character. + */ + num_parts = 0; + for (i = 0; i < format->num_parts; i++) + { + part = format->parts[i]; + if (tokens[part]) /* name actually has this part */ + parts[num_parts++] = part; + } + + offset = 0; + token_vlen = -1; /* sanity check, and keeps */ + /* "gcc -O -Wall" happy */ + + for (i = 0; i < num_parts; i++) + { + part = parts[i]; + + offset += append_text (fname, offset, + format->pre_part[part], 0, -1); + + for (j = 0; j < num_tokens[part]; j++) + { + offset += append_text (fname, offset, + format->pre_token[part], 0, -1); + if (format->abbrev[part]) + { + prefix_len = string_prefix (tokens[part][j], 1); + token_len = append_text (fname, offset, + tokens[part][j], 0, prefix_len); + token_vlen = 1; + } + else + { + token_len = append_text (fname, offset, + tokens[part][j], 0, -1); + token_vlen = string_length (tokens[part][j]); + } + offset += token_len; + offset += append_text (fname, offset, + format->post_token[part], 0, -1); + + /* join to next token, but only if there is a next token! 
*/ + if (j < num_tokens[part]-1) + { + should_tie = (num_tokens[part] > 1) + && (((j == 0) && (token_vlen < 3)) + || (j == num_tokens[part]-2)); + offset += append_join (fname, offset, + format->join_tokens[part], should_tie); + } + + } /* for j */ + + offset += append_text (fname, offset, + format->post_part[part], 0, -1); + /* join to the next part, but again only if there is a next part */ + if (i < num_parts-1) + { + if (token_vlen == -1) + { + internal_error ("token_vlen uninitialized -- no tokens in a part " + "that I checked existed"); + } + should_tie = (num_tokens[part] == 1 && token_vlen < 3); + offset += append_join (fname, offset, + format->join_part[part], should_tie); + } + + } /* for i (loop over parts) */ + + fname[offset] = 0; + +} /* format_name () */ + + +#if DEBUG + +#define STATIC /* so BibTeX.xs can call 'em too */ + +/* borrowed print_tokens() and dump_name() from t/name_test.c */ +STATIC void +print_tokens (char *partname, char **tokens, int num_tokens) +{ + int i; + + if (tokens) + { + printf ("%s = (", partname); + for (i = 0; i < num_tokens; i++) + { + printf ("%s%c", tokens[i], i == num_tokens-1 ? 
')' : '|'); + } + putchar ('\n'); + } +} + + +STATIC void +dump_name (bt_name * name) +{ + if (name == NULL) + { + printf (" name: null\n"); + return; + } + + if (name->tokens == NULL) + { + printf (" name: null token list\n"); + return; + } + + printf (" name (%p):\n", name); + printf (" total number of tokens = %d\n", name->tokens->num_items); + print_tokens (" first", name->parts[BTN_FIRST], name->part_len[BTN_FIRST]); + print_tokens (" von", name->parts[BTN_VON], name->part_len[BTN_VON]); + print_tokens (" last", name->parts[BTN_LAST], name->part_len[BTN_LAST]); + print_tokens (" jr", name->parts[BTN_JR], name->part_len[BTN_JR]); +} + + +STATIC void +dump_format (bt_name_format * format) +{ + int i; + static char * nameparts[] = { "first", "von", "last", "jr" }; + static char * joinmethods[] = {"may tie", "space", "force tie", "nothing"}; + + printf (" name format (%p):\n", format); + printf (" order:"); + for (i = 0; i < format->num_parts; i++) + printf (" %s", nameparts[format->parts[i]]); + printf ("\n"); + + for (i = 0; i < BT_MAX_NAMEPARTS; i++) + { + printf (" %-5s: pre-part=%p (%s), post-part=%p (%s)\n", + nameparts[i], + format->pre_part[i], format->pre_part[i], + format->post_part[i], format->post_part[i]); + printf (" %-5s pre-token=%p (%s), post-token=%p (%s)\n", + "", + format->pre_token[i], format->pre_token[i], + format->post_token[i],format->post_token[i]); + printf (" %-5s abbrev=%s, join_tokens=%s, join_parts=%s\n", + "", + format->abbrev[i] ? "yes" : "no", + joinmethods[format->join_tokens[i]], + joinmethods[format->join_part[i]]); + } +} +#endif + + +/* ------------------------------------------------------------------------ +@NAME : bt_format_name() +@INPUT : name + format +@OUTPUT : +@RETURNS : formatted name (allocated with malloc(); caller must free() it) +@DESCRIPTION: Formats an already-split name according to a pre-constructed + format structure. 
+@GLOBALS : +@CALLS : format_firstpass(), format_name() +@CALLERS : +@CREATED : 1997/11/03, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +char * +bt_format_name (bt_name * name, + bt_name_format * format) +{ + unsigned max_length; + char * fname; + +#if DEBUG >= 2 + printf ("bt_format_name():\n"); + dump_name (name); + dump_format (format); +#endif + + max_length = format_firstpass (name, format); + fname = (char *) malloc ((max_length+1) * sizeof (char)); +#if 0 + memset (fname, '_', max_length); + fname[max_length] = 0; +#endif + format_name (format, name->parts, name->part_len, fname); + assert (strlen (fname) <= max_length); + return fname; + +} /* bt_format_name() */ diff --git a/src/translators/btparse/init.c b/src/translators/btparse/init.c new file mode 100644 index 0000000..4a1ec06 --- /dev/null +++ b/src/translators/btparse/init.c @@ -0,0 +1,42 @@ +/* ------------------------------------------------------------------------ +@NAME : init.c +@DESCRIPTION: Initialization and cleanup functions for the btparse library. +@GLOBALS : +@CALLS : +@CREATED : 1997/01/19, Greg Ward +@MODIFIED : +@VERSION : $Id: init.c,v 1.8 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include "stdpccts.h" /* for zzfree_ast() prototype */ +#include "parse_auxiliary.h" /* for fix_token_names() proto */ +#include "prototypes.h" /* for other prototypes */ +/*#include "my_dmalloc.h"*/ + +void bt_initialize (void) +{ + /* Initialize data structures */ + + fix_token_names (); + init_macros (); +} + + +void bt_free_ast (AST *ast) +{ + zzfree_ast (ast); +} + + +void bt_cleanup (void) +{ + done_macros (); +} diff --git a/src/translators/btparse/input.c b/src/translators/btparse/input.c new file mode 100644 index 0000000..dbb7b44 --- /dev/null +++ b/src/translators/btparse/input.c @@ -0,0 +1,499 @@ +/* ------------------------------------------------------------------------ +@NAME : input.c +@DESCRIPTION: Routines for input of BibTeX data. +@GLOBALS : InputFilename + StringOptions +@CALLS : +@CREATED : 1997/10/14, Greg Ward (from code in bibparse.c) +@MODIFIED : +@VERSION : $Id: input.c,v 1.18 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include <stdio.h> +#include <limits.h> +#include <assert.h> +#include "stdpccts.h" +#include "lex_auxiliary.h" +#include "prototypes.h" +#include "error.h" +/*#include "my_dmalloc.h"*/ + + +char * InputFilename; +ushort StringOptions[NUM_METATYPES] = +{ + 0, /* BTE_UNKNOWN */ + BTO_FULL, /* BTE_REGULAR */ + BTO_MINIMAL, /* BTE_COMMENT */ + BTO_MINIMAL, /* BTE_PREAMBLE */ + BTO_MACRO /* BTE_MACRODEF */ +}; + + +/* ------------------------------------------------------------------------ +@NAME : bt_set_filename +@INPUT : filename +@OUTPUT : +@RETURNS : +@DESCRIPTION: Sets the current input filename -- used for generating + error and warning messages. +@GLOBALS : InputFilename +@CALLS : +@CREATED : Feb 1997, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +#if 0 +void bt_set_filename (char *filename) +{ + InputFilename = filename; +} +#endif + +/* ------------------------------------------------------------------------ +@NAME : bt_set_stringopts +@INPUT : metatype + options +@OUTPUT : +@RETURNS : +@DESCRIPTION: Sets the string-processing options for a particular + entry metatype. Used later on by bt_parse_* to determine + just how to post-process each particular entry. 
+@GLOBALS : StringOptions +@CREATED : 1997/08/24, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void bt_set_stringopts (bt_metatype metatype, ushort options) +{ + if (metatype < BTE_REGULAR || metatype > BTE_MACRODEF) + usage_error ("bt_set_stringopts: illegal metatype"); + if (options & ~BTO_STRINGMASK) + usage_error ("bt_set_stringopts: illegal options " + "(must only set string option bits"); + + StringOptions[metatype] = options; +} + + +/* ------------------------------------------------------------------------ +@NAME : start_parse +@INPUT : infile input stream we'll read from (or NULL if reading + from string) + instring input string we'll read from (or NULL if reading + from stream) + line line number of the start of the string (just + use 1 if the string is standalone and independent; + if it comes from a file, you should supply the + line number where it starts for better error + messages) (ignored if infile != NULL) +@OUTPUT : +@RETURNS : +@DESCRIPTION: Prepares things for parsing, in particular initializes the + lexical state and lexical buffer, prepares DLG for + reading (either from a stream or a string), and reads + the first token. 
+@GLOBALS : +@CALLS : initialize_lexer_state() + alloc_lex_buffer() + zzrdstream() or zzrdstr() + zzgettok() +@CALLERS : +@CREATED : 1997/06/21, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +start_parse (FILE *infile, char *instring, int line) +{ + if ( (infile == NULL) == (instring == NULL) ) + { + internal_error ("start_parse(): exactly one of infile and " + "instring may be non-NULL"); + } + initialize_lexer_state (); + alloc_lex_buffer (ZZLEXBUFSIZE); + if (infile) + { + zzrdstream (infile); + } + else + { + zzrdstr (instring); + zzline = line; + } + + zzendcol = zzbegcol = 0; + zzgettok (); +} + + + +/* ------------------------------------------------------------------------ +@NAME : finish_parse() +@INPUT : err_counts - pointer to error count list (which is local to + the parsing functions, hence has to be passed in) +@OUTPUT : +@RETURNS : +@DESCRIPTION: Frees up what was needed to parse a whole file or a sequence + of strings: the lexical buffer and the error count list. +@GLOBALS : +@CALLS : free_lex_buffer() +@CALLERS : +@CREATED : 1997/06/21, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +finish_parse (int **err_counts) +{ + free_lex_buffer (); + free (*err_counts); + *err_counts = NULL; +} + + +/* ------------------------------------------------------------------------ +@NAME : parse_status() +@INPUT : saved_counts +@OUTPUT : +@RETURNS : false if there were serious errors in the recently-parsed input + true otherwise (no errors or just warnings) +@DESCRIPTION: Gets the "error status" bitmap relative to a saved set of + error counts and masks of non-serious errors. 
+@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/06/21, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static boolean +parse_status (int *saved_counts) +{ + ushort ignore_emask; + + /* + * This bit-twiddling fetches the error status (which has a bit + * for each error class), masks off the bits for trivial errors + * to get "true" if there were any serious errors, and then + * returns the opposite of that. + */ + ignore_emask = + (1<<BTERR_NOTIFY) | (1<<BTERR_CONTENT) | (1<<BTERR_LEXWARN); + return !(bt_error_status (saved_counts) & ~ignore_emask); +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_parse_entry_s() +@INPUT : entry_text - string containing the entire entry to parse, + or NULL meaning we're done, please cleanup + options - standard btparse options bitmap + line - current line number (if that makes any sense) + -- passed to the parser to set zzline, so that + lexical and syntax errors are properly localized +@OUTPUT : *top - newly-allocated AST for the entry + (or NULL if entry_text was NULL, ie. at EOF) +@RETURNS : 1 with *top set to AST for entry on successful read/parse + 1 with *top==NULL if entry_text was NULL, ie. at EOF + 0 if any serious errors seen in input (*top is still + set to the AST, but only for as much of the input as we + were able to parse) + (A "serious" error is a lexical or syntax error; "trivial" + errors such as warnings and notifications count as "success" + for the purposes of this function's return value.) +@DESCRIPTION: Parses a BibTeX entry contained in a string. 
+@GLOBALS : +@CALLS : ANTLR +@CREATED : 1997/01/18, GPW (from code in bt_parse_entry()) +@MODIFIED : +-------------------------------------------------------------------------- */ +AST * bt_parse_entry_s (char * entry_text, + char * filename, + int line, + ushort options, + boolean * status) +{ + AST * entry_ast = NULL; + static int * err_counts = NULL; + + if (options & BTO_STRINGMASK) /* any string options set? */ + { + usage_error ("bt_parse_entry_s: illegal options " + "(string options not allowed"); + } + + InputFilename = filename; + err_counts = bt_get_error_counts (err_counts); + + if (entry_text == NULL) /* signal to clean up */ + { + finish_parse (&err_counts); + if (status) *status = TRUE; + return NULL; + } + + zzast_sp = ZZAST_STACKSIZE; /* workaround apparent pccts bug */ + start_parse (NULL, entry_text, line); + + entry (&entry_ast); /* enter the parser */ + ++zzasp; /* why is this done? */ + + if (entry_ast == NULL) /* can happen with very bad input */ + { + if (status) *status = FALSE; + return entry_ast; + } + +#if DEBUG + dump_ast ("bt_parse_entry_s: single entry, after parsing:\n", + entry_ast); +#endif + bt_postprocess_entry (entry_ast, + StringOptions[entry_ast->metatype] | options); +#if DEBUG + dump_ast ("bt_parse_entry_s: single entry, after post-processing:\n", + entry_ast); +#endif + + if (status) *status = parse_status (err_counts); + return entry_ast; + +} /* bt_parse_entry_s () */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_parse_entry() +@INPUT : infile - file to read next entry from + options - standard btparse options bitmap +@OUTPUT : *top - AST for the entry, or NULL if no entries left in file +@RETURNS : same as bt_parse_entry_s() +@DESCRIPTION: Starts (or continues) parsing from a file. 
+@GLOBALS : +@CALLS : +@CREATED : Jan 1997, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +AST * bt_parse_entry (FILE * infile, + char * filename, + ushort options, + boolean * status) +{ + AST * entry_ast = NULL; + static int * err_counts = NULL; + static FILE * prev_file = NULL; + + if (prev_file != NULL && infile != prev_file) + { + usage_error ("bt_parse_entry: you can't interleave calls " + "across different files"); + } + + if (options & BTO_STRINGMASK) /* any string options set? */ + { + usage_error ("bt_parse_entry: illegal options " + "(string options not allowed)"); + } + + InputFilename = filename; + err_counts = bt_get_error_counts (err_counts); + + if (feof (infile)) + { + if (prev_file != NULL) /* haven't already done the cleanup */ + { + prev_file = NULL; + finish_parse (&err_counts); + } + else + { + usage_warning ("bt_parse_entry: second attempt to read past eof"); + } + + if (status) *status = TRUE; + return NULL; + } + + /* + * Here we do some nasty poking about the innards of PCCTS in order to + * enter the parser multiple times on the same input stream. This code + * comes from expanding the macro invokation: + * + * ANTLR (entry (top), infile); + * + * When LL_K, ZZINF_LOOK, and DEMAND_LOOK are all undefined, this + * ultimately expands to + * + * zzbufsize = ZZLEXBUFSIZE; + * { + * static char zztoktext[ZZLEXBUFSIZE]; + * zzlextext = zztoktext; + * zzrdstream (f); + * zzgettok(); + * } + * entry (top); + * ++zzasp; + * + * (I'm expanding hte zzenterANTLR, zzleaveANTLR, and zzPrimateLookAhead + * macros, but leaving ZZLEXBUFSIZE -- a simple constant -- alone.) 
+ * + * There are two problems with this: 1) zztoktext is a statically + * allocated buffer, and when it overflows we just ignore further + * characters that should belong to that lexeme; and 2) zzrdstream() and + * zzgettok() are called every time we enter the parser, which means the + * token left over from the previous entry will be discarded when we + * parse entries 2 .. N. + * + * I handle the static buffer problem with alloc_lex_buffer() and + * realloc_lex_buffer() (in lex_auxiliary.c), and by rewriting the ZZCOPY + * macro to call realloc_lex_buffer() when overflow is detected. + * + * I handle the extra token-read by hanging on to a static file + * pointer, prev_file, between calls to bt_parse_entry() -- when + * the program starts it is NULL, and we reset it to NULL on + * finishing a file. Thus, any call that is the first on a given + * file will allocate the lexical buffer and read the first token; + * thereafter, we skip those steps, and free the buffer on reaching + * end-of-file. Currently, this method precludes interleaving + * calls to bt_parse_entry() on different files -- perhaps I could + * fix this with the zz{save,restore}_{antlr,dlg}_state() + * functions? + */ + + zzast_sp = ZZAST_STACKSIZE; /* workaround apparent pccts bug */ + +#if defined(LL_K) || defined(ZZINF_LOOK) || defined(DEMAND_LOOK) +# error One of LL_K, ZZINF_LOOK, or DEMAND_LOOK was defined +#endif + if (prev_file == NULL) /* only read from input stream if */ + { /* starting afresh with a file */ + start_parse (infile, NULL, 0); + prev_file = infile; + } + assert (prev_file == infile); + + entry (&entry_ast); /* enter the parser */ + ++zzasp; /* why is this done? 
*/ + + if (entry_ast == NULL) /* can happen with very bad input */ + { + if (status) *status = FALSE; + return entry_ast; + } + +#if DEBUG + dump_ast ("bt_parse_entry(): single entry, after parsing:\n", + entry_ast); +#endif + bt_postprocess_entry (entry_ast, + StringOptions[entry_ast->metatype] | options); +#if DEBUG + dump_ast ("bt_parse_entry(): single entry, after post-processing:\n", + entry_ast); +#endif + + if (status) *status = parse_status (err_counts); + return entry_ast; + +} /* bt_parse_entry() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_parse_file () +@INPUT : filename - name of file to open. If NULL or "-", we read + from stdin rather than opening a new file. + options +@OUTPUT : top +@RETURNS : 0 if any entries in the file had serious errors + 1 if all entries were OK +@DESCRIPTION: Parses an entire BibTeX file, and returns a linked list + of ASTs (or, if you like, a forest) for the entries in it. + (Any entries with serious errors are omitted from the list.) +@GLOBALS : +@CALLS : bt_parse_entry() +@CREATED : 1997/01/18, from process_file() in bibparse.c +@MODIFIED : +@COMMENTS : This function bears a *striking* resemblance to bibparse.c's + process_file(). Eventually, I plan to replace this with + a generalized process_file() that takes a function pointer + to call for each entry. Until I decide on the right interface + for that, though, I'm sticking with this simpler (but possibly + memory-intensive) approach. +-------------------------------------------------------------------------- */ +AST * bt_parse_file (char * filename, + ushort options, + boolean * status) +{ + FILE * infile; + AST * entries, + * cur_entry, + * last; + boolean entry_status, + overall_status; + + if (options & BTO_STRINGMASK) /* any string options set? */ + { + usage_error ("bt_parse_file: illegal options " + "(string options not allowed"); + } + + /* + * If a string was given, and it's *not* "-", then open that filename. 
+ * Otherwise just use stdin. + */ + + if (filename != NULL && strcmp (filename, "-") != 0) + { + InputFilename = filename; + infile = fopen (filename, "r"); + if (infile == NULL) + { + perror (filename); + return 0; + } + } + else + { + InputFilename = "(stdin)"; + infile = stdin; + } + + entries = NULL; + last = NULL; + +#if 1 + /* explicit loop over entries, with junk cleaned out by read_entry () */ + + overall_status = TRUE; /* assume success */ + while ((cur_entry = bt_parse_entry + (infile, InputFilename, options, &entry_status))) + { + overall_status &= entry_status; + if (!entry_status) continue; /* bad entry -- try next one */ + if (!cur_entry) break; /* at eof -- we're done */ + if (last == NULL) /* this is the first entry */ + entries = cur_entry; + else /* have already seen one */ + last->right = cur_entry; + + last = cur_entry; + } + +#else + /* let the PCCTS lexer/parser handle everything */ + + initialize_lexer_state (); + ANTLR (bibfile (top), infile); + +#endif + + fclose (infile); + InputFilename = NULL; + if (status) *status = overall_status; + return entries; + +} /* bt_parse_file() */ diff --git a/src/translators/btparse/lex_auxiliary.c b/src/translators/btparse/lex_auxiliary.c new file mode 100644 index 0000000..8fac463 --- /dev/null +++ b/src/translators/btparse/lex_auxiliary.c @@ -0,0 +1,939 @@ +/* ------------------------------------------------------------------------ +@NAME : lex_auxiliary.c +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: The code and global variables here have three main purposes: + - maintain the lexical buffer (zztoktext, which + traditionally with PCCTS is a static array; I have + changed things so that it's dynamically allocated and + resized on overflow) + - keep track of lexical state that's not handled by PCCTS + code (like "where are we in terms of BibTeX entries?" 
or + "what are the delimiters for the current entry/string?") + - everything called from lexical actions is here, to keep + the grammar file itself neat and clean +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : Greg Ward, 1996/07/25-28 +@MODIFIED : Jan 1997 + Jun 1997 +@VERSION : $Id: lex_auxiliary.c,v 1.31 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#include <assert.h> +#include "lex_auxiliary.h" +#include "stdpccts.h" +#include "error.h" +#include "prototypes.h" +/*#include "my_dmalloc.h"*/ + +#define DUPE_TEXT 0 + +extern char * InputFilename; /* from input.c */ + +GEN_PRIVATE_ERRFUNC (lexical_warning, (const char * fmt, ...), + BTERR_LEXWARN, InputFilename, zzline, NULL, -1, fmt) +GEN_PRIVATE_ERRFUNC (lexical_error, (const char * fmt, ...), + BTERR_LEXERR, InputFilename, zzline, NULL, -1, fmt) + + + +/* ---------------------------------------------------------------------- + * Global variables + */ + +/* First, the lexical buffer. 
This is used elsewhere, so can't be static */ +char * zztoktext = NULL; + +/* + * Now, the lexical state -- first, stuff that arises from scanning + * at top-level and the beginnings of entries; + * EntryState: + * toplevel when we start scanning a file, or when we are in in_entry + * mode and see '}' or ')' + * after_at when we are in toplevel mode and see an '@' + * after_type when we are in after_at mode and see a name (!= 'comment') + * in_comment when we are in after_at mode and see a name (== 'comment') + * in_entry when we are in after_type mode and see '{' or '(' + * EntryOpener: + * the character ('(' or '{') which opened the entry currently being + * scanned (we use this to make sure that the entry opener and closer + * match; if not, we issue a warning) + * EntryMetatype: (NB. typedef for bt_metatype is in btparse.h) + * classifies entries according to the syntax we will use to parse them; + * also winds up (after being changed to a bt_nodetype value) in the + * node that roots the entry AST: + * comment - anything between () or {} + * preamble - a single compound value + * string - a list of "name = compound_value" assignments; no key + * alias - a single "name = compound_value" assignment (where + * the compound value in this case is presumably a + * name, rather than a string -- this is not syntactically + * checked though) + * modify, + * entry - a key followed by a list of "name = compound_value" + * assignments + * JunkCount: + * the number of non-whitespace, non-'@' characters seen at toplevel + * between two entries (used to print out a warning when we hit + * the beginning of entry, to help people catch "old style" implicit + * comments + */ +static enum { toplevel, after_at, after_type, in_comment, in_entry } + EntryState; +static char EntryOpener; /* '(' or '{' */ +static bt_metatype + EntryMetatype; +static int JunkCount; /* non-whitespace chars at toplevel */ + +/* + * String state -- these are maintained and used by the functions called + * 
from actions in the string lexer. + * BraceDepth: + * brace depth within a string; we can only end the current string + * when this is zero + * ParenDepth: + * parenthesis depth within a string; needed for @comment entries + * that are paren-delimited (because the comment in that case is + * a paren-delimited string) + * StringOpener: + * similar to EntryOpener, but stronger than merely warning of token + * mismatch -- this determines which character ('"' or '}') can + * actually end the string + * StringStart: + * line on which current string started; if we detect an apparent + * runaway, this is used to report where the runaway started + * ApparentRunaway: + * flags if we have already detected (and warned) that the current + * string appears to be a runaway, so that we don't warn again + * (and again and again and again) + * QuoteWarned: + * flags if we have already warned about seeing a '"' in a string, + * because they tend to come in pairs and one warning per string + * is enough + * + * (See bibtex.g for an explanation of my runaway string detection heuristic.) + */ +static char StringOpener = '\0'; /* '{' or '"' */ +static int BraceDepth; /* depth of brace-nesting */ +static int ParenDepth; /* depth of parenthesis-nesting */ +static int StringStart = -1; /* start line of current string */ +static int ApparentRunaway; /* current string looks like runaway */ +static int QuoteWarned; /* already warned about " in string? 
*/ + + + +/* ---------------------------------------------------------------------- + * Miscellaneous functions: + * lex_info() (handy for debugging) + * zzcr_attr() (called from PCCTS-generated code) + */ + +void lex_info (void) +{ + printf ("LA(1) = \"%s\" token %d, %s\n", LATEXT(1), LA(1), zztokens[LA(1)]); +#ifdef LL_K + printf ("LA(2) = \"%s\" token %d, %s\n", LATEXT(2), LA(2), zztokens[LA(2)]); +#endif +} + + +void zzcr_attr (Attrib *a, int tok, char *txt) +{ + if (tok == STRING) + { + int len = strlen (txt); + + assert ((txt[0] == '{' && txt[len-1] == '}') + || (txt[0] == '"' && txt[len-1] == '"')); + txt[len-1] = (char) 0; /* remove closing quote from string */ + txt++; /* so we'll skip the opening quote */ + } + +#if DUPE_TEXT + a->text = strdup (txt); +#else + a->text = txt; +#endif + a->token = tok; + a->line = zzline; + a->offset = zzbegcol; +#if DEBUG > 1 + dprintf ("zzcr_attr: input txt = %p (%s)\n", txt, txt); + dprintf (" dupe txt = %p (%s)\n", a->text, a->text); +#endif +} + + +#if DUPE_TEXT +void zzd_attr (Attrib *attr) +{ + free (attr->text); +} +#endif + + +/* ---------------------------------------------------------------------- + * Lexical buffer functions: + * alloc_lex_buffer() + * realloc_lex_buffer() + * free_lex_buffer() + * lexer_overflow() + * zzcopy() (only if ZZCOPY_FUNCTION is defined and true) + */ + + +/* + * alloc_lex_buffer() + * + * allocates the lexical buffer with `size' characters. Clears the buffer, + * points zzlextext at it, and sets zzbufsize to `size'. + * + * Does nothing if the buffer is already allocated. 
+ * + * globals: zztoktext, zzlextext, zzbufsize + * callers: bt_parse_entry() (in input.c) + */ +void alloc_lex_buffer (int size) +{ + if (zztoktext == NULL) + { + zztoktext = (char *) malloc (size * sizeof (char)); + memset (zztoktext, 0, size); + zzlextext = zztoktext; + zzbufsize = size; + } +} /* alloc_lex_buffer() */ + + +/* + * realloc_lex_buffer() + * + * Reallocates the lexical buffer -- size is increased by `size_increment' + * characters (which could be negative). Updates all globals that point + * to or into the buffer (zzlextext, zzbegexpr, zzendexpr), as well as + * zztoktext (the buffer itself) zzbufsize (the buffer size). + * + * This is only meant to be called (ultimately) from zzgettok(), part of + * the DLG code. (In fact, zzgettok() invokes the ZZCOPY() macro, which + * calls lexer_overflow() on buffer overflow, which calls + * realloc_lex_buffer(). Whatever.) The `lastpos' and `nextpos' arguments + * correspond, respectively, to a local variable in zzgettok() and a static + * global in dlgauto.h (hence really in scan.c). They both point into + * the lexical buffer, so have to be passed by reference here so that + * we can update them to point into the newly-reallocated buffer. 
+ * + * globals: zztottext, zzbufsize, zzlextext, zzbegexpr, zzendexpr + * callers: lexer_overflow() + */ +static void +realloc_lex_buffer (int size_increment, + unsigned char ** lastpos, + unsigned char ** nextpos) +{ + int beg, end, next; + + if (zztoktext == NULL) + internal_error ("attempt to reallocate unallocated lexical buffer"); + + zztoktext = (char *) realloc (zztoktext, zzbufsize+size_increment); + memset (zztoktext+zzbufsize, 0, size_increment); + zzbufsize += size_increment; + + beg = zzbegexpr - zzlextext; + end = zzendexpr - zzlextext; + next = *nextpos - zzlextext; + zzlextext = zztoktext; + + if (lastpos != NULL) + *lastpos = zzlextext+zzbufsize-1; + zzbegexpr = zzlextext + beg; + zzendexpr = zzlextext + end; + *nextpos = zzlextext + next; + +} /* realloc_lex_buffer() */ + + +/* + * free_lex_buffer() + * + * Frees the lexical buffer allocated by alloc_lex_buffer(). + */ +void free_lex_buffer (void) +{ + if (zztoktext == NULL) + internal_error ("attempt to free unallocated (or already freed) " + "lexical buffer"); + + free (zztoktext); + zztoktext = NULL; +} /* free_lex_buffer() */ + + +/* + * lexer_overflow() + * + * Prints a warning and calls realloc_lex_buffer() to increase the size + * of the lexical buffer by ZZLEXBUFSIZE (a constant -- hence the buffer + * size increases linearly, not exponentially). + * + * Also prints a couple of lines of useful debugging stuff if DEBUG is true. 
+ */ +void lexer_overflow (unsigned char **lastpos, unsigned char **nextpos) +{ +#if DEBUG + char head[16], tail[16]; + + printf ("zzcopy: overflow detected\n"); + printf (" zzbegcol=%d, zzendcol=%d, zzline=%d\n", + zzbegcol, zzendcol, zzline); + strncpy (head, zzlextext, 15); head[15] = 0; + strncpy (tail, zzlextext+ZZLEXBUFSIZE-15, 15); tail[15] = 0; + printf (" zzlextext=>%s...%s< (last char=%d (%c))\n", + head, tail, + zzlextext[ZZLEXBUFSIZE-1], zzlextext[ZZLEXBUFSIZE-1]); + printf (" zzchar = %d (%c), zzbegexpr=zzlextext+%d\n", + zzchar, zzchar, zzbegexpr-zzlextext); +#endif + + notify ("lexical buffer overflowed (reallocating to %d bytes)", + zzbufsize+ZZLEXBUFSIZE); + realloc_lex_buffer (ZZLEXBUFSIZE, lastpos, nextpos); + +} /* lexer_overflow () */ + + +#if ZZCOPY_FUNCTION +/* + * zzcopy() + * + * Does the same as the ZZCOPY macro (in lex_auxiliary.h), but as a + * function for easier debugging. + */ +void zzcopy (char **nextpos, char **lastpos, int *ovf_flag) +{ + if (*nextpos >= *lastpos) + { + lexer_overflow (lastpos, nextpos); + } + + **nextpos = zzchar; + (*nextpos)++; +} +#endif + + + +/* ---------------------------------------------------------------------- + * Report/maintain lexical state + * report_state() (only meaningful if DEBUG) + * initialize_lexer_state() + * + * Note that the lexical action functions, below, also fiddle with + * the lexical state variables an awful lot. 
+ */ + +#if DEBUG +char *state_names[] = + { "toplevel", "after_at", "after_type", "in_comment", "in_entry" }; +char *metatype_names[] = + { "unknown", "comment", "preamble", "string", "alias", "modify", "entry" }; + +static void +report_state (char *where) +{ + printf ("%s: lextext=%s (line %d, offset %d), token=%d, " + "EntryState=%s\n", + where, zzlextext, zzline, zzbegcol, NLA, + state_names[EntryState]); +} +#else +# define report_state(where) +/* +static void +report_state (char *where) { } +*/ +#endif + +void initialize_lexer_state (void) +{ + zzmode (START); + EntryState = toplevel; + EntryOpener = (char) 0; + EntryMetatype = BTE_UNKNOWN; + JunkCount = 0; +} + + +bt_metatype entry_metatype (void) +{ + return EntryMetatype; +} + + + +/* ---------------------------------------------------------------------- + * Lexical actions (START and LEX_ENTRY modes) + */ + +/* + * newline () + * + * Does everything needed to handle newline outside of a quoted string: + * increments line counter and skips the newline. 
+ */ +void newline (void) +{ + zzline++; + zzskip(); +} + + +void comment (void) +{ + zzline++; + zzskip(); +} + + +void at_sign (void) +{ + if (EntryState == toplevel) + { + EntryState = after_at; + zzmode (LEX_ENTRY); + if (JunkCount > 0) + { + lexical_warning ("%d characters of junk seen at toplevel", JunkCount); + JunkCount = 0; + } + } + else + { + /* internal_error ("lexer recognized \"@\" at other than top-level"); */ + lexical_warning ("\"@\" in strange place -- should get syntax error"); + } + report_state ("at_sign"); +} + + +void toplevel_junk (void) +{ + JunkCount += strlen (zzlextext); + zzskip (); +} + + +void name (void) +{ + report_state ("name (pre)"); + + switch (EntryState) + { + case toplevel: + { + internal_error ("junk at toplevel (\"%s\")", zzlextext); + break; + } + case after_at: + { + char * etype = zzlextext; + EntryState = after_type; + + if (strcasecmp (etype, "comment") == 0) + { + EntryMetatype = BTE_COMMENT; + EntryState = in_comment; + } + + else if (strcasecmp (etype, "preamble") == 0) + EntryMetatype = BTE_PREAMBLE; + + else if (strcasecmp (etype, "string") == 0) + EntryMetatype = BTE_MACRODEF; +/* + else if (strcasecmp (etype, "alias") == 0) + EntryMetatype = BTE_ALIAS; + + else if (strcasecmp (etype, "modify") == 0) + EntryMetatype = BTE_MODIFY; +*/ + else + EntryMetatype = BTE_REGULAR; + + break; + } + case after_type: + case in_comment: + case in_entry: + break; /* do nothing */ + } + + report_state ("name (post)"); + +} + + +void lbrace (void) +{ + /* + * Currently takes a restrictive view of "when an lbrace is an entry + * opener" -- ie. *only* after '@name' (as determined by EntryState), + * where name is not 'comment'. This means that lbrace usually + * determines a string (in particular, when it's seen at toplevel -- + * which will happen under certain error situations), which in turn + * means that some unexpected things can become strings (like whole + * entries). 
+ */ + + if (EntryState == in_entry || EntryState == in_comment) + { + start_string ('{'); + } + else if (EntryState == after_type) + { + EntryState = in_entry; + EntryOpener = '{'; + NLA = ENTRY_OPEN; + } + else + { + lexical_warning ("\"{\" in strange place -- should get a syntax error"); + } + + report_state ("lbrace"); +} + + +void rbrace (void) +{ + if (EntryState == in_entry) + { + if (EntryOpener == '(') + lexical_warning ("entry started with \"(\", but ends with \"}\""); + NLA = ENTRY_CLOSE; + initialize_lexer_state (); + } + else + { + lexical_warning ("\"}\" in strange place -- should get a syntax error"); + } + report_state ("rbrace"); +} + + +void lparen (void) +{ + if (EntryState == in_comment) + { + start_string ('('); + } + else if (EntryState == after_type) + { + EntryState = in_entry; + EntryOpener = '('; + } + else + { + lexical_warning ("\"(\" in strange place -- should get a syntax error"); + } + report_state ("lparen"); +} + + +void rparen (void) +{ + if (EntryState == in_entry) + { + if (EntryOpener == '{') + lexical_warning ("entry started with \"{\", but ends with \")\""); + initialize_lexer_state (); + } + else + { + lexical_warning ("\")\" in strange place -- should get a syntax error"); + } + report_state ("rparen"); +} + + +/* ---------------------------------------------------------------------- + * Stuff for processing strings. + */ + + +/* + * start_string () + * + * Called when we see a '{' or '"' in the field data. Records which quote + * character was used, and calls open_brace() to increment the depth + * counter if it was a '{'. Switches to LEX_STRING mode, and tells the + * lexer to continue slurping characters into the same buffer. 
+ */ +void start_string (char start_char) +{ + StringOpener = start_char; + BraceDepth = 0; + ParenDepth = 0; + StringStart = zzline; + ApparentRunaway = 0; + QuoteWarned = 0; + if (start_char == '{') + open_brace (); + if (start_char == '(') + ParenDepth++; + if (start_char == '"' && EntryState == in_comment) + { + lexical_error ("comment entries must be delimited by either braces or parentheses"); + EntryState = toplevel; + zzmode (START); + return; + } + +#ifdef USER_ZZMODE_STACK + if (zzauto != LEX_ENTRY || EntryState != in_entry) +#else + if (EntryState != in_entry && EntryState != in_comment) +#endif + { + lexical_warning ("start of string seen at weird place"); + } + + zzmore (); + zzmode (LEX_STRING); +} + + +/* + * end_string () + * + * Called when we see either a '"' (at depth 0) or '}' (if it brings us + * down to depth 0) in a quoted string. Just makes sure that braces are + * balanced, and then goes back to the LEX_FIELD mode. + */ +void end_string (char end_char) +{ + char match; + +#ifndef ALLOW_WARNINGS + match = (char) 0; /* silence "might be used" */ + /* uninitialized" warning */ +#endif + + switch (end_char) + { + case '}': match = '{'; break; + case ')': match = '('; break; + case '"': match = '"'; break; + default: + internal_error ("end_string(): invalid end_char \"%c\"", end_char); + } + + assert (StringOpener == match); + + /* + * If we're at non-zero BraceDepth, that probably means mismatched braces + * somewhere -- complain about it and reset BraceDepth to minimize future + * confusion. + */ + + if (BraceDepth > 0) + { + lexical_error ("unbalanced braces: too many {'s"); + BraceDepth = 0; + } + + StringOpener = (char) 0; + StringStart = -1; + NLA = STRING; + + if (EntryState == in_comment) + { + int len = strlen (zzlextext); + + /* + * ARG! no, this is wrong -- what if unbalanced braces in the string + * and we try to output put it later? + * + * ARG! 
again, this is no more wrong than when we strip quotes in + * post_parse.c, and blithely assume that we can put them back on + * later for output in BibTeX syntax. Hmmm. + * + * Actually, it looks like this isn't a problem after all: you + * can't have unbalanced braces in a BibTeX string (at least + * not as parsed by btparse). + */ + + if (zzlextext[0] == '(') /* convert to standard quote delims */ + { + zzlextext[ 0] = '{'; + zzlextext[len-1] = '}'; + } + + EntryState = toplevel; + zzmode (START); + } + else + { + zzmode (LEX_ENTRY); + } + + report_state ("string"); +} + + +/* + * open_brace () + * + * Called when we see a '{', either to start a string (in which case + * it's called from start_string()) or inside a string (called directly + * from the lexer). + */ +void open_brace (void) +{ + BraceDepth++; + zzmore (); + report_state ("open_brace"); +} + + +/* + * close_brace () + * + * Called when we see a '}' inside a string. Decrements the depth counter + * and checks to see if we are down to depth 0, in which case the string is + * ended and the current lookahead token is set to STRING. Otherwise, + * just tells the lexer to keep slurping characters into the buffer. + */ +void close_brace (void) +{ + BraceDepth--; + if (StringOpener == '{' && BraceDepth == 0) + { + end_string ('}'); + } + + /* + * This could happen if some bonehead puts an unmatched right-brace + * in a quote-delimited string (eg. "Hello}"). To attempt to recover, + * we reset the depth to zero and continue slurping into the string. 
+ */ + else if (BraceDepth < 0) + { + lexical_error ("unbalanced braces: too many }'s"); + BraceDepth = 0; + zzmore (); + } + + /* Otherwise, it's just any old right brace in a string -- keep eating */ + else + { + zzmore (); + } + report_state ("close_brace"); +} + + +void lparen_in_string (void) +{ + ParenDepth++; + zzmore (); +} + + +void rparen_in_string (void) +{ + ParenDepth--; + if (StringOpener == '(' && ParenDepth == 0) + { + end_string (')'); + } + else + { + zzmore (); + } +} + + +/* + * quote_in_string () + * + * Called when we see '"' in a string. Ends the string if the quote is at + * depth 0 and the string was started with a quote, otherwise instructs the + * lexer to continue munching happily along. (Also prints a warning, + * assuming that input is destined for processing by TeX and you really + * want either `` or '' rather than ".) + */ +void quote_in_string (void) +{ + if (StringOpener == '"' && BraceDepth == 0) + { + end_string ('"'); + } + else + { + boolean at_top = FALSE;; + + /* + * Note -- this warning assumes that strings are destined + * to be processed by TeX, so it should be optional. Hmmm. + */ + + if (StringOpener == '"' || StringOpener == '(') + at_top = (BraceDepth == 0); + else if (StringOpener == '{') + at_top = (BraceDepth == 1); + else + internal_error ("Illegal string opener \"%c\"", StringOpener); + + if (!QuoteWarned && at_top) + { + lexical_warning ("found \" at brace-depth zero in string " + "(TeX accents in BibTeX should be inside braces)"); + QuoteWarned = 1; + } + zzmore (); + } +} + + +/* + * check_runaway_string () + * + * Called from the lexer whenever we see a newline in a string. See + * bibtex.g for a detailed explanation; basically, this function + * looks for an entry start ("@name{") or new field ("name=") immediately + * after a newline (with possible whitespace). 
This is a heuristic + * check for runaway strings, under the assumption that text that looks + * like a new entry or new field won't actually occur inside a string + * very often. + */ +void check_runaway_string (void) +{ + int len; + int i; + + /* + * could these be made significantly more efficient by a 256-element + * lookup table instead of calling strchr()? + */ + static const char *alpha_chars = "abcdefghijklmnopqrstuvwxyz"; + static const char *name_chars = "abcdefghijklmnopqrstuvwxyz0123456789:+/'.-"; + + /* + * on entry: zzlextext contains the whole string, starting with { + * and with newlines/tabs converted to space; zzbegexpr points to + * a chunk of the string starting with newline (newlines and + * tabs have not yet been converted) + */ + +#if DEBUG > 1 + printf ("check_runaway_string(): zzline=%d\n", zzline); + printf ("zzlextext=>%s<\nzzbegexpr=>%s<\n", + zzlextext, zzbegexpr); +#endif + + + /* + * increment zzline to take the leading newline into account -- but + * first a sanity check to be sure that newline is there! + */ + + if (zzbegexpr[0] != '\n') + { + lexical_warning ("huh? something's wrong (buffer overflow?) 
near " + "offset %d (line %d)", zzendcol, zzline); + /* internal_error ("zzbegexpr (line %d, offset %d-%d, " + "text >%s<, expr >%s<)" + "should start with a newline", + zzline, zzbegcol, zzendcol, zzlextext, zzbegexpr); + */ + } + else + { + zzline++; + } + + /* standardize whitespace (convert all to space) */ + + len = strlen (zzbegexpr); + for (i = 0; i < len; i++) + { + if (isspace (zzbegexpr[i])) + zzbegexpr[i] = ' '; + } + + + if (!ApparentRunaway) /* haven't already warned about it */ + { + enum { none, entry, field, giveup } guess; + + i = 1; + guess = none; + while (i < len && zzbegexpr[i] == ' ') i++; + + if (zzbegexpr[i] == '@') + { + i++; + while (i < len && zzbegexpr[i] == ' ') i++; + guess = entry; + } + + if (strchr (alpha_chars, tolower (zzbegexpr[i])) != NULL) + { + while (i < len && strchr (name_chars, tolower (zzbegexpr[i])) != NULL) + i++; + while (i < len && zzbegexpr[i] == ' ') i++; + if (i == len) + { + guess = giveup; + } + else + { + if (guess == entry) + { + if (zzbegexpr[i] != '{' && zzbegexpr[i] != '(') + guess = giveup; + } + else /* assume it's a field */ + { + if (zzbegexpr[i] == '=') + guess = field; + else + guess = giveup; + } + } + } + else /* no name seen after WS or @ */ + { + guess = giveup; + } + + if (guess == none) + internal_error ("gee, I should have made a guess by now"); + + if (guess != giveup) + { + lexical_warning ("possible runaway string started at line %d", + StringStart); + ApparentRunaway = 1; + } + } + + zzmore(); +} + diff --git a/src/translators/btparse/lex_auxiliary.h b/src/translators/btparse/lex_auxiliary.h new file mode 100644 index 0000000..ebbf053 --- /dev/null +++ b/src/translators/btparse/lex_auxiliary.h @@ -0,0 +1,71 @@ +/* ------------------------------------------------------------------------ +@NAME : lex_auxiliary.h +@DESCRIPTION: Macros and function prototypes needed by the lexical scanner. 
+ Some of these are called from internal PCCTS code, and some + are explicitly called from the lexer actions in bibtex.g. +@CREATED : Summer 1996, Greg Ward +@MODIFIED : +@VERSION : $Id: lex_auxiliary.h,v 1.15 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ +#ifndef LEX_AUXILIARY_H +#define LEX_AUXILIARY_H + +#include "btparse.h" +#include "attrib.h" + +#define ZZCOPY_FUNCTION 0 + +#if ZZCOPY_FUNCTION +#define ZZCOPY zzcopy (&zznextpos, &lastpos, &zzbufovf) +#else +#define ZZCOPY \ + if (zznextpos >= lastpos) \ + { \ + lexer_overflow (&lastpos, &zznextpos); \ + } \ + *(zznextpos++) = zzchar; +#endif + + +/* Function prototypes: */ + +void lex_info (void); +void zzcr_attr (Attrib *a, int tok, char *txt); + +void alloc_lex_buffer (int size); +void free_lex_buffer (void); +void lexer_overflow (unsigned char **lastpos, unsigned char **nextpos); +#if ZZCOPY_FUNCTION +void zzcopy (char **nextpos, char **lastpos, int *ovf_flag); +#endif + +void initialize_lexer_state (void); +bt_metatype entry_metatype (void); + +void newline (void); +void comment (void); +void at_sign (void); +void toplevel_junk (void); +void name (void); +void lbrace (void); +void rbrace (void); +void lparen (void); +void rparen (void); + +void start_string (char start_char); +void end_string (char end_char); +void open_brace (void); +void close_brace (void); +void lparen_in_string (void); +void rparen_in_string (void); +void quote_in_string (void); +void check_runaway_string (void); + +#endif /* ! 
defined LEX_AUXILIARY_H */ diff --git a/src/translators/btparse/macros.c b/src/translators/btparse/macros.c new file mode 100644 index 0000000..06db983 --- /dev/null +++ b/src/translators/btparse/macros.c @@ -0,0 +1,367 @@ +/* ------------------------------------------------------------------------ +@NAME : macros.c +@DESCRIPTION: Front-end to the standard PCCTS symbol table code (sym.c) + to abstract my "macro table". +@GLOBALS : +@CALLS : +@CREATED : 1997/01/12, Greg Ward +@MODIFIED : +@VERSION : $Id: macros.c,v 1.19 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include <string.h> +#include "sym.h" +#include "prototypes.h" +#include "error.h" +/*#include "my_dmalloc.h"*/ +#include "bt_debug.h" + + +/* + * NUM_MACROS and STRING_SIZE define the size of the static data + * structure that holds the macro table. The defaults are to allocate + * 4096 bytes of string space that will be divided up amongst 547 + * macros. This should be fine for most applications, but if you have a + * big macro table you might need to change these and recompile (don't + * forget to rebuild and reinstall Text::BibTeX if you're using it!). + * You can set these as high as you like; just remember that a block of + * STRING_SIZE bytes will be allocated and not freed as long as you're + * using btparse. Also, NUM_MACROS defines the size of a hashtable, so + * it should probably be a prime a bit greater than a power of 2 -- or + * something like that. I'm not sure of the exact Knuthian + * specification. 
+ */ +#define NUM_MACROS 547 +#define STRING_SIZE 4096 + +Sym *AllMacros = NULL; /* `scope' so we can get back list */ + /* of all macros when done */ + + +GEN_PRIVATE_ERRFUNC (macro_warning, + (char * filename, int line, const char * fmt, ...), + BTERR_CONTENT, filename, line, NULL, -1, fmt) + + +/* ------------------------------------------------------------------------ +@NAME : init_macros() +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Initializes the symbol table used to store macro values. +@GLOBALS : AllMacros +@CALLS : zzs_init(), zzs_scope() (sym.c) +@CALLERS : bt_initialize() (init.c) +@CREATED : Jan 1997, GPW +-------------------------------------------------------------------------- */ +void +init_macros (void) +{ + zzs_init (NUM_MACROS, STRING_SIZE); + zzs_scope (&AllMacros); +} + + +/* ------------------------------------------------------------------------ +@NAME : done_macros() +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Frees up all the macro values in the symbol table, and + then frees up the symbol table itself. +@GLOBALS : AllMacros +@CALLS : zzs_rmscope(), zzs_done() +@CALLERS : bt_cleanup() (init.c) +@CREATED : Jan 1997, GPW +-------------------------------------------------------------------------- */ +void +done_macros (void) +{ + bt_delete_all_macros (); + zzs_done (); +} + + +static void +delete_macro_entry (Sym * sym) +{ + Sym * cur; + Sym * prev; + + /* + * Yechh! All this mucking about with the scope list really + * ought to be handled by the symbol table code. Must write + * my own someday. + */ + + /* Find this entry in the list of all macro table entries */ + cur = AllMacros; + prev = NULL; + while (cur != NULL && cur != sym) + { + prev = cur; + cur = cur->scope; + } + + if (cur == NULL) /* uh-oh -- wasn't found! 
*/ + { + internal_error ("macro table entry for \"%s\" not found in scope list", + sym->symbol); + } + + /* Now unlink from the "scope" list */ + if (prev == NULL) /* it's the head of the list */ + AllMacros = cur->scope; + else + prev->scope = cur->scope; + + /* Remove it from the macro hash table */ + zzs_del (sym); + + /* And finally, free up the entry's text and the entry itself */ + if (sym->text) free (sym->text); + free (sym); +} /* delete_macro_entry() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_add_macro_value() +@INPUT : assignment - AST node representing "macro = value" + options - string-processing options that were used to + process this string after parsing +@OUTPUT : +@RETURNS : +@DESCRIPTION: Adds a value to the symbol table used for macros. + + If the value was not already post-processed as a macro value + (expand macros, paste substrings, but don't collapse + whitespace), then this post-processing is done before adding + the macro text to the table. + + If the macro is already defined, a warning is printed and + the old text is overridden. +@GLOBALS : +@CALLS : bt_add_macro_text() + bt_postprocess_field() +@CALLERS : bt_postprocess_entry() (post_parse.c) +@CREATED : Jan 1997, GPW +-------------------------------------------------------------------------- */ +void +bt_add_macro_value (AST *assignment, ushort options) +{ + AST * value; + char * macro; + char * text; + boolean free_text; + + if (assignment == NULL || assignment->down == NULL) return; + value = assignment->down; + + /* + * If the options that were used to process the macro's expansion text + * are anything other than BTO_MACRO, then we'll have to do it ourselves. 
+ */ + + if ((options & BTO_STRINGMASK) != BTO_MACRO) + { + text = bt_postprocess_field (assignment, BTO_MACRO, FALSE); + free_text = TRUE; /* because it's alloc'd by */ + /* bt_postprocess_field() */ + } + else + { + /* + * First a sanity check to make sure that the presumed post-processing + * had the desired effect. + */ + + if (value->nodetype != BTAST_STRING || value->right != NULL) + { + internal_error ("add_macro: macro value was not " + "correctly preprocessed"); + } + + text = assignment->down->text; + free_text = FALSE; + } + + macro = assignment->text; + bt_add_macro_text (macro, text, assignment->filename, assignment->line); + if (free_text && text != NULL) + free (text); + +} /* bt_add_macro_value() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_add_macro_text() +@INPUT : macro - the name of the macro to define + text - the macro text + filename, line - where the macro is defined; pass NULL + for filename if no file, 0 for line if no line number + (just used to generate warning message) +@OUTPUT : +@RETURNS : +@DESCRIPTION: Sets the text value for a macro. If the macro is already + defined, a warning is printed and the old value is overridden. 
+@GLOBALS : +@CALLS : zzs_get(), zzs_newadd() +@CALLERS : bt_add_macro_value() + (exported from library) +@CREATED : 1997/11/13, GPW (from code in bt_add_macro_value()) +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_add_macro_text (char * macro, char * text, char * filename, int line) +{ + Sym * sym; + Sym * new_rec; + +#if DEBUG == 1 + printf ("adding macro \"%s\" = \"%s\"\n", macro, text); +#elif DEBUG >= 2 + printf ("add_macro: macro = %p (%s)\n" + " text = %p (%s)\n", + macro, macro, text, text); +#endif + + if ((sym = zzs_get (macro))) + { + macro_warning (filename, line, + "overriding existing definition of macro \"%s\"", + macro); + delete_macro_entry (sym); + } + + new_rec = zzs_newadd (macro); + new_rec->text = (text != NULL) ? strdup (text) : NULL; + DBG_ACTION + (2, printf (" saved = %p (%s)\n", + new_rec->text, new_rec->text);) + +} /* bt_add_macro_text() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_delete_macro() +@INPUT : macro - name of macro to delete +@DESCRIPTION: Deletes a macro from the macro table. +@CALLS : zzs_get() +@CALLERS : +@CREATED : 1998/03/01, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_delete_macro (char * macro) +{ + Sym * sym; + + sym = zzs_get (macro); + if (! sym) return; + delete_macro_entry (sym); +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_delete_all_macros() +@DESCRIPTION: Deletes all macros from the macro table. +@CALLS : zzs_rmscore() +@CALLERS : +@CREATED : 1998/03/01, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_delete_all_macros (void) +{ + Sym *cur, *next; + + DBG_ACTION (2, printf ("bt_delete_all_macros():\n");) + + /* + * Use the current `scope' (same one for all macros) to get access to + * a linked list of all macros. 
Then traverse the list, free()'ing + * both the text (which was strdup()'d in add_macro(), below) and + * the records themselves (which are calloc()'d by zzs_new()). + */ + + cur = zzs_rmscope (&AllMacros); + while (cur != NULL) + { + DBG_ACTION + (2, printf (" freeing macro \"%s\" (%p=\"%s\") at %p\n", + cur->symbol, cur->text, cur->text, cur);) + + next = cur->scope; + if (cur->text != NULL) free (cur->text); + free (cur); + cur = next; + } +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_macro_length() +@INPUT : macro - the macro name +@OUTPUT : +@RETURNS : length of the macro's text, or zero if the macro is undefined +@DESCRIPTION: Returns length of a macro's text. +@GLOBALS : +@CALLS : zzs_get() +@CALLERS : bt_postprocess_value() + (exported from library) +@CREATED : Jan 1997, GPW +-------------------------------------------------------------------------- */ +int +bt_macro_length (char *macro) +{ + Sym *sym; + + DBG_ACTION + (2, printf ("bt_macro_length: looking up \"%s\"\n", macro);) + + sym = zzs_get (macro); + if (sym) + return strlen (sym->text); + else + return 0; +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_macro_text() +@INPUT : macro - the macro name + filename, line - where the macro was invoked; NULL for + `filename' and zero for `line' if not applicable +@OUTPUT : +@RETURNS : The text of the macro, or NULL if it's undefined. +@DESCRIPTION: Fetches a macros text; prints warning and returns NULL if + macro is undefined. 
+@CALLS : zzs_get() +@CALLERS : bt_postprocess_value() +@CREATED : Jan 1997, GPW +-------------------------------------------------------------------------- */ +char * +bt_macro_text (char * macro, char * filename, int line) +{ + Sym * sym; + + DBG_ACTION + (2, printf ("bt_macro_text: looking up \"%s\"\n", macro);) + + sym = zzs_get (macro); + if (!sym) + { + macro_warning (filename, line, "undefined macro \"%s\"", macro); + return NULL; + } + + return sym->text; +} diff --git a/src/translators/btparse/mode.h b/src/translators/btparse/mode.h new file mode 100644 index 0000000..25b36ce --- /dev/null +++ b/src/translators/btparse/mode.h @@ -0,0 +1,3 @@ +#define START 0 +#define LEX_ENTRY 1 +#define LEX_STRING 2 diff --git a/src/translators/btparse/modify.c b/src/translators/btparse/modify.c new file mode 100644 index 0000000..2d8d9c1 --- /dev/null +++ b/src/translators/btparse/modify.c @@ -0,0 +1,75 @@ +/* ------------------------------------------------------------------------ +@NAME : modify.c +@DESCRIPTION: Routines for modifying the AST for a single entry. +@GLOBALS : +@CALLS : +@CREATED : 1999/11/25, Greg Ward (based on code supplied by + Stphane Genaud <genaud@icps.u-strasbg.fr>) +@MODIFIED : +@VERSION : $Id: modify.c,v 1.2 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include <string.h> +#include "btparse.h" +#include "error.h" +/*#include "my_dmalloc.h"*/ + + +/* ------------------------------------------------------------------------ +@NAME : bt_set_text () +@INPUT : node + new_text +@OUTPUT : node->text +@RETURNS : +@DESCRIPTION: Replace the text member of an AST node with a new string. + The passed in string, 'new_text', is duplicated, so the + caller may free it without worry. +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1999/11/25, GPW (from Stphane Genaud) +@MODIFIED : +-------------------------------------------------------------------------- */ +void bt_set_text (AST * node, char * new_text) +{ + free(node->text); + node->text = strdup (new_text); +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_entry_set_key () +@INPUT : entry + new_key +@OUTPUT : entry->down->text +@RETURNS : +@DESCRIPTION: Changes the key of a regular entry to 'new_key'. If 'entry' + is not a regular entry, or if it doesn't already have a child + node holding an entry key, bombs via 'usage_error()'. + Otherwise a duplicate of 'new_key' is copied into the entry + AST (so the caller can free that string without worry). 
+@CALLS : bt_set_text () +@CREATED : 1999/11/25, GPW (from Stphane Genaud) +@MODIFIED : +-------------------------------------------------------------------------- */ +void bt_entry_set_key (AST * entry, char * new_key) +{ + if (entry->metatype == BTE_REGULAR && + entry->down && entry->down->nodetype == BTAST_KEY) + { + bt_set_text (entry->down, new_key); + } + else + { + usage_error ("can't set entry key -- not a regular entry, " + "or doesn't have a key already"); + } +} diff --git a/src/translators/btparse/my_alloca.h b/src/translators/btparse/my_alloca.h new file mode 100644 index 0000000..0466157 --- /dev/null +++ b/src/translators/btparse/my_alloca.h @@ -0,0 +1,35 @@ +/* ------------------------------------------------------------------------ +@NAME : my_alloca.h +@DESCRIPTION: All-out assault at making alloca() available on any Unix + platform. Stolen from the GNU Autoconf manual. +@CREATED : 1997/10/30, Greg Ward +@VERSION : $Id: my_alloca.h,v 1.1 1997/10/31 03:56:17 greg Rel $ +@COPYRIGHT : This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ + +#ifndef MY_ALLOCA_H +#define MY_ALLOCA_H + +#ifdef __GNUC__ +# ifndef alloca +# define alloca __builtin_alloca +# endif +#else +# if HAVE_ALLOCA_H +# include <alloca.h> +# else +# ifdef _AIX +# pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +char *alloca (); +# endif +# endif +# endif +#endif + +#endif /* MY_ALLOCA_H */ diff --git a/src/translators/btparse/names.c b/src/translators/btparse/names.c new file mode 100644 index 0000000..11c4bfd --- /dev/null +++ b/src/translators/btparse/names.c @@ -0,0 +1,915 @@ +/* ------------------------------------------------------------------------ +@NAME : names.c +@DESCRIPTION: Functions for dealing with BibTeX names and lists of names: + bt_split_list + bt_split_name +@GLOBALS : +@CALLS : +@CREATED : 1997/05/05, Greg Ward (as string_util.c) +@MODIFIED : 1997/05/14-05/16, GW: added all the code to split individual + names, renamed file to names.c +@VERSION : $Id: names.c,v 1.23 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "btparse.h" +#include "prototypes.h" +#include "error.h" +#include "my_alloca.h" +/*#include "my_dmalloc.h"*/ +#include "bt_debug.h" + + +#define MAX_COMMAS 2 + +#define update_depth(s,offs,depth) \ +switch (s[offs]) \ +{ \ + case '{': depth++; break; \ + case '}': depth--; break; \ +} + +/* + * `name_loc' specifies where a name is found -- used for generating + * useful warning messages. `line' and `name_num' are both 1-based. + */ +typedef struct +{ + char * filename; + int line; + int name_num; +} name_loc; + + +GEN_PRIVATE_ERRFUNC (name_warning, + (name_loc * loc, const char * fmt, ...), + BTERR_CONTENT, loc->filename, loc->line, + "name", loc->name_num, fmt) + + +/* ------------------------------------------------------------------------ +@NAME : bt_split_list() +@INPUT : string - string to split up; whitespace must be collapsed + eg. by bt_postprocess_string() + delim - delimiter to use; must be lowercase and should be + free of whitespace (code requires that delimiters + in string be surrounded by whitespace) + filename - source of string (for warning messages) + line - 1-based line number into file (for warning messages) + description - what substrings are (eg. "name") (for warning + messages); if NULL will use "substring" +@OUTPUT : substrings (*substrings is allocated by bt_split_list() for you) +@RETURNS : number of substrings found +@DESCRIPTION: Splits a string using a fixed delimiter, in the BibTeX way: + * delimiters at beginning or end of string are ignored + * delimiters in string must be surrounded by whitespace + * case insensitive + * delimiters at non-zero brace depth are ignored + + The list of substrings is returned as *substrings, which + is an array of pointers into a duplicate of string. 
This + duplicate copy has been scribbled on such that there is + a nul byte at the end of every substring. You should + call bt_free_list() to free both the duplicate copy + of string and *substrings itself. Do *not* walk over + the array free()'ing the substrings yourself, as this is + invalid -- they were not malloc()'d! +@GLOBALS : +@CALLS : +@CALLERS : anyone (exported by library) +@CREATED : 1997/05/05, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +bt_stringlist * +bt_split_list (char * string, + char * delim, + char * filename, + int line, + char * description) +{ + int depth; /* brace depth */ + int i, j; /* offset into string and delim */ + int inword; /* flag telling if prev. char == ws */ + int string_len; + int delim_len; + int maxdiv; /* upper limit on no. of divisions */ + int maxoffs; /* max offset of delim in string */ + int numdiv; /* number of divisions */ + int * start; /* start of each division */ + int * stop; /* stop of each division */ + bt_stringlist * + list; /* structure to return */ + + if (string == NULL) + return NULL; + if (description == NULL) + description = "substring"; + + string_len = strlen (string); + delim_len = strlen (delim); + maxdiv = (string_len / delim_len) + 1; + maxoffs = string_len - delim_len + 1; + + /* + * This is a bit of a band-aid solution to the "split empty string" + * bug (formerly hit the internal_error() at the end of hte function). + * Still need a general "detect and fix unpreprocessed string" -- + * admittedly a different bug/misfeature. + */ + if (string_len == 0) + return NULL; + + start = (int *) alloca (maxdiv * sizeof (int)); + stop = (int *) alloca (maxdiv * sizeof (int)); + + list = (bt_stringlist *) malloc (sizeof (bt_stringlist)); + + depth = 0; + i = j = 0; + inword = 1; /* so leading delim ignored */ + numdiv = 0; + start[0] = 0; /* first substring @ start of string */ + + while (i < maxoffs) + { + /* does current char. 
in string match current char. in delim? */ + if (depth == 0 && !inword && tolower (string[i]) == delim[j]) + { + j++; i++; + + /* have we found an entire delim, followed by a space? */ + if (j == delim_len && string[i] == ' ') + { + + stop[numdiv] = i - delim_len - 1; + start[++numdiv] = ++i; + j = 0; + +#if DEBUG + printf ("found complete delim; i == %d, numdiv == %d: " + "stop[%d] == %d, start[%d] == %d\n", + i, numdiv, + numdiv-1, stop[numdiv-1], + numdiv, start[numdiv]); +#endif + } + } + + /* no match between string and delim, at non-zero depth, or in a word */ + else + { + update_depth (string, i, depth); + inword = (i < string_len) && (string[i] != ' '); + i++; + j = 0; + } + } + + stop[numdiv] = string_len; /* last substring ends just past eos */ + list->num_items = numdiv+1; + + + /* + * OK, now we know how many divisions there are and where they are -- + * so let's split that string up for real! + * + * list->items will be an array of pointers into a duplicate of + * `string'; we duplicate `string' so we can safely scribble on it and + * free() it later (in bt_free_list()). + */ + + list->items = (char **) malloc (list->num_items * sizeof (char *)); + list->string = strdup (string); + + for (i = 0; i < list->num_items; i++) + { + /* + * Possible cases: + * - stop < start is for empty elements, e.g. "and and" seen in + * input. (`start' for empty element will be the 'a' of the + * second 'and', and its stop will be the ' ' *before* the + * second 'and'.) + * - stop > start is for anything else between two and's (the usual) + * - stop == start should never happen if the loop above is correct + */ + + if (stop[i] > start[i]) /* the usual case */ + { + list->string[stop[i]] = 0; + list->items[i] = list->string+start[i]; + } + else if (stop[i] < start[i]) /* empty element */ + { + list->items[i] = NULL; + general_error (BTERR_CONTENT, filename, line, + description, i+1, "empty %s", description); + } + else /* should not happen! 
*/ + { + internal_error ("stop == start for substring %d", i); + } + } + + return list; +/* return num_substrings; */ + +} /* bt_split_list () */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_free_list() +@INPUT : list +@OUTPUT : +@RETURNS : +@DESCRIPTION: Frees the list of strings created by bt_split_list(). +@GLOBALS : +@CALLS : +@CALLERS : anyone (exported by library) +@CREATED : 1997/05/06, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void bt_free_list (bt_stringlist *list) +{ + if (list && list->string) free (list->string); + if (list && list->items) free (list->items); + if (list) free (list); +} + + + +/* ---------------------------------------------------------------------- + * Stuff for splitting up a single name + */ + + +/* ------------------------------------------------------------------------ +@NAME : find_commas +@INPUT : name - string to search for commas + max_commas - maximum number of commas to allow (if more than + this number are seen, a warning is printed and + the excess commas are removed) +@OUTPUT : +@RETURNS : number of commas found +@DESCRIPTION: Counts and records positions of commas at brace-depth 0. + Modifies string in-place to remove whitespace around commas, + excess commas, and any trailing commas; warns on excess or + trailing commas. Excess commas are removed by replacing them + with space and calling bt_postprocess_string() to collapse + whitespace a second time; trailing commas are simply replaced + with (char) 0 to truncate the string. + + Assumes whitespace has been collapsed (ie. no space at + beginning or end of string, and all internal strings of + whitespace reduced to exactly one space). 
+@GLOBALS : +@CALLS : name_warning() (if too many commas, or commas at end) +@CALLERS : bt_split_name() +@CREATED : 1997/05/14, Greg Ward +@MODIFIED : +-------------------------------------------------------------------------- */ +static int +find_commas (name_loc * loc, char *name, int max_commas) +{ + int i, j; + int depth; + int num_commas; + int len; + boolean at_comma; + boolean warned; + + i = j = 0; + depth = 0; + num_commas = 0; + len = strlen (name); + warned = 0; + + /* First pass to check for and blank out excess commas */ + + for (i = 0; i < len; i++) + { + if (depth == 0 && name[i] == ',') + { + num_commas++; + if (num_commas > max_commas) + { + if (! warned) + { + name_warning (loc, "too many commas in name (removing extras)"); + warned = TRUE; + } + name[i] = ' '; + } + } + } + + /* + * If we blanked out a comma, better re-collapse whitespace. (This is + * a bit of a cop-out -- I could probably adjust i and j appropriately + * in the above loop to do the collapsing for me, but my brain + * hurt when I tried to think it through. Some other time, perhaps. 
+ */ + + if (warned) + bt_postprocess_string (name, BTO_COLLAPSE); + + /* Now the real comma-finding loop (only if necessary) */ + + if (num_commas == 0) + return 0; + + num_commas = 0; + i = 0; + while (i < len) + { + at_comma = (depth == 0 && name[i] == ','); + if (at_comma) + { + while (j > 0 && name[j-1] == ' ') j--; + num_commas++; + } + + update_depth (name, i, depth); + if (i != j) + name[j] = name[i]; + + i++; j++; + if (at_comma) + { + while (i < len && name[i] == ' ') i++; + } + } /* while i */ + + if (i != j) name[j] = (char) 0; + j--; + + if (name[j] == ',') + { + name_warning (loc, "comma(s) at end of name (removing)"); + while (name[j] == ',') + { + name[j--] = (char) 0; + num_commas--; + } + } + + return num_commas; + +} /* find_commas() */ + + +/* ------------------------------------------------------------------------ +@NAME : find_tokens +@INPUT : name - string to tokenize (should be a private copy + that we're free to clobber and mangle) +@OUTPUT : comma_token- number of token immediately preceding each comma + (caller must allocate with at least one element + per comma in `name') +@RETURNS : newly-allocated bt_stringlist structure +@DESCRIPTION: Finds tokens in a string; delimiter is space or comma at + brace-depth zero. Assumes whitespace has been collapsed + and find_commas has been run on the string to remove + whitespace around commas and any trailing commas. + + The bt_stringlist structure returned can (and should) be + freed with bt_free_list(). 
+@GLOBALS : +@CALLS : +@CALLERS : bt_split_name() +@CREATED : 1997/05/14, Greg Ward +@MODIFIED : +-------------------------------------------------------------------------- */ +static bt_stringlist * +find_tokens (char * name, + int * comma_token) +{ + int i; /* index into name */ + int num_tok; + int in_boundary; /* previous char was ' ' or ',' */ + int cur_comma; /* index into comma_token */ + int len; + int depth; + bt_stringlist * + tokens; + + i = 0; + in_boundary = 1; /* so first char will start a token */ + cur_comma = 0; + len = strlen (name); + depth = 0; + + tokens = (bt_stringlist *) malloc (sizeof (bt_stringlist)); + /* tokens->string = name ? strdup (name) : NULL; */ + tokens->string = name; + num_tok = 0; + tokens->items = NULL; + + if (len == 0) /* empty string? */ + return tokens; /* return empty token list */ + + tokens->items = (char **) malloc (sizeof (char *) * len); + + while (i < len) + { + if (depth == 0 && in_boundary) /* at start of a new token */ + { + tokens->items[num_tok++] = name+i; + } + + if (depth == 0 && (name[i] == ' ' || name[i] == ',')) + { + /* if we're at a comma, record the token preceding the comma */ + + if (name[i] == ',') + { + comma_token[cur_comma++] = num_tok-1; + } + + /* + * if already in a boundary zone, we have an empty token + * (caused by multiple consecutive commas) + */ + if (in_boundary) + { + tokens->items[num_tok-1] = NULL; + } + num_tok--; + + /* in any case, mark the end of one token and prepare for the + * start of the next + */ + name[i] = (char) 0; + in_boundary = 1; + } + else + { + in_boundary = 0; /* inside a token */ + } + + update_depth (name, i, depth); + i++; + + } /* while i */ + + tokens->num_items = num_tok; + return tokens; + +} /* find_tokens() */ + + +/* ------------------------------------------------------------------------ +@NAME : find_lc_tokens() +@INPUT : tokens +@OUTPUT : first_lc + last_lc +@RETURNS : +@DESCRIPTION: Finds the first contiguous string of lowercase tokens in + `name'. 
The string must already be tokenized by + find_tokens(), and the input args num_tok, tok_start, and + tok_stop are the return value and the two same-named output + arguments from find_tokens(). +@GLOBALS : +@CALLS : +@CALLERS : bt_split_name() +@CREATED : 1997/05/14, Greg Ward +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +find_lc_tokens (bt_stringlist * tokens, + int * first_lc, + int * last_lc) +{ + int i; /* iterate over token list this time */ + int in_lc_sequence; /* in contig. sequence of lc tokens? */ + + *first_lc = *last_lc = -1; /* haven't found either yet */ + in_lc_sequence = 0; + + i = 0; + while (i < tokens->num_items) + { + if (*first_lc == -1 && islower (tokens->items[i][0])) + { + *first_lc = i; + + i++; + while (i < tokens->num_items && islower (tokens->items[i][0])) + i++; + + *last_lc = i-1; + } + else + { + i++; + } + } +} /* find_lc_tokens() */ + + +/* ------------------------------------------------------------------------ +@NAME : resolve_token_range() +@INPUT : tokens - structure containing the token list + tok_range - two-element array with start and stop token number +@OUTPUT : *part - set to point to first token in range, or NULL + if empty range + *num_tok - number of tokens in the range +@RETURNS : +@DESCRIPTION: Given a list of tokens and a range of token numbers (as a + two-element array, tok_range), computes the number of tokens + in the range. If this is >= 0, sets *part to point + to the first token in the range; otherwise, sets *part + to NULL. 
+@CALLERS : +@CREATED : May 1997, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +resolve_token_range (bt_stringlist *tokens, + int * tok_range, + char *** part, + int * num_tok) +{ + *num_tok = (tok_range[1] - tok_range[0]) + 1; + if (*num_tok <= 0) + { + *num_tok = 0; + *part = NULL; + } + else + { + *part = tokens->items + tok_range[0]; + } +} /* resolve_token_range() */ + + +/* ------------------------------------------------------------------------ +@NAME : split_simple_name() +@INPUT : name + first_lc + last_lc +@OUTPUT : name +@RETURNS : +@DESCRIPTION: Splits up a name (represented as a string divided into + non-overlapping, whitespace-separated tokens) according + to the BibTeX rules for names without commas. Specifically: + * tokens up to (but not including) the first lowercase + token, or the last token of the string if there + are no lowercase tokens, become the `first' part + * the earliest contiguous sequence of lowercase tokens, + up to (but not including) the last token of the string, + becomes the `von' part + * the tokens following the `von' part, or the last + single token if there is no `von' part, become + the `last' part + * there is no `jr' part +@GLOBALS : +@CALLS : name_warning() (if last lc token taken as lastname) + resolve_token_range() +@CALLERS : bt_split_name() +@CREATED : 1997/05/15, Greg Ward +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +split_simple_name (name_loc * loc, + bt_name * name, + int first_lc, + int last_lc) +{ + int first_t[2], von_t[2], last_t[2]; + int end; + + end = name->tokens->num_items-1; /* token number of last token */ + + if (first_lc > -1) /* any lowercase tokens at all? 
*/ + { + first_t[0] = 0; /* first name goes from beginning */ + first_t[1] = first_lc-1; /* to just before first lc token */ + + if (last_lc == end) /* sequence of lowercase tokens */ + { /* goes all the way to end of string */ + last_lc--; /* -- roll it back by one so we */ + /* still have a lastname */ +#ifdef WARN_LC_LASTNAME + /* + * disable this warning for now because "others" is used fairly + * often as a name in BibTeX databases -- oops! + */ + name_warning (loc, + "no capitalized token at end of name; " + "using \"%s\" as lastname", + name->tokens->items[end]); +#else +# ifndef ALLOW_WARNINGS + loc = NULL; /* avoid "unused parameter" warning */ +# endif +#endif + } + + von_t[0] = first_lc; /* `von' part covers sequence of */ + von_t[1] = last_lc; /* lowercase tokens */ + last_t[0] = last_lc+1; /* lastname from after `von' to end */ + last_t[1] = end; /* of string */ + } + else /* no lowercase tokens */ + { + von_t[0] = 0; /* empty `von' part */ + von_t[1] = -1; + first_t[0] = 0; /* `first' goes from first to second */ + first_t[1] = end-1; /* last token */ + last_t[0] = last_t[1] = end; /* and `last' is just the last token */ + } + + resolve_token_range (name->tokens, first_t, + name->parts+BTN_FIRST, name->part_len+BTN_FIRST); + resolve_token_range (name->tokens, von_t, + name->parts+BTN_VON, name->part_len+BTN_VON); + resolve_token_range (name->tokens, last_t, + name->parts+BTN_LAST, name->part_len+BTN_LAST); + name->parts[BTN_JR] = NULL; /* no jr part possible */ + name->part_len[BTN_JR] = 0; + +} /* split_simple_name() */ + + +/* ------------------------------------------------------------------------ +@NAME : split_general_name() +@INPUT : name + num_commas + comma_token + first_lc + last_lc +@OUTPUT : name +@RETURNS : +@DESCRIPTION: Splits a name according to the BibTeX rules for names + with 1 or 2 commas (> 2 commas is handled elsewhere, + namely by bt_split_name() calling find_commas() with + max_commas == 2). 
Specifically: + * an initial string of lowercase tokens, up to (but not + including) the token before the first comma, becomes + the `von' part + * tokens from immediately after the `von' part, + or from the beginning of the string if no `von', + up to the first comma become the `last' part + + if one comma: + * all tokens following the sole comma become the + `first' part + + if two commas: + * tokens between the two commas become the `jr' part + * all tokens following the second comma become the + `first' part +@GLOBALS : +@CALLS : name_warning() (if last lc token taken as lastname) + resolve_token_range() +@CALLERS : bt_split_name() +@CREATED : 1997/05/15, Greg Ward +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +split_general_name (name_loc * loc, + bt_name * name, + int num_commas, + int * comma_token, + int first_lc, + int last_lc) +{ + int first_t[2], von_t[2], last_t[2], jr_t[2]; + int end; + + end = name->tokens->num_items-1; /* last token number */ + + if (first_lc == 0) /* we have an initial string of */ + { /* lowercase tokens */ + if (last_lc == comma_token[0]) /* lc string ends at first comma */ + { + name_warning (loc, "no capitalized tokens before first comma"); + last_lc--; + } + + von_t[0] = first_lc; /* `von' covers the sequence of */ + von_t[1] = last_lc; /* lowercase tokens */ + } + else /* no lowercase tokens at start */ + { + von_t[0] = 0; /* empty `von' part */ + von_t[1] = -1; + } + + last_t[0] = von_t[1] + 1; /* start right after end of `von' */ + last_t[1] = comma_token[0]; /* and end at first comma */ + + if (num_commas == 1) + { + first_t[0] = comma_token[0]+1; /* start right after comma */ + first_t[1] = end; /* stop at end of string */ + jr_t[0] = 0; /* empty `jr' part */ + jr_t[1] = -1; + } + else /* more than 1 comma */ + { + jr_t[0] = comma_token[0]+1; /* start after first comma */ + jr_t[1] = comma_token[1]; /* stop at second comma */ + first_t[0] = comma_token[1]+1; /* 
start after second comma */ + first_t[1] = end; /* and go to end */ + } + + resolve_token_range (name->tokens, first_t, + name->parts+BTN_FIRST, name->part_len+BTN_FIRST); + resolve_token_range (name->tokens, von_t, + name->parts+BTN_VON, name->part_len+BTN_VON); + resolve_token_range (name->tokens, last_t, + name->parts+BTN_LAST, name->part_len+BTN_LAST); + resolve_token_range (name->tokens, jr_t, + name->parts+BTN_JR, name->part_len+BTN_JR); + +} /* split_general_name() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_split_name() +@INPUT : name + filename + line + name_num +@OUTPUT : +@RETURNS : newly-allocated bt_name structure containing the four + parts as token-lists +@DESCRIPTION: Splits a name according to the BibTeX rules. There are + actually two sets of rules: one for names with no commas, + and one for names with 1 or 2 commas. (If a name has + more than 2 commas, the extras are removed and it's treated + as though it had just the first 2.) + + See split_simple_name() for the no-comma rules, and + split_general_name() for the 1-or-2-commas rules. + + The bt_name structure returned can (and should) be freed + with bt_free_name() when you no longer need it. +@GLOBALS : +@CALLS : +@CALLERS : anyone (exported by library) +@CREATED : 1997/05/14, Greg Ward +@MODIFIED : +@COMMENTS : The name-splitting code all implicitly assumes that the + string being split has been post-processed to collapse + whitespace in the BibTeX way. This means that it tends to + dump core on such things as leading whitespace, or more than + one space in a row inside the string. This could probably be + alleviated with a call to bt_postprocess_string(), possibly + preceded by a check for any of those occurences. Before + doing that, though, I want to examine the code carefully to + determine just what assumptions it makes -- so I can + check/correct for all of them. 
+-------------------------------------------------------------------------- */ +bt_name * +bt_split_name (char * name, + char * filename, + int line, + int name_num) +{ + name_loc loc; + bt_stringlist * + tokens; + int comma_token[MAX_COMMAS]; + int len; + int num_commas; + int first_lc, last_lc; + bt_name * split_name; + int i; + + DBG_ACTION (1, printf ("bt_split_name(): name=%p (%s)\n", name, name)) + + split_name = (bt_name *) malloc (sizeof (bt_name)); + if (name == NULL) + { + len = 0; + } + else + { + name = strdup (name); /* private copy that we may clobber */ + len = strlen (name); + } + + DBG_ACTION (1, printf ("bt_split_name(): split_name=%p\n", split_name)) + + if (len == 0) /* non-existent or empty string? */ + { + split_name->tokens = NULL; + for (i = 0; i < BT_MAX_NAMEPARTS; i++) + { + split_name->parts[i] = NULL; + split_name->part_len[i] = 0; + } + return split_name; + } + + loc.filename = filename; /* so called functions can generate */ + loc.line = line; /* decent warning messages */ + loc.name_num = name_num; + + num_commas = find_commas (&loc, name, MAX_COMMAS); + assert (num_commas <= MAX_COMMAS); + + DBG_ACTION (1, printf ("found %d commas: ", num_commas)) + + tokens = find_tokens (name, comma_token); + +#if DEBUG + printf ("found %d tokens:\n", tokens->num_items); + for (i = 0; i < tokens->num_items; i++) + { + printf (" %d: ", i); + + if (tokens->items[i]) /* non-empty token? */ + { + printf (">%s<\n", tokens->items[i]); + } + else + { + printf ("(empty)\n"); + } + } +#endif + +#if DEBUG + printf ("comma tokens: "); + for (i = 0; i < num_commas; i++) + printf ("%d ", comma_token[i]); + printf ("\n"); +#endif + + find_lc_tokens (tokens, &first_lc, &last_lc); +#if DEBUG + printf ("(first,last) lc tokens = (%d,%d)\n", first_lc, last_lc); +#endif + + if (strlen (name) == 0) /* name now empty? 
*/ + { + split_name->tokens = NULL; + for (i = 0; i < BT_MAX_NAMEPARTS; i++) + { + split_name->parts[i] = NULL; + split_name->part_len[i] = 0; + } + } + else + { + split_name->tokens = tokens; + if (num_commas == 0) /* no commas -- "simple" format */ + { + split_simple_name (&loc, split_name, + first_lc, last_lc); + } + else + { + split_general_name (&loc, split_name, + num_commas, comma_token, + first_lc, last_lc); + } + } + +#if DEBUG + printf ("bt_split_name(): returning structure %p\n", split_name); +#endif + return split_name; +} /* bt_split_name() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_free_name() +@INPUT : name +@OUTPUT : +@RETURNS : +@DESCRIPTION: Frees up any memory allocated for a bt_name structure + (namely, the `tokens' field [a bt_stringlist structure, + this freed with bt_free_list()] and the structure itself.) +@CALLS : bt_free_list() +@CALLERS : anyone (exported) +@CREATED : 1997/11/14, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_free_name (bt_name * name) +{ + DBG_ACTION (2, printf ("bt_free_name(): freeing name %p " + "(%d tokens, string=%p (%s), last[0]=%s)\n", + name, + name->tokens->num_items, + name->tokens->string, + name->tokens->string, + name->parts[BTN_LAST][0])); + bt_free_list (name->tokens); + free (name); + DBG_ACTION (2, printf ("bt_free_name(): done, everything freed\n")); +} diff --git a/src/translators/btparse/parse_auxiliary.c b/src/translators/btparse/parse_auxiliary.c new file mode 100644 index 0000000..f509741 --- /dev/null +++ b/src/translators/btparse/parse_auxiliary.c @@ -0,0 +1,336 @@ +/* ------------------------------------------------------------------------ +@NAME : parse_auxiliary.c +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Anything needed by the parser that's too hairy to go in the + grammar itself. Currently, just stuff needed for generating + syntax errors. 
(See error.c for how they're actually + printed.) +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1996/08/07, Greg Ward +@MODIFIED : +@VERSION : $Id: parse_auxiliary.c,v 1.20 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include "stdpccts.h" +#include "error.h" +#include "lex_auxiliary.h" +#include "parse_auxiliary.h" +/*#include "my_dmalloc.h"*/ + +extern char * InputFilename; /* from input.c */ + +GEN_PRIVATE_ERRFUNC (syntax_error, (char * fmt, ...), + BTERR_SYNTAX, InputFilename, zzline, NULL, -1, fmt) + + +/* this is stolen from PCCTS' err.h */ +static SetWordType bitmask[] = +{ + 0x00000001, 0x00000002, 0x00000004, 0x00000008, + 0x00000010, 0x00000020, 0x00000040, 0x00000080 +}; + +static struct +{ + int token; + const char *new_name; +} new_tokens[] = +{ + { AT, "\"@\"" }, + { NAME, "name (entry type, key, field, or macro name)" }, + { LBRACE, "left brace (\"{\")" }, + { RBRACE, "right brace (\"}\")" }, + { ENTRY_OPEN, "start of entry (\"{\" or \"(\")" }, + { ENTRY_CLOSE,"end of entry (\"}\" or \")\")" }, + { EQUALS, "\"=\"" }, + { HASH, "\"#\"" }, + { COMMA, "\",\"" }, + { NUMBER, "number" }, + { STRING, "quoted string ({...} or \"...\")" } +}; + + +#ifdef CLEVER_TOKEN_STUFF +char **token_names; +#endif + + +void +fix_token_names (void) +{ + int i; + int num_replace; + +#ifdef CLEVER_TOKEN_STUFF /* clever, but it doesn't work... */ + /* arg! 
this doesn't work because I don't know how to find out the + * number of tokens + */ + + int num_tok; + + num_tok = (sizeof(zztokens) / sizeof(*zztokens)); + sizeof (zztokens); + sizeof (*zztokens); + token_names = (char **) malloc (sizeof (char *) * num_tok); + + for (i = 0; i < num_tok; i++) + { + token_names[i] = zztokens[i]; + } +#endif + + num_replace = (sizeof(new_tokens) / sizeof(*new_tokens)); + for (i = 0; i < num_replace; i++) + { + const char *new = new_tokens[i].new_name; + const char **old = zztokens + new_tokens[i].token; + + *old = new; + } +} + + +#ifdef USER_ZZSYN + +static void +append_token_set (char *msg, SetWordType *a) +{ + SetWordType *p = a; + SetWordType *endp = &(p[zzSET_SIZE]); + unsigned e = 0; + int tokens_printed = 0; + + do + { + SetWordType t = *p; + SetWordType *b = &(bitmask[0]); + do + { + if (t & *b) + { + strcat (msg, zztokens[e]); + tokens_printed++; + if (tokens_printed < zzset_deg (a) - 1) + strcat (msg, ", "); + else if (tokens_printed == zzset_deg (a) - 1) + strcat (msg, " or "); + } + e++; + } while (++b < &(bitmask[sizeof(SetWordType)*8])); + } while (++p < endp); +} + + +void +zzsyn(const char * text, + int tok, + char * egroup, + SetWordType * eset, + int etok, + int k, + const char * bad_text) +{ + static char msg [MAX_ERROR]; + int len; + +#ifndef ALLOW_WARNINGS + text = NULL; /* avoid "unused parameter" warning */ +#endif + + /* Initial message: give location of error */ + + msg[0] = (char) 0; /* make sure string is empty to start! */ + if (tok == zzEOF_TOKEN) + strcat (msg, "at end of input"); + else + sprintf (msg, "found \"%s\"", bad_text); + + len = strlen (msg); + + + /* Caller supplied neither a single token nor set of tokens expected... */ + + if (!etok && !eset) + { + syntax_error (msg); + return; + } + else + { + strcat (msg, ", "); + len += 2; + } + + + /* I'm not quite sure what this is all about, or where k would be != 1... 
*/ + + if (k != 1) + { + sprintf (msg+len, "; \"%s\" not", bad_text); + if (zzset_deg (eset) > 1) strcat (msg, " in"); + len = strlen (msg); + } + + + /* This is the code that usually gets run */ + + if (zzset_deg (eset) > 0) + { + if (zzset_deg (eset) == 1) + strcat (msg, "expected "); + else + strcat (msg, "expected one of: "); + + append_token_set (msg, eset); + } + else + { + sprintf (msg+len, "expected %s", zztokens[etok]); + if (etok == ENTRY_CLOSE) + { + strcat (msg, " (skipping to next \"@\")"); + initialize_lexer_state (); + } + } + + len = strlen (msg); + if (egroup && strlen (egroup) > 0) + sprintf (msg+len, " in %s", egroup); + + syntax_error (msg); + +} +#endif /* USER_ZZSYN */ + + +void +check_field_name (AST * field) +{ + char * name; + + if (! field || field->nodetype != BTAST_FIELD) + return; + + name = field->text; + if (strchr ("0123456789", name[0])) + syntax_error ("invalid field name \"%s\": cannot start with digit", + name); +} + + +#ifdef STACK_DUMP_CODE + +static void +show_ast_stack_elem (int num) +{ + extern const char *nodetype_names[]; /* nicked from bibtex_ast.c */ + /* bt_nodetype nodetype; + bt_metatype metatype; */ + AST *elem; + + elem = zzastStack[num]; + printf ("zzastStack[%3d] = ", num); + if (elem) + { + /* get_node_type (elem, &nodetype, &metatype); */ + if (elem->nodetype <= BTAST_MACRO) + { + printf ("{ %s: \"%s\" (line %d, char %d) }\n", + nodetype_names[elem->nodetype], + elem->text, elem->line, elem->offset); + } + else + { + printf ("bogus node (uninitialized?)\n"); + } + } + else + { + printf ("NULL\n"); + } +} + + +static void +show_ast_stack_top (char *label) +{ + if (label) + printf ("%s: ast stack top: ", label); + else + printf ("ast stack top: "); + show_ast_stack_elem (zzast_sp); +} + + +static void +dump_ast_stack (char *label) +{ + int i; + + if (label) + printf ("%s: complete ast stack:\n", label); + else + printf ("complete ast stack:\n"); + + for (i = zzast_sp; i < ZZAST_STACKSIZE; i++) + { + printf (" "); 
+ show_ast_stack_elem (i); + } +} + + +static void +show_attrib_stack_elem (int num) +{ + Attrib elem; + + elem = zzaStack[num]; + printf ("zzaStack[%3d] = ", num); + printf ("{ \"%s\" (token %d (%s), line %d, char %d) }\n", + elem.text, elem.token, zztokens[elem.token], + elem.line, elem.offset); +} + + +static void +show_attrib_stack_top (char *label) +{ + if (label) + printf ("%s: attrib stack top: ", label); + else + printf ("attrib stack top: "); + show_attrib_stack_elem (zzasp); +} + + +static void +dump_attrib_stack (char *label) +{ + int i; + + if (label) + printf ("%s: complete attrib stack:\n", label); + else + printf ("complete attrib stack:\n"); + + for (i = zzasp; i < ZZA_STACKSIZE; i++) + { + printf (" "); + show_attrib_stack_elem (i); + } +} + +#endif /* STACK_DUMP_CODE */ diff --git a/src/translators/btparse/parse_auxiliary.h b/src/translators/btparse/parse_auxiliary.h new file mode 100644 index 0000000..5500513 --- /dev/null +++ b/src/translators/btparse/parse_auxiliary.h @@ -0,0 +1,32 @@ +/* ------------------------------------------------------------------------ +@NAME : parse_auxiliary.h +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Prototype declarations for functions in parse_auxiliary.c +@GLOBALS : +@CALLS : +@CREATED : 1997/01/08, Greg Ward +@MODIFIED : +@VERSION : $Id: parse_auxiliary.h,v 1.5 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ + +#ifndef PARSE_AUXILIARY_H +#define PARSE_AUXILIARY_H + +#include "stdpccts.h" /* for SetWordType typedef */ + +void fix_token_names (void); +void zzsyn (const char *text, int tok, + char *egroup, SetWordType *eset, int etok, + int k, const char *bad_text); +void check_field_name (AST * field); + +#endif /* PARSE_AUXILIARY_H */ diff --git a/src/translators/btparse/postprocess.c b/src/translators/btparse/postprocess.c new file mode 100644 index 0000000..7f7bfd4 --- /dev/null +++ b/src/translators/btparse/postprocess.c @@ -0,0 +1,498 @@ +/* ------------------------------------------------------------------------ +@NAME : postprocess.c +@DESCRIPTION: Operations applied to the AST (or strings in it) after + parsing is complete. +@GLOBALS : +@CALLS : +@CREATED : 1997/01/12, Greg Ward (from code in bibparse.c, lex_auxiliary.c) +@MODIFIED : +@VERSION : $Id: postprocess.c,v 1.25 2000/05/02 23:06:31 greg Exp $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include "btparse.h" +#include "error.h" +#include "parse_auxiliary.h" +#include "prototypes.h" +/*#include "my_dmalloc.h"*/ + +#define DEBUG 1 + + +/* ------------------------------------------------------------------------ +@NAME : bt_postprocess_string () +@INPUT : s + options +@OUTPUT : s (modified in place according to the flags) +@RETURNS : (void) +@DESCRIPTION: Make a pass over string s (which is modified in-place) to + optionally collapse whitespace according to BibTeX rules + (if the BTO_COLLAPSE bit in options is true). + + Rules for collapsing whitespace are: + * whitespace at beginning/end of string is deleted + * within the string, each whitespace sequence is replaced by + a single space + + Note that part of the work is done by the lexer proper, + namely conversion of tabs and newlines to spaces. +@GLOBALS : +@CALLS : +@CREATED : originally in lex_auxiliary.c; moved here 1997/01/12 +@MODIFIED : +@COMMENTS : this only collapses whitespace now -- rename it??? +-------------------------------------------------------------------------- */ +void +bt_postprocess_string (char * s, ushort options) +{ + boolean collapse_whitespace; + char *i, *j; + int len; + + if (s == NULL) return; /* quit if no string supplied */ + +#if DEBUG > 1 + printf ("bt_postprocess_string: looking at >%s<\n", s); +#endif + + /* Extract any relevant options (just one currently) to local flags. */ + collapse_whitespace = options & BTO_COLLAPSE; + + /* + * N.B. i and j will both point into s; j is always >= i, and + * we copy characters from j to i. Whitespace is collapsed/deleted + * by advancing j without advancing i. + */ + i = j = s; /* start both at beginning of string */ + + /* + * If we're supposed to collapse whitespace, then advance j to the + * first non-space character. 
+ */ + if (collapse_whitespace) + { + while (*j == ' ' && *j != (char) 0) + j++; + } + + while (*j != (char) 0) + { + /* + * If we're in a string of spaces (ie. current and previous char. + * are both space), and we're supposed to be collapsing whitespace, + * then skip until we hit a non-space character (or end of string). + */ + if (collapse_whitespace && *j == ' ' && *(j-1) == ' ') + { + while (*j == ' ') j++; /* skip spaces */ + if (*j == (char) 0) /* reached end of string? */ + break; + } + + /* Copy the current character from j down to i */ + *(i++) = *(j++); + } + *i = (char) 0; /* ensure string is terminated */ + + + /* + * And mop up whitespace (if any) at end of string -- note that if there + * was any whitespace there, it has already been collapsed to exactly + * one space. + */ + len = strlen (s); + if (len > 0 && collapse_whitespace && s[len-1] == ' ') + { + s[--len] = (char) 0; + } + +#if DEBUG > 1 + printf (" transformed to >%s<\n", s); +#endif + +} /* bt_postprocess_string */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_postprocess_value() +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Post-processes a series of strings (compound value), + frequently found as the value of a "field = value" or "macro + = value" assignment. The actions taken here are governed by + the bits in 'options', but there are two distinct modes of + operation: pasting or not. + + We paste strings if and only if the BTO_PASTE bit in options + is set and there are two or more simple values in the + compound value. In this case, the BTO_EXPAND bit must be set + (it would be very silly to paste together strings with + unexpanded macro names!), and we make two passes over the + data: one to postprocess individual strings and accumulate + the one big string, and a second to postprocess the big + string. 
In the first pass, the caller-supplied 'options' + variable is largely ignored; we will never collapse + whitespace in the individual strings. The caller's wishes + are fully respected when we make the final post-processing + pass over the concatenation of the individual strings, + though. + + If we're not pasting strings, then the character of the + individual simple values will be preserved; macros might not + be expanded (depending on the BTO_EXPAND bit), numbers will + stay numbers, and strings will be post-processed + independently according to the 'options' variable. (Beware + -- this means you might collapse whitespace in individual + sub-strings, which would be bad if you intend to concatenate + them later in the BibTeX sense.) + + The 'replace' parameter is used to govern whether the + existing strings in the AST should be replaced with their + post-processed versions. This can extend as far as + collapsing a series of simple values into a single BTAST_STRING + node, if we paste sub-strings together. If replace is FALSE, + the returned string is allocated here, and you must free() it + later. +@GLOBALS : +@CALLS : +@CREATED : 1997/01/10, GPW +@MODIFIED : 1997/08/25, GPW: renamed from bt_postprocess_field(), and changed + to take the head of a list of simple values, + rather than the parent of that list +-------------------------------------------------------------------------- */ +char * +bt_postprocess_value (AST * value, ushort options, boolean replace) +{ + AST * simple_value; /* current simple value */ + boolean pasting; + ushort string_opts; /* what to do to individual strings */ + int tot_len; /* total length of pasted string */ + char * new_string; /* in case of string pasting */ + char * tmp_string; + boolean free_tmp; /* should we free() tmp_string? 
*/ + + if (value == NULL) return NULL; + if (value->nodetype != BTAST_STRING && + value->nodetype != BTAST_NUMBER && + value->nodetype != BTAST_MACRO) + { + usage_error ("bt_postprocess_value: invalid AST node (not a value)"); + } + + + /* + * We will paste strings iff the user wants us to, and there are at least + * two simple values in the list headed by 'value'. + */ + + pasting = (options & BTO_PASTE) && (value->right); + + /* + * If we're to concatenate (paste) sub-strings, we need to know the + * total length of them. So make a pass over all the sub-strings + * (simple values), adding up their lengths. + */ + + tot_len = 0; /* these are out here to keep */ + new_string = NULL; /* gcc -Wall happy */ + tmp_string = NULL; + + if (pasting) + { + simple_value = value; + while (simple_value) + { + switch (simple_value->nodetype) + { + case BTAST_MACRO: + tot_len += bt_macro_length (simple_value->text); + break; + case BTAST_STRING: + tot_len += (simple_value->text) + ? (strlen (simple_value->text)) : 0; + break; + case BTAST_NUMBER: + tot_len += (simple_value->text) + ? (strlen (simple_value->text)) : 0; + break; + default: + internal_error ("simple value has bad nodetype (%d)", + (int) simple_value->nodetype); + } + simple_value = simple_value->right; + } + + /* Now allocate the buffer in which we'll accumulate the whole string */ + + new_string = (char *) calloc (tot_len+1, sizeof (char)); + } + + + /* + * Before entering the main loop, figure out just what + * bt_postprocess_string() is supposed to do -- eg. if pasting strings, + * we should not (yet) collapse whitespace. (That'll be done on the + * final, concatenated string -- assuming the caller put BTO_COLLAPSE in + * the options bitmap.) 
+ */ + + if (pasting) + { + string_opts = options & ~BTO_COLLAPSE; /* turn off collapsing */ + } + else + { + string_opts = options; /* leave it alone */ + } + + /* + * Sanity check: if we continue blindly on, we might stupidly + * concatenate a macro name and a literal string. So check for that. + * Converting numbers is superficial, but requiring that it be done + * keeps people honest. + */ + + if (pasting && ! (options & (BTO_CONVERT|BTO_EXPAND))) + { + usage_error ("bt_postprocess_value(): " + "must convert numbers and expand macros " + "when pasting substrings"); + } + + /* + * Now the main loop to process each string, and possibly tack it onto + * new_string. + */ + + simple_value = value; + while (simple_value) + { + tmp_string = NULL; + free_tmp = FALSE; + + /* + * If this simple value is a macro and we're supposed to expand + * macros, then do so. We also have to post-process the string + * returned from the macro table, because they're stored there + * without whitespace collapsed; if we're supposed to be doing that + * to the current value (and we're not pasting), this is where it + * will get done. + */ + if (simple_value->nodetype == BTAST_MACRO && (options & BTO_EXPAND)) + { + tmp_string = bt_macro_text (simple_value->text, + simple_value->filename, + simple_value->line); + if (tmp_string != NULL) + { + tmp_string = strdup (tmp_string); + free_tmp = TRUE; + bt_postprocess_string (tmp_string, string_opts); + } + + if (replace) + { + simple_value->nodetype = BTAST_STRING; + if (simple_value->text) + free (simple_value->text); + simple_value->text = tmp_string; + free_tmp = FALSE; /* mustn't free, it's now in the AST */ + } + } + + /* + * If the current simple value is a literal string, then just + * post-process it. This will be done in-place if 'replace' is + * true, otherwise a copy of the string will be post-processed. 
+ */ + else if (simple_value->nodetype == BTAST_STRING && simple_value->text) + { + if (replace) + { + tmp_string = simple_value->text; + } + else + { + tmp_string = strdup (simple_value->text); + free_tmp = TRUE; + } + + bt_postprocess_string (tmp_string, string_opts); + } + + /* + * Finally, if the current simple value is a number, change it to a + * string (depending on options) and get its value. We generally + * treat strings as numbers as equivalent, except of course numbers + * aren't post-processed -- there can't be any whitespace in them! + * The BTO_CONVERT option is mainly a sop to my strong-typing + * tendencies. + */ + if (simple_value->nodetype == BTAST_NUMBER) + { + if (replace && (options & BTO_CONVERT)) + simple_value->nodetype = BTAST_STRING; + + if (simple_value->text) + { + if (replace) + tmp_string = simple_value->text; + else + { + tmp_string = strdup (simple_value->text); + free_tmp = TRUE; + } + } + } + + if (pasting) + { + if (tmp_string) + strcat (new_string, tmp_string); + if (free_tmp) + free (tmp_string); + } + else + { + /* + * N.B. if tmp_string is NULL (eg. from a single undefined macro) + * we make a strdup() of the empty string -- this is so we can + * safely free() the string returned from this function + * at some future point. + * + * This strdup() seems to cause a 1-byte memory leak in some + * circumstances. I s'pose I should look into that some rainy + * afternoon... + */ + + new_string = (tmp_string != NULL) ? tmp_string : strdup (""); + } + + simple_value = simple_value->right; + } + + if (pasting) + { + int len; + + len = strlen (new_string); + assert (len <= tot_len); /* hope we alloc'd enough! */ + + bt_postprocess_string (new_string, options); + + /* + * If replacing data in the AST, delete all but first child of + * `field', and replace text for first child with new_string. + */ + + if (replace) + { + assert (value->right != NULL); /* there has to be > 1 simple value! 
*/ + zzfree_ast (value->right); /* free from second simple value on */ + value->right = NULL; /* remind ourselves they're gone */ + if (value->text) /* free text of first simple value */ + free (value->text); + value->text = new_string; /* and replace it with concatenation */ + } + } + + return new_string; + +} /* bt_postprocess_value() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_postprocess_field() +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Postprocesses all the strings in a single "field = value" + assignment subtree. Just checks that 'field' does indeed + point to an BTAST_FIELD node (presumably the parent of a list + of simple values), downcases the field name, and calls + bt_postprocess_value() on the value. +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/08/25, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +char * +bt_postprocess_field (AST * field, ushort options, boolean replace) +{ + if (field == NULL) return NULL; + if (field->nodetype != BTAST_FIELD) + usage_error ("bt_postprocess_field: invalid AST node (not a field)"); + + strlwr (field->text); /* downcase field name */ + return bt_postprocess_value (field->down, options, replace); + +} /* bt_postprocess_field() */ + + + +/* ------------------------------------------------------------------------ +@NAME : bt_postprocess_entry() +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Postprocesses all the strings in an entry: collapse whitespace, + concatenate substrings, expands macros, and whatnot. +@GLOBALS : +@CALLS : +@CREATED : 1997/01/10, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_postprocess_entry (AST * top, ushort options) +{ + AST *cur; + + if (top == NULL) return; /* not even an entry at all! 
*/ + if (top->nodetype != BTAST_ENTRY) + usage_error ("bt_postprocess_entry: " + "invalid node type (not entry root)"); + strlwr (top->text); /* downcase entry type */ + + if (top->down == NULL) return; /* no children at all */ + + cur = top->down; + if (cur->nodetype == BTAST_KEY) + cur = cur->right; + + switch (top->metatype) + { + case BTE_REGULAR: + case BTE_MACRODEF: + { + while (cur) + { + bt_postprocess_field (cur, options, TRUE); + if (top->metatype == BTE_MACRODEF && ! (options & BTO_NOSTORE)) + bt_add_macro_value (cur, options); + + cur = cur->right; + } + break; + } + + case BTE_COMMENT: + case BTE_PREAMBLE: + bt_postprocess_value (cur, options, TRUE); + break; + default: + internal_error ("bt_postprocess_entry: unknown entry metatype (%d)", + (int) top->metatype); + } + +} /* bt_postprocess_entry() */ diff --git a/src/translators/btparse/prototypes.h b/src/translators/btparse/prototypes.h new file mode 100644 index 0000000..88beada --- /dev/null +++ b/src/translators/btparse/prototypes.h @@ -0,0 +1,47 @@ +/* ------------------------------------------------------------------------ +@NAME : prototypes.h +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Prototype declarations for functions from various places. + Only functions that are private to the library (but shared + between files within the library) are declared here. + Functions that are "exported from" the library (ie. usable + by and expected to be used by library user) are declared in + btparse.h. +@GLOBALS : +@CALLS : +@CREATED : 1997/01/12, Greg Ward +@MODIFIED : +@VERSION : $Id: prototypes.h,v 1.14 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ + +#ifndef PROTOTYPES_H +#define PROTOTYPES_H + +#include <stdio.h> +#include "btparse.h" /* for types */ + +/* util.c */ +#if !HAVE_STRLWR +char *strlwr (char *s); +#endif +#if !HAVE_STRUPR +char *strupr (char *s); +#endif + +/* macros.c */ +void init_macros (void); +void done_macros (void); + +/* bibtex_ast.c */ +void dump_ast (char *msg, AST *root); + +#endif /* PROTOTYPES_H */ diff --git a/src/translators/btparse/scan.c b/src/translators/btparse/scan.c new file mode 100644 index 0000000..b9899e4 --- /dev/null +++ b/src/translators/btparse/scan.c @@ -0,0 +1,615 @@ + +/* parser.dlg -- DLG Description of scanner + * + * Generated from: bibtex.g + * + * Terence Parr, Will Cohen, and Hank Dietz: 1989-1994 + * Purdue University Electrical Engineering + * With AHPCRC, University of Minnesota + * ANTLR Version 1.33 + */ + +#include <stdio.h> +#define ANTLR_VERSION 133 + +#define ZZCOL +#define USER_ZZSYN + +#include "btconfig.h" +#include "btparse.h" +#include "attrib.h" +#include "lex_auxiliary.h" +#include "error.h" +/*#include "my_dmalloc.h"*/ + +extern char * InputFilename; /* for zzcr_ast call in pccts/ast.c */ +#include "antlr.h" +#include "ast.h" +#include "tokens.h" +#include "dlgdef.h" +LOOKAHEAD +void zzerraction() +{ + (*zzerr)("invalid token"); + zzadvance(); + zzskip(); +} +/* + * D L G tables + * + * Generated from: parser.dlg + * + * 1989-1994 by Will Cohen, Terence Parr, and Hank Dietz + * Purdue University Electrical Engineering + * DLG Version 1.33 + */ + +#include "mode.h" + + + +static void act1() +{ + NLA = 1; + } + + +static void act2() +{ + NLA = AT; + at_sign (); + } + + +static void act3() +{ + NLA = 3; + newline (); + } + + +static void act4() +{ + NLA = COMMENT; + comment (); + } + + +static void act5() +{ + NLA = 5; + zzskip (); + } + + +static void act6() +{ + NLA = 6; + toplevel_junk (); + } + +static unsigned char shift0[257] = { + 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+ 4, 2, 5, 5, 4, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 4, 5, 5, 5, 5, 3, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 1, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5 +}; + + +static void act7() +{ + NLA = 1; + } + + +static void act8() +{ + NLA = 7; + newline (); + } + + +static void act9() +{ + NLA = COMMENT; + comment (); + } + + +static void act10() +{ + NLA = 8; + zzskip (); + } + + +static void act11() +{ + NLA = NUMBER; + } + + +static void act12() +{ + NLA = NAME; + name (); + } + + +static void act13() +{ + NLA = LBRACE; + lbrace (); + } + + +static void act14() +{ + NLA = RBRACE; + rbrace (); + } + + +static void act15() +{ + NLA = ENTRY_OPEN; + lparen (); + } + + +static void act16() +{ + NLA = ENTRY_CLOSE; + rparen (); + } + + +static void act17() +{ + NLA = EQUALS; + } + + +static void act18() +{ + NLA = HASH; + } + + +static void act19() +{ + NLA = COMMA; + } + + +static void act20() +{ + NLA = 18; + start_string ('"'); + } + +static unsigned char shift1[257] = { + 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 3, 1, 14, 14, 3, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 3, 5, 13, 11, 5, 2, 5, + 14, 8, 9, 5, 5, 12, 5, 5, 5, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, + 5, 5, 10, 5, 5, 14, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 14, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 6, 5, 7, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14 +}; + + +static void act21() +{ + NLA = 1; + } + + +static void act22() +{ + NLA = 19; + check_runaway_string (); + } + + +static void act23() +{ + NLA = 20; + zzreplchar (' '); zzmore (); + } + + +static void act24() +{ + NLA = 21; + open_brace (); + } + + +static void act25() +{ + NLA = 22; + close_brace (); + } + + +static void act26() +{ + NLA = 23; + lparen_in_string (); + } + + +static void act27() +{ + NLA = 24; + rparen_in_string (); + } + + +static void act28() +{ + NLA = STRING; + quote_in_string (); + } + + +static void act29() +{ + NLA = 26; + zzmore (); + } + +static unsigned char shift2[257] = { + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 1, 3, 3, 2, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 8, 3, 3, 3, 3, + 3, 6, 7, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 4, 3, 5, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 
3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3 +}; + +#define DfaStates 38 +typedef unsigned char DfaState; + +static DfaState st0[7] = { + 1, 2, 3, 4, 5, 6, 38 +}; + +static DfaState st1[7] = { + 38, 38, 38, 38, 38, 38, 38 +}; + +static DfaState st2[7] = { + 38, 38, 38, 38, 38, 38, 38 +}; + +static DfaState st3[7] = { + 38, 38, 38, 38, 38, 38, 38 +}; + +static DfaState st4[7] = { + 38, 7, 8, 9, 7, 9, 38 +}; + +static DfaState st5[7] = { + 38, 38, 38, 38, 5, 38, 38 +}; + +static DfaState st6[7] = { + 38, 38, 38, 6, 38, 6, 38 +}; + +static DfaState st7[7] = { + 38, 7, 8, 7, 7, 7, 38 +}; + +static DfaState st8[7] = { + 38, 38, 38, 38, 38, 38, 38 +}; + +static DfaState st9[7] = { + 38, 7, 8, 9, 7, 9, 38 +}; + +static DfaState st10[16] = { + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 38, 38 +}; + +static DfaState st11[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st12[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st13[16] = { + 38, 25, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 38 +}; + +static DfaState st14[16] = { + 38, 38, 38, 14, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st15[16] = { + 38, 38, 38, 38, 15, 16, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st16[16] = { + 38, 38, 38, 38, 16, 16, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st17[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st18[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st19[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st20[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st21[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st22[16] = { + 38, 38, 38, 38, 38, 
38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st23[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st24[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st25[16] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38 +}; + +static DfaState st26[16] = { + 38, 25, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 38 +}; + +static DfaState st27[11] = { + 28, 29, 30, 31, 32, 33, 34, 35, 36, 31, + 38 +}; + +static DfaState st28[11] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38 +}; + +static DfaState st29[11] = { + 38, 38, 37, 37, 38, 38, 38, 38, 38, 38, + 38 +}; + +static DfaState st30[11] = { + 38, 38, 31, 31, 38, 38, 38, 38, 38, 31, + 38 +}; + +static DfaState st31[11] = { + 38, 38, 31, 31, 38, 38, 38, 38, 38, 31, + 38 +}; + +static DfaState st32[11] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38 +}; + +static DfaState st33[11] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38 +}; + +static DfaState st34[11] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38 +}; + +static DfaState st35[11] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38 +}; + +static DfaState st36[11] = { + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38 +}; + +static DfaState st37[11] = { + 38, 38, 37, 37, 38, 38, 38, 38, 38, 38, + 38 +}; + + +DfaState *dfa[38] = { + st0, + st1, + st2, + st3, + st4, + st5, + st6, + st7, + st8, + st9, + st10, + st11, + st12, + st13, + st14, + st15, + st16, + st17, + st18, + st19, + st20, + st21, + st22, + st23, + st24, + st25, + st26, + st27, + st28, + st29, + st30, + st31, + st32, + st33, + st34, + st35, + st36, + st37 +}; + + +DfaState accepts[39] = { + 0, 1, 2, 3, 6, 5, 6, 0, 4, 6, + 0, 7, 8, 0, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 9, 0, 0, 21, 22, + 23, 29, 24, 25, 26, 27, 28, 22, 0 +}; + +void (*actions[30])() = { + zzerraction, + act1, + act2, + act3, + act4, + act5, + act6, + act7, + act8, + act9, + 
act10, + act11, + act12, + act13, + act14, + act15, + act16, + act17, + act18, + act19, + act20, + act21, + act22, + act23, + act24, + act25, + act26, + act27, + act28, + act29 +}; + +static DfaState dfa_base[] = { + 0, + 10, + 27 +}; + +static unsigned char *b_class_no[] = { + shift0, + shift1, + shift2 +}; + + + +#define ZZSHIFT(c) (b_class_no[zzauto][1+c]) +#define MAX_MODE 3 +#include "dlgauto.h" diff --git a/src/translators/btparse/stdpccts.h b/src/translators/btparse/stdpccts.h new file mode 100644 index 0000000..e232634 --- /dev/null +++ b/src/translators/btparse/stdpccts.h @@ -0,0 +1,31 @@ +#ifndef STDPCCTS_H +#define STDPCCTS_H +/* + * stdpccts.h -- P C C T S I n c l u d e + * + * Terence Parr, Will Cohen, and Hank Dietz: 1989-1994 + * Purdue University Electrical Engineering + * With AHPCRC, University of Minnesota + * ANTLR Version 1.33 + */ +#include <stdio.h> +#define ANTLR_VERSION 133 + +#define ZZCOL +#define USER_ZZSYN + +#include "btparse.h" +#include "attrib.h" +#include "lex_auxiliary.h" +#include "error.h" +/*#include "my_dmalloc.h"*/ + +extern char * InputFilename; /* for zzcr_ast call in pccts/ast.c */ +#define GENAST +#define zzSET_SIZE 4 +#include "antlr.h" +#include "ast.h" +#include "tokens.h" +#include "dlgdef.h" +#include "mode.h" +#endif diff --git a/src/translators/btparse/string_util.c b/src/translators/btparse/string_util.c new file mode 100644 index 0000000..3713608 --- /dev/null +++ b/src/translators/btparse/string_util.c @@ -0,0 +1,695 @@ +/* ------------------------------------------------------------------------ +@NAME : string_util.c +@DESCRIPTION: Various string-processing utility functions: + bt_purify_string() + bt_change_case() + + and their helpers: + foreign_letter() + purify_special_char() +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/10/19, Greg Ward +@MODIFIED : 1997/11/25, GPW: renamed to from purify.c to string_util.c + added bt_change_case() and friends +@VERSION : $Id: string_util.c,v 1.10 1999/10/28 22:50:28 
greg Rel $ +-------------------------------------------------------------------------- */ + +#include <stdlib.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include "error.h" +#include "btparse.h" +#include "bt_debug.h" + + +/* + * These definitions should be fixed to be consistent with HTML + * entities, just for fun. And perhaps I should add entries for + * accented letters (at least those supported by TeX and HTML). + */ +typedef enum +{ + L_OTHER, /* not a "foreign" letter */ + L_OSLASH_L, /* Eastern European {\o} */ + L_OSLASH_U, + L_LSLASH_L, /* {\l} */ + L_LSLASH_U, + L_OELIG_L, /* Latin {\oe} ligature */ + L_OELIG_U, + L_AELIG_L, /* {\ae} ligature */ + L_AELIG_U, + L_SSHARP_L, /* German "sharp s" {\ss} */ + L_SSHARP_U, + L_ACIRCLE_L, /* Nordic {\aa} */ + L_ACIRCLE_U, + L_INODOT_L, /* undotted i: {\i} */ + L_JNODOT_L /* {\j} */ +} bt_letter; + + +static const char * uc_version[] = +{ + NULL, /* L_OTHER */ + "\\O", /* L_OSLASH_L */ + "\\O", /* L_OSLASH_U */ + "\\L", /* L_LSLASH_L */ + "\\L", /* L_LSLASH_U */ + "\\OE", /* L_OELIG_L */ + "\\OE", /* L_OELIG_U */ + "\\AE", /* L_AELIG_L */ + "\\AE", /* L_AELIG_U */ + "SS", /* L_SSHARP_L -- for LaTeX 2.09 */ + "\\SS", /* L_SSHARP_U */ + "\\AA", /* L_ACIRCLE_L */ + "\\AA", /* L_ACIRCLE_U */ + "I", /* L_INODOT_L */ + "J" /* L_JNODOT_L */ +}; + +static const char * lc_version[] = +{ + NULL, /* L_OTHER */ + "\\o", /* L_OSLASH_L */ + "\\o", /* L_OSLASH_U */ + "\\l", /* L_LSLASH_L */ + "\\l", /* L_LSLASH_U */ + "\\oe", /* L_OELIG_L */ + "\\oe", /* L_OELIG_U */ + "\\ae", /* L_AELIG_L */ + "\\ae", /* L_AELIG_U */ + "\\ss", /* L_SSHARP_L */ + "\\ss", /* L_SSHARP_U */ + "\\aa", /* L_ACIRCLE_L */ + "\\aa", /* L_ACIRCLE_U */ + "\\i", /* L_INODOT_L */ + "\\j" /* L_JNODOT_L */ +}; + + + +/* ------------------------------------------------------------------------ +@NAME : foreign_letter() +@INPUT : str + start + stop +@OUTPUT : letter +@RETURNS : TRUE if the string delimited by start and stop is a foreign + 
letter control sequence +@DESCRIPTION: Determines if a character sequence is one of (La)TeX's + "foreign letter" control sequences (l, o, ae, oe, aa, ss, plus + uppercase versions). If `letter' is non-NULL, returns which + letter was found in it (as a bt_letter value). +@CALLS : +@CALLERS : purify_special_char() +@CREATED : 1997/10/19, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static boolean +foreign_letter (char *str, int start, int stop, bt_letter * letter) +{ + char c1, c2; + bt_letter dummy; + + + /* + * This is written for speed, not flexibility -- adding new foreign + * letters would be trying and vexatious. + * + * N.B. my gold standard list of foreign letters is Kopka and Daly's + * *A Guide to LaTeX 2e*, section 2.5.6. + */ + + if (letter == NULL) /* so we can assign to *letter */ + letter = &dummy; /* without compunctions */ + *letter = L_OTHER; /* assume not a "foreign" letter */ + + c1 = str[start+0]; /* only two characters that we're */ + c2 = str[start+1]; /* interested in */ + + switch (stop - start) + { + case 1: /* one-character control sequences */ + switch (c1) /* (\o and \l) */ + { + case 'o': + *letter = L_OSLASH_L; return TRUE; + case 'O': + *letter = L_OSLASH_U; return TRUE; + case 'l': + *letter = L_LSLASH_L; return TRUE; + case 'L': + *letter = L_LSLASH_L; return TRUE; + case 'i': + *letter = L_INODOT_L; return TRUE; + case 'j': + *letter = L_JNODOT_L; return TRUE; + default: + return FALSE; + } + break; + case 2: /* two character control sequences */ + switch (c1) /* (\oe, \ae, \aa, and \ss) */ + { + case 'o': + if (c2 == 'e') { *letter = L_OELIG_L; return TRUE; } + case 'O': + if (c2 == 'E') { *letter = L_OELIG_U; return TRUE; } + + /* BibTeX 0.99 does not handle \aa and \AA -- but I do!*/ + case 'a': + if (c2 == 'e') + { *letter = L_AELIG_L; return TRUE; } + else if (c2 == 'a') + { *letter = L_ACIRCLE_L; return TRUE; } + else + return FALSE; + case 'A': + if (c2 == 'E') + { *letter = 
L_AELIG_U; return TRUE; } + else if (c2 == 'A') + { *letter = L_ACIRCLE_U; return TRUE; } + else + return FALSE; + + /* uppercase sharp-s -- new with LaTeX 2e (so far all I do + * is recognize it as a "foreign" letter) + */ + case 's': + if (c2 == 's') + { *letter = L_SSHARP_L; return TRUE; } + else + return FALSE; + case 'S': + if (c2 == 'S') + { *letter = L_SSHARP_U; return TRUE; } + else + return FALSE; + } + break; + default: + return FALSE; + } /* switch on length of control sequence */ + + internal_error ("foreign_letter(): should never reach end of function"); + return FALSE; /* to keep gcc -Wall happy */ + +} /* foreign_letter */ + + +/* ------------------------------------------------------------------------ +@NAME : purify_special_char() +@INPUT : *src, *dst - pointers into the input and output strings +@OUTPUT : *src - updated to point to the closing brace of the + special char + *dst - updated to point to the next available spot + for copying text to +@RETURNS : +@DESCRIPTION: "Purifies" a BibTeX special character. On input, *src should + point to the opening brace of a special character (ie. the + brace must be at depth 0 of the whole string, and the + character immediately following it must be a backslash). + *dst should point to the next spot to copy into the output + (purified) string. purify_special_char() will skip over the + opening brace and backslash; if the control sequence is one + of LaTeX's foreign letter sequences (as determined by + foreign_letter()), then it is simply copied to *dst. + Otherwise the control sequence is skipped. In either case, + text after the control sequence is either copied (alphabetic + characters) or skipped (anything else, including hyphens, + ties, and digits). 
+@CALLS : foreign_letter() +@CALLERS : bt_purify_string() +@CREATED : 1997/10/19, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +purify_special_char (char *str, int * src, int * dst) +{ + int depth; + int peek; + + assert (str[*src] == '{' && str[*src + 1] == '\\'); + depth = 1; + + *src += 2; /* jump to start of control sequence */ + peek = *src; /* scan to end of control sequence */ + while (isalpha (str[peek])) + peek++; + if (peek == *src) /* in case of single-char, non-alpha */ + peek++; /* control sequence (eg. {\'e}) */ + + if (foreign_letter (str, *src, peek, NULL)) + { + assert (peek - *src == 1 || peek - *src == 2); + str[(*dst)++] = str[(*src)++]; /* copy first char */ + if (*src < peek) /* copy second char, downcasing */ + str[(*dst)++] = tolower (str[(*src)++]); + } + else /* not a foreign letter -- skip */ + { /* the control sequence entirely */ + *src = peek; + } + + while (str[*src]) + { + switch (str[*src]) + { + case '{': + depth++; + (*src)++; + break; + case '}': + depth--; + if (depth == 0) return; /* done with special char */ + (*src)++; + break; + default: + if (isalpha (str[*src])) /* copy alphabetic chars */ + str[(*dst)++] = str[(*src)++]; + else /* skip everything else */ + (*src)++; + } + } + + /* + * If we get here, we have unbalanced braces -- the '}' case should + * always hit a depth == 0 point if braces are balanced. No warning, + * though, because a) BibTeX doesn't warn about purifying unbalanced + * strings, and b) we (should have) already warned about it in the + * lexer. + */ + +} /* purify_special_char() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_purify_string() +@INOUT : instr +@INPUT : options +@OUTPUT : +@RETURNS : instr - same as input string, but modified in place +@DESCRIPTION: "Purifies" a BibTeX string. 
This consists of copying + alphanumeric characters, converting hyphens and ties to + space, copying spaces, and skipping everything else. (Well, + almost -- special characters are handled specially, of + course. Basically, accented letters have the control + sequence skipped, while foreign letters have the control + sequence preserved in a reasonable manner. See + purify_special_char() for details.) +@CALLS : purify_special_char() +@CALLERS : +@CREATED : 1997/10/19, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_purify_string (char * string, ushort options) +{ + int src, /* both indeces into string */ + dst; + int depth; /* brace depth in string */ + unsigned orig_len; + + /* + * Since purification always copies or deletes chars, outstr will + * be no longer than string -- so nothing fancy is required to put + * an upper bound on its eventual size. + */ + + depth = 0; + src = 0; + dst = 0; + orig_len = strlen (string); + + DBG_ACTION (1, printf ("bt_purify_string(): input = %p (%s)\n", + string, string)); + + while (string[src] != (char) 0) + { + DBG_ACTION (2, printf (" next: >%c<: ", string[src])); + switch (string[src]) + { + case '~': /* "separator" characters -- */ + case '-': /* replaced with space */ + case ' ': /* and copy an actual space */ + string[dst++] = ' '; + src++; + DBG_ACTION (2, printf ("replacing with space")); + break; + case '{': + if (depth == 0 && string[src+1] == '\\') + { + DBG_ACTION (2, printf ("special char found")); + purify_special_char (string, &src, &dst); + } + else + { + DBG_ACTION (2, printf ("ordinary open brace")); + src++; + } + depth++; + break; + case '}': + DBG_ACTION (2, printf ("close brace")); + depth--; + src++; + break; + default: + if (isalnum (string[src])) /* any alphanumeric char -- */ + { + DBG_ACTION (2, printf ("alphanumeric -- copying")); + string[dst++] = string[src++]; /* copy it */ + } + else /* anything else -- skip it */ + { + DBG_ACTION (2, printf 
("non-separator, non-brace, non-alpha")); + src++; + } + } /* switch string[src] */ + + DBG_ACTION (2, printf ("\n")); + + } /* while string[src] */ + + DBG_ACTION (1, printf ("bt_purify_string(): depth on exit: %d\n", depth)); + + string[dst] = (char) 0; + assert (strlen (string) <= orig_len); +} /* bt_purify_string() */ + + +/* ====================================================================== + * Case-transformation stuff + */ + + +/* ------------------------------------------------------------------------ +@NAME : convert_special_char() +@INPUT : transform +@INOUT : string + src + dst + start_sentence + after_colon +@RETURNS : +@DESCRIPTION: Does case conversion on a special character. +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/11/25, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +convert_special_char (char transform, + char * string, + int * src, + int * dst, + boolean * start_sentence, + boolean * after_colon) +{ + int depth; + boolean done_special; + int cs_end; + int cs_len; /* counting the backslash */ + bt_letter letter; + const char * repl; + int repl_len; + +#ifndef ALLOW_WARNINGS + repl = NULL; /* silence "might be used" */ + /* uninitialized" warning */ +#endif + + /* First, copy just the opening brace */ + string[(*dst)++] = string[(*src)++]; + + /* + * Now loop over characters inside the braces -- stop when we reach + * the matching close brace, or when the string ends. + */ + depth = 1; /* because we're in a special char */ + done_special = FALSE; + + while (string[*src] != 0 && !done_special) + { + switch (string[*src]) + { + case '\\': /* a control sequence */ + { + cs_end = *src+1; /* scan over chars of c.s. */ + while (isalpha (string[cs_end])) + cs_end++; + + /* + * OK, now *src points to the backslash (so src+*1 points to + * first char. of control sequence), and cs_end points to + * character immediately following end of control sequence. 
+ * Thus we analyze [*src+1..cs_end] to determine if the control + * sequence is a foreign letter, and use (cs_end - (*src+1) + 1) + * = (cs_end - *src) as the length of the control sequence. + */ + + cs_len = cs_end - *src; /* length of cs, counting backslash */ + + if (foreign_letter (string, *src+1, cs_end, &letter)) + { + if (letter == L_OTHER) + internal_error ("impossible foreign letter"); + + switch (transform) + { + case 'u': + repl = uc_version[(int) letter]; + break; + case 'l': + repl = lc_version[(int) letter]; + break; + case 't': + if (*start_sentence || *after_colon) + { + repl = uc_version[(int) letter]; + *start_sentence = *after_colon = FALSE; + } + else + { + repl = lc_version[(int) letter]; + } + break; + default: + internal_error ("impossible case transform \"%c\"", + transform); + } + + repl_len = strlen (repl); + if (repl_len > cs_len) + internal_error + ("replacement text longer than original cs"); + + strncpy (string + *dst, repl, repl_len); + *src = cs_end; + *dst += repl_len; + } /* control sequence is a foreign letter */ + else + { + /* not a foreign letter -- just copy the control seq. as is */ + + + strncpy (string + *dst, string + *src, cs_end - *src); + *src += cs_len; + assert (*src == cs_end); + *dst += cs_len; + } /* control sequence not a foreign letter */ + + break; + } /* case: '\\' */ + + case '{': + { + string[(*dst)++] = string[(*src)++]; + depth++; + break; + } + + case '}': + { + string[(*dst)++] = string[(*src)++]; + depth--; + if (depth == 0) + done_special = TRUE; + break; + } + + default: /* any other character */ + { + switch (transform) + { + /* + * Inside special chars, lowercase and title caps are same. + * (At least, that's bibtex's convention. I might change this + * at some point to be a bit smarter.) 
+ */ + case 'l': + case 't': + string[(*dst)++] = tolower (string[(*src)++]); + break; + case 'u': + string[(*dst)++] = toupper (string[(*src)++]); + break; + default: + internal_error ("impossible case transform \"%c\"", + transform); + } + } /* default char */ + + } /* switch: current char */ + + } /* while: string or special char not done */ + +} /* convert_special_char() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_change_case() +@INPUT : +@OUTPUT : +@RETURNS : +@DESCRIPTION: Converts a string (in-place) to either uppercase, lowercase, + or "title capitalization"> +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/11/25, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_change_case (char transform, + char * string, + ushort options) +{ + int len; + int depth; + int src, dst; /* indeces into string */ + boolean start_sentence; + boolean after_colon; + + src = dst = 0; + len = strlen (string); + depth = 0; + + start_sentence = TRUE; + after_colon = FALSE; + + while (string[src] != 0) + { + switch (string[src]) + { + case '{': + + /* + * At start of special character? The entire special char. + * will be handled here, as follows: + * - text at any brace-depth within the s.c. is case-mangled; + * punctuation (sentence endings, colons) are ignored + * - control sequences are left alone, unless they are + * one of the "foreign letter" control sequences, in + * which case they're converted to the appropriate string + * according to the uc_version or lc_version tables. + */ + if (depth == 0 && string[src+1] == '\\') + { + convert_special_char (transform, string, &src, &dst, + &start_sentence, &after_colon); + } + + /* + * Otherwise, it's just something in braces. This is probably + * a proper noun or something encased in braces to protect it + * from case-mangling, so we do not case-mangle it. 
However, + * we *do* switch out of start_sentence or after_colon mode if + * we happen to be there (otherwise we'll do the wrong thing + * once we're out of the braces). + */ + else + { + string[dst++] = string[src++]; + start_sentence = after_colon = FALSE; + depth++; + } + break; + + case '}': + string[dst++] = string[src++]; + depth--; + break; + + /* + * Sentence-ending punctuation and colons are handled separately + * to allow for exact mimicing of BibTeX's behaviour. I happen + * to think that this behaviour (capitalize first word of sentences + * in a title) is better than BibTeX's, but I want to keep my + * options open for a future goal of perfect compatability. + */ + case '.': + case '?': + case '!': + start_sentence = TRUE; + string[dst++] = string[src++]; + break; + + case ':': + after_colon = TRUE; + string[dst++] = string[src++]; + break; + + default: + if (isspace (string[src])) + { + string[dst++] = string[src++]; + } + else + { + if (depth == 0) + { + switch (transform) + { + case 'u': + string[dst++] = toupper (string[src++]); + break; + case 'l': + string[dst++] = tolower (string[src++]); + break; + case 't': + if (start_sentence || after_colon) + { + /* + * XXX BibTeX only preserves case of character + * immediately after a colon; I do two things + * differently: first, I pay attention to sentence + * punctuation, and second I force uppercase + * at start of sentence or after a colon. 
+ */ + string[dst++] = toupper (string[src++]); + start_sentence = after_colon = FALSE; + } + else + { + string[dst++] = tolower (string[src++]); + } + break; + default: + internal_error ("impossible case transform \"%c\"", + transform); + } + } /* depth == 0 */ + else + { + string[dst++] = string[src++]; + } + } /* not blank */ + } /* switch on current character */ + + } /* while not at end of string */ + +} /* bt_change_case */ diff --git a/src/translators/btparse/sym.c b/src/translators/btparse/sym.c new file mode 100644 index 0000000..2426dea --- /dev/null +++ b/src/translators/btparse/sym.c @@ -0,0 +1,372 @@ +/* + * Simple symbol table manager using coalesced chaining to resolve collisions + * + * Doubly-linked lists are used for fast removal of entries. + * + * 'sym.h' must have a definition for typedef "Sym". Sym must include at + * minimum the following fields: + * + * ... + * char *symbol; + * struct ... *next, *prev, **head, *scope; + * unsigned int hash; + * ... + * + * 'template.h' can be used as a template to create a 'sym.h'. + * + * 'head' is &(table[hash(itself)]). + * The hash table is not resizable at run-time. + * The scope field is used to link all symbols of a current scope together. + * Scope() sets the current scope (linked list) to add symbols to. + * Any number of scopes can be handled. The user passes the address of + * a pointer to a symbol table + * entry (INITIALIZED TO NULL first time). + * + * Available Functions: + * + * zzs_init(s1,s2) -- Create hash table with size s1, string table size s2. + * zzs_done() -- Free hash and string table created with zzs_init(). + * zzs_add(key,rec)-- Add 'rec' with key 'key' to the symbol table. + * zzs_newadd(key) -- create entry; add using 'key' to the symbol table. + * zzs_get(key) -- Return pointer to last record entered under 'key' + * Else return NULL + * zzs_del(p) -- Unlink the entry associated with p. This does + * NOT free 'p' and DOES NOT remove it from a scope + * list. 
If it was a part of your intermediate code + * tree or another structure. It will still be there. + * It is only removed from further consideration + * by the symbol table. + * zzs_keydel(s) -- Unlink the entry associated with key s. + * Calls zzs_del(p) to unlink. + * zzs_scope(sc) -- Specifies that everything added to the symbol + * table with zzs_add() is added to the list (scope) + * 'sc'. 'sc' is of 'Sym **sc' type and must be + * initialized to NULL before trying to add anything + * to it (passing it to zzs_scope()). Scopes can be + * switched at any time and merely links a set of + * symbol table entries. If a NULL pointer is + * passed, the current scope is returned. + * zzs_rmscope(sc) -- Remove (zzs_del()) all elements of scope 'sc' + * from the symbol table. The entries are NOT + * free()'d. A pointer to the first + * element in the "scope" is returned. The user + * can then manipulate the list as he/she chooses + * (such as freeing them all). NOTE that this + * function sets your scope pointer to NULL, + * but returns a pointer to the list for you to use. + * zzs_stat() -- Print out the symbol table and some relevant stats. + * zzs_new(key) -- Create a new record with calloc() of type Sym. + * Add 'key' to the string table and make the new + * records 'symbol' pointer point to it. + * zzs_strdup(s) -- Add s to the string table and return a pointer + * to it. Very fast allocation routine + * and does not require strlen() nor calloc(). 
+ * + * Example: + * + * #include <stdio.h> + * #include "sym.h" + * + * main() + * { + * Sym *scope1=NULL, *scope2=NULL, *a, *p; + * + * zzs_init(101, 100); + * + * a = zzs_new("Apple"); zzs_add(a->symbol, a); -- No scope + * zzs_scope( &scope1 ); -- enter scope 1 + * a = zzs_new("Plum"); zzs_add(a->symbol, a); + * zzs_scope( &scope2 ); -- enter scope 2 + * a = zzs_new("Truck"); zzs_add(a->symbol, a); + * + * p = zzs_get("Plum"); + * if ( p == NULL ) fprintf(stderr, "Hmmm...Can't find 'Plum'\n"); + * + * p = zzs_rmscope(&scope1) + * for (; p!=NULL; p=p->scope) {printf("Scope1: %s\n", p->symbol);} + * p = zzs_rmscope(&scope2) + * for (; p!=NULL; p=p->scope) {printf("Scope2: %s\n", p->symbol);} + * } + * + * Terence Parr + * Purdue University + * February 1990 + * + * CHANGES + * + * Terence Parr + * May 1991 + * Renamed functions to be consistent with ANTLR + * Made HASH macro + * Added zzs_keydel() + * Added zzs_newadd() + * Fixed up zzs_stat() + * + * July 1991 + * Made symbol table entry save its hash code for fast comparison + * during searching etc... 
+ */ + +/*#include "bt_config.h"*/ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#ifdef MEMCHK +#include "trax.h" +#endif +#include "sym.h" +/*#include "my_dmalloc.h"*/ + +#define StrSame 0 + +static Sym **CurScope = NULL; +static unsigned size = 0; +static Sym **table=NULL; +static char *strings; +static char *strp; +static int strsize = 0; + +void +zzs_init(int sz, int strs) +{ + if ( sz <= 0 || strs <= 0 ) return; + table = (Sym **) calloc(sz, sizeof(Sym *)); + if ( table == NULL ) + { + fprintf(stderr, "Cannot allocate table of size %d\n", sz); + exit(1); + } + strings = (char *) calloc(strs, sizeof(char)); + if ( strings == NULL ) + { + fprintf(stderr, "Cannot allocate string table of size %d\n", strs); + exit(1); + } + size = sz; + strsize = strs; + strp = strings; +} + + +void +zzs_free(void) +{ + unsigned i; + Sym *cur, *next; + + for (i = 0; i < size; i++) + { + cur = table[i]; + while (cur != NULL) + { + next = cur->next; + free (cur); + cur = next; + } + } +} + + +void +zzs_done(void) +{ + if ( table != NULL ) free( table ); + if ( strings != NULL ) free( strings ); +} + +void +zzs_add(char *key, register Sym *rec) +{ + register unsigned int h=0; + register char *p=key; + + HASH_FUN(p, h); + rec->hash = h; /* save hash code for fast comp later */ + h %= size; + + if ( CurScope != NULL ) {rec->scope = *CurScope; *CurScope = rec;} + rec->next = table[h]; /* Add to doubly-linked list */ + rec->prev = NULL; + if ( rec->next != NULL ) (rec->next)->prev = rec; + table[h] = rec; + rec->head = &(table[h]); +} + +Sym * +zzs_get(char *key) +{ + register unsigned int h=0; + register char *p=key; + register Sym *q; + + HASH_FUN(p, h); + + for (q = table[h%size]; q != NULL; q = q->next) + { + if ( q->hash == h ) /* do we even have a chance of matching? */ + if ( strcasecmp(key, q->symbol) == StrSame ) return( q ); + } + return( NULL ); +} + +/* + * Unlink p from the symbol table. Hopefully, it's actually in the + * symbol table. 
+ * + * If p is not part of a bucket chain of the symbol table, bad things + * will happen. + * + * Will do nothing if all list pointers are NULL + */ +void +zzs_del(register Sym *p) +{ + if ( p == NULL ) {fprintf(stderr, "zzs_del(NULL)\n"); exit(1);} + if ( p->prev == NULL ) /* Head of list */ + { + register Sym **t = p->head; + + if ( t == NULL ) return; /* not part of symbol table */ + (*t) = p->next; + if ( (*t) != NULL ) (*t)->prev = NULL; + } + else + { + (p->prev)->next = p->next; + if ( p->next != NULL ) (p->next)->prev = p->prev; + } + p->next = p->prev = NULL; /* not part of symbol table anymore */ + p->head = NULL; +} + +void +zzs_keydel(char *key) +{ + Sym *p = zzs_get(key); + + if ( p != NULL ) zzs_del( p ); +} + +/* S c o p e S t u f f */ + +/* Set current scope to 'scope'; return current scope if 'scope' == NULL */ +Sym ** +zzs_scope(Sym **scope) +{ + if ( scope == NULL ) return( CurScope ); + CurScope = scope; + return( scope ); +} + +/* Remove a scope described by 'scope'. 
Return pointer to 1st element in scope */ +Sym * +zzs_rmscope(register Sym **scope) +{ + register Sym *p; + Sym *start; + + if ( scope == NULL ) return(NULL); + start = p = *scope; + for (; p != NULL; p=p->scope) { zzs_del( p ); } + *scope = NULL; + return( start ); +} + +void +zzs_stat(void) +{ + static unsigned short count[20]; + unsigned int i,n=0,low=0, hi=0; + register Sym **p; + float avg=0.0; + + for (i=0; i<20; i++) count[i] = 0; + for (p=table; p<&(table[size]); p++) + { + register Sym *q = *p; + unsigned int len; + + if ( q != NULL && low==0 ) low = p-table; + len = 0; + if ( q != NULL ) printf("[%d]", p-table); + while ( q != NULL ) + { + len++; + n++; + printf(" %s", q->symbol); + q = q->next; + if ( q == NULL ) printf("\n"); + } + if ( len>=20 ) printf("zzs_stat: count table too small\n"); + else count[len]++; + if ( *p != NULL ) hi = p-table; + } + + printf("Storing %d recs used %d hash positions out of %d\n", + n, size-count[0], size); + printf("%f %% utilization\n", + ((float)(size-count[0]))/((float)size)); + for (i=0; i<20; i++) + { + if ( count[i] != 0 ) + { + avg += (((float)(i*count[i]))/((float)n)) * i; + printf("Buckets of len %d == %d (%f %% of recs)\n", + i, count[i], 100.0*((float)(i*count[i]))/((float)n)); + } + } + printf("Avg bucket length %f\n", avg); + printf("Range of hash function: %d..%d\n", low, hi); +} + +/* + * Given a string, this function allocates and returns a pointer to a + * symbol table record whose "symbol" pointer is reset to a position + * in the string table. 
+ */ +Sym * +zzs_new(char *text) +{ + Sym *p; + char *zzs_strdup(register char *s); + + if ( (p = (Sym *) calloc(1,sizeof(Sym))) == 0 ) + { + fprintf(stderr,"Out of memory\n"); + exit(1); + } + p->symbol = zzs_strdup(text); + + return p; +} + +/* create a new symbol table entry and add it to the symbol table */ +Sym * +zzs_newadd(char *text) +{ + Sym *p = zzs_new(text); + if ( p != NULL ) zzs_add(text, p); + return p; +} + +/* Add a string to the string table and return a pointer to it. + * Bump the pointer into the string table to next avail position. + */ +char * +zzs_strdup(register char *s) +{ + register char *start=strp; + + while ( *s != '\0' ) + { + if ( strp >= &(strings[strsize-2]) ) + { + fprintf(stderr, "sym: string table overflow (%d chars)\n", strsize); + exit(-1); + } + *strp++ = *s++; + } + *strp++ = '\0'; + + return( start ); +} diff --git a/src/translators/btparse/sym.h b/src/translators/btparse/sym.h new file mode 100644 index 0000000..78983d1 --- /dev/null +++ b/src/translators/btparse/sym.h @@ -0,0 +1,33 @@ +#include <ctype.h> + +/* + * Declarations for symbol table in sym.c + */ + +/* define some hash function */ +#ifndef HASH_FUN +#define HASH_FUN(p, h) while ( *p != '\0' ) h = (h<<1) + tolower (*p++); +#endif + +/* minimum symbol table record */ +typedef struct _sym +{ + char *symbol; /* the macro name */ + char *text; /* its expansion */ + struct _sym *next, *prev, **head, *scope; + unsigned int hash; +} Sym, *SymPtr; + +void zzs_init(int, int); +void zzs_free(void); +void zzs_done(void); +void zzs_add(char *, Sym *); +Sym *zzs_get(char *); +void zzs_del(Sym *); +void zzs_keydel(char *); +Sym **zzs_scope(Sym **); +Sym *zzs_rmscope(Sym **); +void zzs_stat(void); +Sym *zzs_new(char *); +Sym *zzs_newadd(char *); +char *zzs_strdup(char *); diff --git a/src/translators/btparse/tex_tree.c b/src/translators/btparse/tex_tree.c new file mode 100644 index 0000000..0d7d33d --- /dev/null +++ b/src/translators/btparse/tex_tree.c @@ -0,0 +1,414 @@ +/* 
------------------------------------------------------------------------ +@NAME : tex_tree.c +@DESCRIPTION: Functions for dealing with strings of TeX code: converting + them to tree representation, traversing the trees to glean + useful information, and converting back to string form. +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/05/29, Greg Ward +@MODIFIED : +@VERSION : $Id: tex_tree.c,v 1.4 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. +-------------------------------------------------------------------------- */ + +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "error.h" +#include "btparse.h" +/*#include "my_dmalloc.h"*/ + +/* blech! temp hack until I make error.c perfect and magical */ +#define string_warning(w) fprintf (stderr, w); + +typedef struct treestack_s +{ + bt_tex_tree * node; + struct treestack_s + * prev, + * next; +} treestack; + + +/* ---------------------------------------------------------------------- + * Stack manipulation functions + */ + +/* ------------------------------------------------------------------------ +@NAME : push_treestack() +@INPUT : *stack + node +@OUTPUT : *stack +@RETURNS : +@DESCRIPTION: Creates and initializes new node in a stack, and pushes it + onto the stack. 
+@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +push_treestack (treestack **stack, bt_tex_tree *node) +{ + treestack *newtop; + + newtop = (treestack *) malloc (sizeof (treestack)); + newtop->node = node; + newtop->next = NULL; + newtop->prev = *stack; + + if (*stack != NULL) /* stack already has some entries */ + { + (*stack)->next = newtop; + *stack = newtop; + } + + *stack = newtop; + +} /* push_treestack() */ + + +/* ------------------------------------------------------------------------ +@NAME : pop_treestack +@INPUT : *stack +@OUTPUT : *stack +@RETURNS : +@DESCRIPTION: Pops an entry off of a stack of tex_tree nodes, frees up + the wrapper treestack node, and returns the popped tree node. +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static bt_tex_tree * +pop_treestack (treestack **stack) +{ + treestack * oldtop; + bt_tex_tree * node; + + if (*stack == NULL) + internal_error ("attempt to pop off empty stack"); + oldtop = (*stack)->prev; + node = (*stack)->node; + free (*stack); + if (oldtop != NULL) + oldtop->next = NULL; + *stack = oldtop; + return node; + +} /* pop_treestack() */ + + +/* ---------------------------------------------------------------------- + * Tree creation/destruction functions + */ + +/* ------------------------------------------------------------------------ +@NAME : new_tex_tree +@INPUT : start +@OUTPUT : +@RETURNS : pointer to newly-allocated node +@DESCRIPTION: Allocates and initializes a bt_tex_tree node. 
+@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static bt_tex_tree * +new_tex_tree (char *start) +{ + bt_tex_tree * node; + + node = (bt_tex_tree *) malloc (sizeof (bt_tex_tree)); + node->start = start; + node->len = 0; + node->child = node->next = NULL; + return node; +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_build_tex_tree +@INPUT : string +@OUTPUT : +@RETURNS : pointer to a complete tree; call bt_free_tex_tree() to free + the entire tree +@DESCRIPTION: Traverses a string looking for TeX groups ({...}), and builds + a tree containing pointers into the string and describing + its brace-structure. +@GLOBALS : +@CALLS : +@CALLERS : +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +bt_tex_tree * +bt_build_tex_tree (char * string) +{ + int i; + int depth; + int len; + bt_tex_tree + * top, + * cur, + * new; + treestack + * stack; + + i = 0; + depth = 0; + len = strlen (string); + top = new_tex_tree (string); + stack = NULL; + + cur = top; + + while (i < len) + { + switch (string[i]) + { + case '{': /* go one level deeper */ + { + if (i == len-1) /* open brace in last character? */ + { + string_warning ("unbalanced braces: { at end of string"); + goto error; + } + + new = new_tex_tree (string+i+1); + cur->child = new; + push_treestack (&stack, cur); + cur = new; + depth++; + break; + } + case '}': /* pop level(s) off */ + { + while (i < len && string[i] == '}') + { + if (stack == NULL) + { + string_warning ("unbalanced braces: extra }"); + goto error; + } + cur = pop_treestack (&stack); + depth--; + i++; + } + i--; + + if (i == len-1) /* reached end of string? 
*/ + { + if (depth > 0) /* but not at depth 0 */ + { + string_warning ("unbalanced braces: not enough }'s"); + goto error; + } + + /* + * if we get here, do nothing -- we've reached the end of + * the string and are at depth 0, so will just fall out + * of the while loop at the end of this iteration + */ + } + else /* still have characters left */ + { /* to worry about */ + new = new_tex_tree (string+i+1); + cur->next = new; + cur = new; + } + + break; + } + default: + { + cur->len++; + } + + } /* switch */ + + i++; + + } /* while i */ + + if (depth > 0) + { + string_warning ("unbalanced braces (not enough }'s)"); + goto error; + } + + return top; + +error: + bt_free_tex_tree (&top); + return NULL; + +} /* bt_build_tex_tree() */ + + +/* ------------------------------------------------------------------------ +@NAME : bt_free_tex_tree +@INPUT : *top +@OUTPUT : *top (set to NULL after it's free()'d) +@RETURNS : +@DESCRIPTION: Frees up an entire tree created by bt_build_tex_tree(). +@GLOBALS : +@CALLS : itself, free() +@CALLERS : +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_free_tex_tree (bt_tex_tree **top) +{ + if ((*top)->child) bt_free_tex_tree (&(*top)->child); + if ((*top)->next) bt_free_tex_tree (&(*top)->next); + free (*top); + *top = NULL; +} + + + +/* ---------------------------------------------------------------------- + * Tree traversal functions + */ + +/* ------------------------------------------------------------------------ +@NAME : bt_dump_tex_tree +@INPUT : node + depth + stream +@OUTPUT : +@RETURNS : +@DESCRIPTION: Dumps a TeX tree: one node per line, depth indented according + to depth. 
+@GLOBALS : +@CALLS : itself +@CALLERS : +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +void +bt_dump_tex_tree (bt_tex_tree *node, int depth, FILE *stream) +{ + char buf[256]; + + if (node == NULL) + return; + + if (node->len > 255) + internal_error ("augughgh! buf too small"); + strncpy (buf, node->start, node->len); + buf[node->len] = (char) 0; + + fprintf (stream, "%*s[%s]\n", depth*2, "", buf); + + bt_dump_tex_tree (node->child, depth+1, stream); + bt_dump_tex_tree (node->next, depth, stream); + +} + + +/* ------------------------------------------------------------------------ +@NAME : count_length +@INPUT : node +@OUTPUT : +@RETURNS : +@DESCRIPTION: Counts the total number of characters that will be needed + to print a string reconstructed from a TeX tree. (Length + of string in each node, plus two [{ and }] for each down + edge.) +@GLOBALS : +@CALLS : itself +@CALLERS : bt_flatten_tex_tree +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static int +count_length (bt_tex_tree *node) +{ + if (node == NULL) return 0; + return + node->len + + (node->child ? 2 : 0) + + count_length (node->child) + + count_length (node->next); +} + + +/* ------------------------------------------------------------------------ +@NAME : flatten_tree +@INPUT : node + *offset +@OUTPUT : *buf + *offset +@RETURNS : +@DESCRIPTION: Dumps a reconstructed string ("flat" representation of the + tree) into a pre-allocated buffer, starting at a specified + offset. 
+@GLOBALS : +@CALLS : itself +@CALLERS : bt_flatten_tex_tree +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +static void +flatten_tree (bt_tex_tree *node, char *buf, int *offset) +{ + strncpy (buf + *offset, node->start, node->len); + *offset += node->len; + + if (node->child) + { + buf[(*offset)++] = '{'; + flatten_tree (node->child, buf, offset); + buf[(*offset)++] = '}'; + } + + if (node->next) + { + flatten_tree (node->next, buf, offset); + } +} + + +/* ------------------------------------------------------------------------ +@NAME : bt_flatten_tex_tree +@INPUT : top +@OUTPUT : +@RETURNS : flattened string representation of the tree (as a string + allocated with malloc(), so you should free() it when + you're done with it) +@DESCRIPTION: Counts the number of characters needed for a "flat" + string representation of a tree, allocates a string of + that size, and generates the string. +@GLOBALS : +@CALLS : count_length, flatten_tree +@CALLERS : +@CREATED : 1997/05/29, GPW +@MODIFIED : +-------------------------------------------------------------------------- */ +char * +bt_flatten_tex_tree (bt_tex_tree *top) +{ + int len; + int offset; + char * buf; + + len = count_length (top); + buf = (char *) malloc (sizeof (char) * (len+1)); + offset = 0; + flatten_tree (top, buf, &offset); + return buf; +} diff --git a/src/translators/btparse/tokens.h b/src/translators/btparse/tokens.h new file mode 100644 index 0000000..6f9405a --- /dev/null +++ b/src/translators/btparse/tokens.h @@ -0,0 +1,41 @@ +#ifndef tokens_h +#define tokens_h +/* tokens.h -- List of labelled tokens and stuff + * + * Generated from: bibtex.g + * + * Terence Parr, Will Cohen, and Hank Dietz: 1989-1994 + * Purdue University Electrical Engineering + * ANTLR Version 1.33 + */ +#define zzEOF_TOKEN 1 +#define AT 2 +#define COMMENT 4 +#define NUMBER 9 +#define NAME 10 +#define LBRACE 11 +#define RBRACE 12 +#define ENTRY_OPEN 13 +#define 
ENTRY_CLOSE 14 +#define EQUALS 15 +#define HASH 16 +#define COMMA 17 +#define STRING 25 + +void bibfile(AST**_root); +void entry(AST**_root); +void body(AST**_root, bt_metatype metatype ); +void contents(AST**_root, bt_metatype metatype ); +void fields(AST**_root); +void field(AST**_root); +void value(AST**_root); +void simple_value(AST**_root); + +#endif +extern SetWordType zzerr1[]; +extern SetWordType zzerr2[]; +extern SetWordType zzerr3[]; +extern SetWordType zzerr4[]; +extern SetWordType setwd1[]; +extern SetWordType zzerr5[]; +extern SetWordType setwd2[]; diff --git a/src/translators/btparse/traversal.c b/src/translators/btparse/traversal.c new file mode 100644 index 0000000..c7e10a2 --- /dev/null +++ b/src/translators/btparse/traversal.c @@ -0,0 +1,187 @@ +/* ------------------------------------------------------------------------ +@NAME : traversal.c +@DESCRIPTION: Routines for traversing the AST for a single entry. +@GLOBALS : +@CALLS : +@CREATED : 1997/01/21, Greg Ward +@MODIFIED : +@VERSION : $Id: traversal.c,v 1.17 1999/11/29 01:13:10 greg Rel $ +@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved. + + This file is part of the btparse library. This library is + free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+-------------------------------------------------------------------------- */ +/*#include "bt_config.h"*/ +#include <stdlib.h> +#include "btparse.h" +#include "parse_auxiliary.h" +#include "prototypes.h" +/*#include "my_dmalloc.h"*/ + + +AST *bt_next_entry (AST *entry_list, AST *prev_entry) +{ + if (entry_list == NULL || entry_list->nodetype != BTAST_ENTRY) + return NULL; + + if (prev_entry) + { + if (prev_entry->nodetype != BTAST_ENTRY) + return NULL; + else + return prev_entry->right; + } + else + return entry_list; +} + + +bt_metatype bt_entry_metatype (AST *entry) +{ + if (!entry) return BTE_UNKNOWN; + if (entry->nodetype != BTAST_ENTRY) + return BTE_UNKNOWN; + else + return entry->metatype; +} + + +char *bt_entry_type (AST *entry) +{ + if (!entry) return NULL; + if (entry->nodetype != BTAST_ENTRY) + return NULL; + else + return entry->text; +} + + +char *bt_entry_key (AST *entry) +{ + if (entry->metatype == BTE_REGULAR && + entry->down && entry->down->nodetype == BTAST_KEY) + { + return entry->down->text; + } + else + { + return NULL; + } +} + + +AST *bt_next_field (AST *entry, AST *prev, char **name) +{ + AST *field; + bt_metatype metatype; + + *name = NULL; + if (!entry || !entry->down) return NULL; /* protect against empty entry */ + + metatype = entry->metatype; + if (metatype != BTE_MACRODEF && metatype != BTE_REGULAR) + return NULL; + + if (prev == NULL) /* no previous field -- they must */ + { /* want the first one */ + field = entry->down; + if (metatype == BTE_REGULAR && field->nodetype == BTAST_KEY) + field = field->right; /* skip over citation key if present */ + } + else /* they really do want the next one */ + { + field = prev->right; + } + + if (!field) return NULL; /* protect against field-less entry */ + if (name) *name = field->text; + return field; +} /* bt_next_field() */ + + +AST *bt_next_macro (AST *entry, AST *prev, char **name) +{ + return bt_next_field (entry, prev, name); +} + + +AST *bt_next_value (AST *top, AST *prev, bt_nodetype 
*nodetype, char **text) +{ + bt_nodetype nt; /* type of `top' node (to check) */ + bt_metatype mt; + AST * value; + + if (nodetype) *nodetype = BTAST_BOGUS; + if (text) *text = NULL; + + if (!top) return NULL; + /* get_node_type (top, &nt, &mt); */ + nt = top->nodetype; + mt = top->metatype; + + if ((nt == BTAST_FIELD) || + (nt == BTAST_ENTRY && (mt == BTE_COMMENT || mt == BTE_PREAMBLE))) + { + if (prev == NULL) /* no previous value -- give 'em */ + { /* the first one */ + value = top->down; + if (!value) return NULL; + if (nodetype) *nodetype = value->nodetype; + } + else + { + value = prev->right; + if (!value) return NULL; + if (nodetype) *nodetype = value->nodetype; + } + + if (nt == BTAST_ENTRY && value->nodetype != BTAST_STRING) + internal_error ("found comment or preamble with non-string value"); + } + else + { + value = NULL; + } + + if (text && value) *text = value->text; + + return value; +} /* bt_next_value() */ + + +char *bt_get_text (AST *node) +{ + ushort pp_options = BTO_FULL; /* options for full processing: */ + /* expand macros, paste strings, */ + /* collapse whitespace */ + bt_nodetype nt; + bt_metatype mt; + + nt = node->nodetype; + mt = node->metatype; + + if (nt == BTAST_FIELD) + { +#if DEBUG + char *value; + + dump_ast ("bt_get_text (pre): node =\n", node); + value = bt_postprocess_field (node, pp_options, FALSE); + dump_ast ("bt_get_text (post): node =\n", node); + return value; +#else + return bt_postprocess_field (node, pp_options, FALSE); +#endif + } + else if (nt == BTAST_ENTRY && (mt == BTE_COMMENT || mt == BTE_PREAMBLE)) + { + return bt_postprocess_value (node->down, pp_options, FALSE); + } + else + { + return NULL; + } +} diff --git a/src/translators/btparse/util.c b/src/translators/btparse/util.c new file mode 100644 index 0000000..1330176 --- /dev/null +++ b/src/translators/btparse/util.c @@ -0,0 +1,79 @@ +/* ------------------------------------------------------------------------ +@NAME : util.c +@INPUT : +@OUTPUT : +@RETURNS : 
/* ------------------------------------------------------------------------
@NAME       : strlwr()
@INPUT      : s - NUL-terminated string (may be NULL)
@OUTPUT     : s - converted in place
@RETURNS    : s
@DESCRIPTION: Converts a string to lowercase in place, using tolower()
              and therefore the current C locale.
@GLOBALS    :
@CALLS      : tolower()
@CREATED    : 1996/01/06, GPW
@MODIFIED   : characters are passed to tolower() as unsigned char (a
              negative plain char is undefined behaviour there); a NULL
              argument is returned unchanged; single pass, no strlen().
@COMMENTS   : Only compiled when the platform does not provide strlwr()
              itself (HAVE_STRLWR unset or 0).
-------------------------------------------------------------------------- */
#if !HAVE_STRLWR
char *strlwr (char *s)
{
   char *p;

   if (s == NULL)
      return NULL;

   for (p = s; *p != '\0'; p++)
      *p = (char) tolower ((unsigned char) *p);

   return s;
}
#endif
+-------------------------------------------------------------------------- */ +#if !HAVE_STRUPR +char *strupr (char *s) +{ + int len, i; + + len = strlen (s); + for (i = 0; i < len; i++) + s[i] = toupper (s[i]); + + return s; +} +#endif diff --git a/src/translators/csvexporter.cpp b/src/translators/csvexporter.cpp new file mode 100644 index 0000000..bb206e1 --- /dev/null +++ b/src/translators/csvexporter.cpp @@ -0,0 +1,190 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "csvexporter.h" +#include "../document.h" +#include "../collection.h" +#include "../filehandler.h" + +#include <klocale.h> +#include <kdebug.h> +#include <klineedit.h> +#include <kconfig.h> + +#include <qgroupbox.h> +#include <qcheckbox.h> +#include <qlayout.h> +#include <qbuttongroup.h> +#include <qradiobutton.h> +#include <qwhatsthis.h> + +using Tellico::Export::CSVExporter; + +CSVExporter::CSVExporter() : Tellico::Export::Exporter(), + m_includeTitles(true), + m_delimiter(QChar(',')), + m_widget(0) { +} + +QString CSVExporter::formatString() const { + return i18n("CSV"); +} + +QString CSVExporter::fileFilter() const { + return i18n("*.csv|CSV Files (*.csv)") + QChar('\n') + i18n("*|All Files"); +} + +QString& CSVExporter::escapeText(QString& text_) { + bool quotes = false; + if(text_.find('"') != -1) { + quotes = true; + // quotation marks will be escaped by using a double pair + text_.replace('"', QString::fromLatin1("\"\"")); + } + 
// if the text contains quotes or the delimiter, it needs to be surrounded by quotes + if(quotes || text_.find(m_delimiter) != -1) { + text_.prepend('"'); + text_.append('"'); + } + return text_; +} + +bool CSVExporter::exec() { + if(!collection()) { + return false; + } + + QString text; + + Data::FieldVec fields = collection()->fields(); + Data::FieldVec::Iterator fIt; + + if(m_includeTitles) { + for(fIt = fields.begin(); fIt != fields.end(); ++fIt) { + QString title = fIt->title(); + text += escapeText(title); + if(!fIt.nextEnd()) { + text += m_delimiter; + } + } + text += '\n'; + } + + bool format = options() & Export::ExportFormatted; + + QString tmp; + for(Data::EntryVec::ConstIterator entryIt = entries().begin(); entryIt != entries().end(); ++entryIt) { + for(fIt = fields.begin(); fIt != fields.end(); ++fIt) { + tmp = entryIt->field(fIt->name(), format); + text += escapeText(tmp); + if(!fIt.nextEnd()) { + text += m_delimiter; + } + } + fIt = fields.begin(); + text += '\n'; + } + + return FileHandler::writeTextURL(url(), text, options() & ExportUTF8, options() & Export::ExportForce); +} + +QWidget* CSVExporter::widget(QWidget* parent_, const char* name_/*=0*/) { + if(m_widget && m_widget->parent() == parent_) { + return m_widget; + } + + m_widget = new QWidget(parent_, name_); + QVBoxLayout* l = new QVBoxLayout(m_widget); + + QGroupBox* box = new QGroupBox(1, Qt::Horizontal, i18n("CSV Options"), m_widget); + l->addWidget(box); + + m_checkIncludeTitles = new QCheckBox(i18n("Include field titles as column headers"), box); + m_checkIncludeTitles->setChecked(m_includeTitles); + QWhatsThis::add(m_checkIncludeTitles, i18n("If checked, a header row will be added with the " + "field titles.")); + + QButtonGroup* delimiterGroup = new QButtonGroup(0, Qt::Vertical, i18n("Delimiter"), box); + QGridLayout* m_delimiterGroupLayout = new QGridLayout(delimiterGroup->layout()); + m_delimiterGroupLayout->setAlignment(Qt::AlignTop); + QWhatsThis::add(delimiterGroup, i18n("In 
addition to a comma, other characters may be used as " + "a delimiter, separating each value in the file.")); + + m_radioComma = new QRadioButton(delimiterGroup); + m_radioComma->setText(i18n("Comma")); + m_radioComma->setChecked(true); + QWhatsThis::add(m_radioComma, i18n("Use a comma as the delimiter.")); + m_delimiterGroupLayout->addWidget(m_radioComma, 0, 0); + + m_radioSemicolon = new QRadioButton( delimiterGroup); + m_radioSemicolon->setText(i18n("Semicolon")); + QWhatsThis::add(m_radioSemicolon, i18n("Use a semi-colon as the delimiter.")); + m_delimiterGroupLayout->addWidget(m_radioSemicolon, 0, 1); + + m_radioTab = new QRadioButton(delimiterGroup); + m_radioTab->setText(i18n("Tab")); + QWhatsThis::add(m_radioTab, i18n("Use a tab as the delimiter.")); + m_delimiterGroupLayout->addWidget(m_radioTab, 1, 0); + + m_radioOther = new QRadioButton(delimiterGroup); + m_radioOther->setText(i18n("Other")); + QWhatsThis::add(m_radioOther, i18n("Use a custom string as the delimiter.")); + m_delimiterGroupLayout->addWidget(m_radioOther, 1, 1); + + m_editOther = new KLineEdit(delimiterGroup); + m_editOther->setEnabled(m_radioOther->isChecked()); + QWhatsThis::add(m_editOther, i18n("A custom string, such as a colon, may be used as a delimiter.")); + m_delimiterGroupLayout->addWidget(m_editOther, 1, 2); + QObject::connect(m_radioOther, SIGNAL(toggled(bool)), + m_editOther, SLOT(setEnabled(bool))); + + if(m_delimiter == QChar(',')) { + m_radioComma->setChecked(true); + } else if(m_delimiter == QChar(';')) { + m_radioSemicolon->setChecked(true); + } else if(m_delimiter == QChar('\t')) { + m_radioTab->setChecked(true); + } else if(!m_delimiter.isEmpty()) { + m_radioOther->setChecked(true); + m_editOther->setEnabled(true); + m_editOther->setText(m_delimiter); + } + + l->addStretch(1); + return m_widget; +} + +void CSVExporter::readOptions(KConfig* config_) { + KConfigGroup group(config_, QString::fromLatin1("ExportOptions - %1").arg(formatString())); + m_includeTitles = 
group.readBoolEntry("Include Titles", m_includeTitles); + m_delimiter = group.readEntry("Delimiter", m_delimiter); +} + +void CSVExporter::saveOptions(KConfig* config_) { + m_includeTitles = m_checkIncludeTitles->isChecked(); + if(m_radioComma->isChecked()) { + m_delimiter = QChar(','); + } else if(m_radioSemicolon->isChecked()) { + m_delimiter = QChar(';'); + } else if(m_radioTab->isChecked()) { + m_delimiter = QChar('\t'); + } else { + m_delimiter = m_editOther->text(); + } + + KConfigGroup group(config_, QString::fromLatin1("ExportOptions - %1").arg(formatString())); + group.writeEntry("Include Titles", m_includeTitles); + group.writeEntry("Delimiter", m_delimiter); +} + +#include "csvexporter.moc" diff --git a/src/translators/csvexporter.h b/src/translators/csvexporter.h new file mode 100644 index 0000000..23624e3 --- /dev/null +++ b/src/translators/csvexporter.h @@ -0,0 +1,63 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef CSVEXPORTER_H +#define CSVEXPORTER_H + +class KLineEdit; +class KConfig; + +class QWidget; +class QCheckBox; +class QRadioButton; + +#include "exporter.h" + +namespace Tellico { + namespace Export { + +/** + * @author Robby Stephenson + */ +class CSVExporter : public Exporter { +Q_OBJECT + +public: + CSVExporter(); + + virtual bool exec(); + virtual QString formatString() const; + virtual QString fileFilter() const; + + virtual QWidget* widget(QWidget* parent, const char* name=0); 
+ virtual void readOptions(KConfig* config); + virtual void saveOptions(KConfig* config); + +private: + QString& escapeText(QString& text); + + bool m_includeTitles; + QString m_delimiter; + + QWidget* m_widget; + QCheckBox* m_checkIncludeTitles; + QRadioButton* m_radioComma; + QRadioButton* m_radioSemicolon; + QRadioButton* m_radioTab; + QRadioButton* m_radioOther; + KLineEdit* m_editOther; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/csvimporter.cpp b/src/translators/csvimporter.cpp new file mode 100644 index 0000000..f0c0900 --- /dev/null +++ b/src/translators/csvimporter.cpp @@ -0,0 +1,552 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "csvimporter.h" +#include "translators.h" // needed for ImportAction +#include "../collectionfieldsdialog.h" +#include "../document.h" +#include "../collection.h" +#include "../progressmanager.h" +#include "../tellico_debug.h" +#include "../collectionfactory.h" +#include "../gui/collectiontypecombo.h" +#include "../latin1literal.h" +#include "../stringset.h" + +extern "C" { +#include "libcsv.h" +} + +#include <klineedit.h> +#include <kcombobox.h> +#include <knuminput.h> +#include <kpushbutton.h> +#include <kapplication.h> +#include <kiconloader.h> +#include <kconfig.h> +#include <kmessagebox.h> + +#include <qgroupbox.h> +#include <qlayout.h> +#include <qhbox.h> +#include <qlabel.h> +#include <qcheckbox.h> +#include 
<qbuttongroup.h> +#include <qradiobutton.h> +#include <qwhatsthis.h> +#include <qtable.h> +#include <qvaluevector.h> +#include <qregexp.h> + +using Tellico::Import::CSVImporter; + +static void writeToken(char* buffer, size_t len, void* data); +static void writeRow(char buffer, void* data); + +class CSVImporter::Parser { +public: + Parser(const QString& str) : stream(new QTextIStream(&str)) { csv_init(&parser, 0); } + ~Parser() { csv_free(parser); delete stream; stream = 0; } + + void setDelimiter(const QString& s) { Q_ASSERT(s.length() == 1); csv_set_delim(parser, s[0].latin1()); } + void reset(const QString& str) { delete stream; stream = new QTextIStream(&str); }; + bool hasNext() { return !stream->atEnd(); } + void skipLine() { stream->readLine(); } + + void addToken(const QString& t) { tokens += t; } + void setRowDone(bool b) { done = b; } + + QStringList nextTokens() { + tokens.clear(); + done = false; + while(hasNext() && !done) { + QCString line = stream->readLine().utf8() + '\n'; // need the eol char + csv_parse(parser, line, line.length(), &writeToken, &writeRow, this); + } + csv_fini(parser, &writeToken, &writeRow, this); + return tokens; + } + +private: + struct csv_parser* parser; + QTextIStream* stream; + QStringList tokens; + bool done; +}; + +static void writeToken(char* buffer, size_t len, void* data) { + CSVImporter::Parser* p = static_cast<CSVImporter::Parser*>(data); + p->addToken(QString::fromUtf8(buffer, len)); +} + +static void writeRow(char c, void* data) { + Q_UNUSED(c); + CSVImporter::Parser* p = static_cast<CSVImporter::Parser*>(data); + p->setRowDone(true); +} + +CSVImporter::CSVImporter(const KURL& url_) : Tellico::Import::TextImporter(url_), + m_coll(0), + m_existingCollection(0), + m_firstRowHeader(false), + m_delimiter(QString::fromLatin1(",")), + m_cancelled(false), + m_widget(0), + m_table(0), + m_hasAssignedFields(false), + m_parser(new Parser(text())) { + m_parser->setDelimiter(m_delimiter); +} + +CSVImporter::~CSVImporter() { + 
delete m_parser; + m_parser = 0; +} + +Tellico::Data::CollPtr CSVImporter::collection() { + // don't just check if m_coll is non-null since the collection can be created elsewhere + if(m_coll && m_coll->entryCount() > 0) { + return m_coll; + } + + if(!m_coll) { + m_coll = CollectionFactory::collection(m_comboColl->currentType(), true); + } + + const QStringList existingNames = m_coll->fieldNames(); + + QValueVector<int> cols; + QStringList names; + for(int col = 0; col < m_table->numCols(); ++col) { + QString t = m_table->horizontalHeader()->label(col); + if(m_existingCollection && m_existingCollection->fieldByTitle(t)) { + // the collection might have the right field, but a different title, say for translations + Data::FieldPtr f = m_existingCollection->fieldByTitle(t); + if(m_coll->hasField(f->name())) { + // might have different values settings + m_coll->removeField(f->name(), true /* force */); + } + m_coll->addField(new Data::Field(*f)); + cols.push_back(col); + names << f->name(); + } else if(m_coll->fieldByTitle(t)) { + cols.push_back(col); + names << m_coll->fieldNameByTitle(t); + } + } + + if(names.isEmpty()) { + myDebug() << "CSVImporter::collection() - no fields assigned" << endl; + return 0; + } + + m_parser->reset(text()); + + // if the first row are headers, skip it + if(m_firstRowHeader) { + m_parser->skipLine(); + } + + const uint numLines = text().contains('\n'); + const uint stepSize = QMAX(s_stepSize, numLines/100); + const bool showProgress = options() & ImportProgress; + + ProgressItem& item = ProgressManager::self()->newProgressItem(this, progressLabel(), true); + item.setTotalSteps(numLines); + connect(&item, SIGNAL(signalCancelled(ProgressItem*)), SLOT(slotCancel())); + ProgressItem::Done done(this); + + uint j = 0; + while(!m_cancelled && m_parser->hasNext()) { + bool empty = true; + Data::EntryPtr entry = new Data::Entry(m_coll); + QStringList values = m_parser->nextTokens(); + for(uint i = 0; i < names.size(); ++i) { +// QString value = 
values[cols[i]].simplifyWhiteSpace(); + QString value = values[cols[i]].stripWhiteSpace(); + bool success = entry->setField(names[i], value); + // we might need to add a new allowed value + // assume that if the user is importing the value, it should be allowed + if(!success && m_coll->fieldByName(names[i])->type() == Data::Field::Choice) { + Data::FieldPtr f = m_coll->fieldByName(names[i]); + StringSet allow; + allow.add(f->allowed()); + allow.add(value); + f->setAllowed(allow.toList()); + m_coll->modifyField(f); + success = entry->setField(names[i], value); + } + if(empty && success) { + empty = false; + } + } + if(!empty) { + m_coll->addEntries(entry); + } + + if(showProgress && j%stepSize == 0) { + ProgressManager::self()->setProgress(this, j); + kapp->processEvents(); + } + ++j; + } + + { + KConfigGroup config(KGlobal::config(), QString::fromLatin1("ImportOptions - CSV")); + config.writeEntry("Delimiter", m_delimiter); + config.writeEntry("First Row Titles", m_firstRowHeader); + } + + return m_coll; +} + +QWidget* CSVImporter::widget(QWidget* parent_, const char* name_) { + if(m_widget && m_widget->parent() == parent_) { + return m_widget; + } + + m_widget = new QWidget(parent_, name_); + QVBoxLayout* l = new QVBoxLayout(m_widget); + + QGroupBox* group = new QGroupBox(1, Qt::Horizontal, i18n("CSV Options"), m_widget); + l->addWidget(group); + + QHBox* box = new QHBox(group); + box->setSpacing(5); + QLabel* lab = new QLabel(i18n("Collection &type:"), box); + m_comboColl = new GUI::CollectionTypeCombo(box); + lab->setBuddy(m_comboColl); + QWhatsThis::add(m_comboColl, i18n("Select the type of collection being imported.")); + connect(m_comboColl, SIGNAL(activated(int)), SLOT(slotTypeChanged())); + // need a spacer + QWidget* w = new QWidget(box); + box->setStretchFactor(w, 1); + + m_checkFirstRowHeader = new QCheckBox(i18n("&First row contains field titles"), group); + QWhatsThis::add(m_checkFirstRowHeader, i18n("If checked, the first row is used as field 
titles.")); + connect(m_checkFirstRowHeader, SIGNAL(toggled(bool)), SLOT(slotFirstRowHeader(bool))); + + QHBox* hbox2 = new QHBox(group); + m_delimiterGroup = new QButtonGroup(0, Qt::Vertical, i18n("Delimiter"), hbox2); + QGridLayout* m_delimiterGroupLayout = new QGridLayout(m_delimiterGroup->layout(), 3, 3); + m_delimiterGroupLayout->setAlignment(Qt::AlignTop); + QWhatsThis::add(m_delimiterGroup, i18n("In addition to a comma, other characters may be used as " + "a delimiter, separating each value in the file.")); + connect(m_delimiterGroup, SIGNAL(clicked(int)), SLOT(slotDelimiter())); + + m_radioComma = new QRadioButton(m_delimiterGroup); + m_radioComma->setText(i18n("&Comma")); + m_radioComma->setChecked(true); + QWhatsThis::add(m_radioComma, i18n("Use a comma as the delimiter.")); + m_delimiterGroupLayout->addWidget(m_radioComma, 1, 0); + + m_radioSemicolon = new QRadioButton( m_delimiterGroup); + m_radioSemicolon->setText(i18n("&Semicolon")); + QWhatsThis::add(m_radioSemicolon, i18n("Use a semi-colon as the delimiter.")); + m_delimiterGroupLayout->addWidget(m_radioSemicolon, 1, 1); + + m_radioTab = new QRadioButton(m_delimiterGroup); + m_radioTab->setText(i18n("Ta&b")); + QWhatsThis::add(m_radioTab, i18n("Use a tab as the delimiter.")); + m_delimiterGroupLayout->addWidget(m_radioTab, 2, 0); + + m_radioOther = new QRadioButton(m_delimiterGroup); + m_radioOther->setText(i18n("Ot&her:")); + QWhatsThis::add(m_radioOther, i18n("Use a custom string as the delimiter.")); + m_delimiterGroupLayout->addWidget(m_radioOther, 2, 1); + + m_editOther = new KLineEdit(m_delimiterGroup); + m_editOther->setEnabled(false); + m_editOther->setFixedWidth(m_widget->fontMetrics().width('X') * 4); + m_editOther->setMaxLength(1); + QWhatsThis::add(m_editOther, i18n("A custom string, such as a colon, may be used as a delimiter.")); + m_delimiterGroupLayout->addWidget(m_editOther, 2, 2); + connect(m_radioOther, SIGNAL(toggled(bool)), + m_editOther, SLOT(setEnabled(bool))); + 
connect(m_editOther, SIGNAL(textChanged(const QString&)), SLOT(slotDelimiter())); + + w = new QWidget(hbox2); + hbox2->setStretchFactor(w, 1); + + m_table = new QTable(5, 0, group); + m_table->setSelectionMode(QTable::Single); + m_table->setFocusStyle(QTable::FollowStyle); + m_table->setLeftMargin(0); + m_table->verticalHeader()->hide(); + m_table->horizontalHeader()->setClickEnabled(true); + m_table->setReadOnly(true); + m_table->setMinimumHeight(m_widget->fontMetrics().lineSpacing() * 8); + QWhatsThis::add(m_table, i18n("The table shows up to the first five lines of the CSV file.")); + connect(m_table, SIGNAL(currentChanged(int, int)), SLOT(slotCurrentChanged(int, int))); + connect(m_table->horizontalHeader(), SIGNAL(clicked(int)), SLOT(slotHeaderClicked(int))); + + QWidget* hbox = new QWidget(group); + QHBoxLayout* hlay = new QHBoxLayout(hbox, 5); + hlay->addStretch(10); + QWhatsThis::add(hbox, i18n("<qt>Set each column to correspond to a field in the collection by choosing " + "a column, selecting the field, then clicking the <i>Assign Field</i> button.</qt>")); + lab = new QLabel(i18n("Co&lumn:"), hbox); + hlay->addWidget(lab); + m_colSpinBox = new KIntSpinBox(hbox); + hlay->addWidget(m_colSpinBox); + m_colSpinBox->setMinValue(1); + connect(m_colSpinBox, SIGNAL(valueChanged(int)), SLOT(slotSelectColumn(int))); + lab->setBuddy(m_colSpinBox); + hlay->addSpacing(10); + + lab = new QLabel(i18n("&Data field in this column:"), hbox); + hlay->addWidget(lab); + m_comboField = new KComboBox(hbox); + hlay->addWidget(m_comboField); + connect(m_comboField, SIGNAL(activated(int)), SLOT(slotFieldChanged(int))); + lab->setBuddy(m_comboField); + hlay->addSpacing(10); + + m_setColumnBtn = new KPushButton(i18n("&Assign Field"), hbox); + hlay->addWidget(m_setColumnBtn); + m_setColumnBtn->setIconSet(SmallIconSet(QString::fromLatin1("apply"))); + connect(m_setColumnBtn, SIGNAL(clicked()), SLOT(slotSetColumnTitle())); + hlay->addStretch(10); + + l->addStretch(1); + + KConfigGroup 
config(KGlobal::config(), QString::fromLatin1("ImportOptions - CSV")); + m_delimiter = config.readEntry("Delimiter", m_delimiter); + m_firstRowHeader = config.readBoolEntry("First Row Titles", m_firstRowHeader); + + m_checkFirstRowHeader->setChecked(m_firstRowHeader); + if(m_delimiter == Latin1Literal(",")) { + m_radioComma->setChecked(true); + slotDelimiter(); // since the comma box was already checked, the slot won't fire + } else if(m_delimiter == Latin1Literal(";")) { + m_radioSemicolon->setChecked(true); + } else if(m_delimiter == Latin1Literal("\t")) { + m_radioTab->setChecked(true); + } else if(!m_delimiter.isEmpty()) { + m_radioOther->setChecked(true); + m_editOther->setEnabled(true); + m_editOther->setText(m_delimiter); + } + + return m_widget; +} + +bool CSVImporter::validImport() const { + // at least one column has to be defined + if(!m_hasAssignedFields) { + KMessageBox::sorry(m_widget, i18n("At least one column must be assigned to a field. " + "Only assigned columns will be imported.")); + } + return m_hasAssignedFields; +} + +void CSVImporter::fillTable() { + if(!m_table) { + return; + } + + m_parser->reset(text()); + // not skipping first row since the updateHeader() call depends on it + + int maxCols = 0; + int row = 0; + for( ; m_parser->hasNext() && row < m_table->numRows(); ++row) { + QStringList values = m_parser->nextTokens(); + if(static_cast<int>(values.count()) > m_table->numCols()) { + m_table->setNumCols(values.count()); + m_colSpinBox->setMaxValue(values.count()); + } + int col = 0; + for(QStringList::ConstIterator it = values.begin(); it != values.end(); ++it) { + m_table->setText(row, col, *it); + m_table->adjustColumn(col); + ++col; + } + if(col > maxCols) { + maxCols = col; + } + } + for( ; row < m_table->numRows(); ++row) { + for(int col = 0; col < m_table->numCols(); ++col) { + m_table->clearCell(row, col); + } + } + + m_table->setNumCols(maxCols); +} + +void CSVImporter::slotTypeChanged() { + // iterate over the collection names 
until it matches the text of the combo box + Data::Collection::Type type = static_cast<Data::Collection::Type>(m_comboColl->currentType()); + m_coll = CollectionFactory::collection(type, true); + + updateHeader(true); + m_comboField->clear(); + m_comboField->insertStringList(m_existingCollection ? m_existingCollection->fieldTitles() : m_coll->fieldTitles()); + m_comboField->insertItem('<' + i18n("New Field") + '>'); + + // hack to force a resize + m_comboField->setFont(m_comboField->font()); + m_comboField->updateGeometry(); +} + +void CSVImporter::slotFirstRowHeader(bool b_) { + m_firstRowHeader = b_; + updateHeader(false); + fillTable(); +} + +void CSVImporter::slotDelimiter() { + if(m_radioComma->isChecked()) { + m_delimiter = ','; + } else if(m_radioSemicolon->isChecked()) { + m_delimiter = ';'; + } else if(m_radioTab->isChecked()) { + m_delimiter = '\t'; + } else { + m_editOther->setFocus(); + m_delimiter = m_editOther->text(); + } + if(!m_delimiter.isEmpty()) { + m_parser->setDelimiter(m_delimiter); + fillTable(); + updateHeader(false); + } +} + +void CSVImporter::slotCurrentChanged(int, int col_) { + int pos = col_+1; + m_colSpinBox->setValue(pos); //slotSelectColumn() gets called because of the signal +} + +void CSVImporter::slotHeaderClicked(int col_) { + int pos = col_+1; + m_colSpinBox->setValue(pos); //slotSelectColumn() gets called because of the signal +} + +void CSVImporter::slotSelectColumn(int pos_) { + // pos is really the number of the position of the column + int col = pos_ - 1; + m_table->ensureCellVisible(0, col); + m_comboField->setCurrentItem(m_table->horizontalHeader()->label(col)); +} + +void CSVImporter::slotSetColumnTitle() { + int col = m_colSpinBox->value()-1; + const QString title = m_comboField->currentText(); + m_table->horizontalHeader()->setLabel(col, title); + m_hasAssignedFields = true; + // make sure none of the other columns have this title + bool found = false; + for(int i = 0; i < col; ++i) { + 
if(m_table->horizontalHeader()->label(i) == title) { + m_table->horizontalHeader()->setLabel(i, QString::number(i+1)); + found = true; + break; + } + } + // if found, then we're done + if(found) { + return; + } + for(int i = col+1; i < m_table->numCols(); ++i) { + if(m_table->horizontalHeader()->label(i) == title) { + m_table->horizontalHeader()->setLabel(i, QString::number(i+1)); + break; + } + } +} + +void CSVImporter::updateHeader(bool force_) { + if(!m_table) { + return; + } + if(!m_firstRowHeader && !force_) { + return; + } + + Data::CollPtr c = m_existingCollection ? m_existingCollection : m_coll; + for(int col = 0; col < m_table->numCols(); ++col) { + QString s = m_table->text(0, col); + Data::FieldPtr f; + if(c) { + c->fieldByTitle(s); + if(!f) { + f = c->fieldByName(s); + } + } + if(m_firstRowHeader && !s.isEmpty() && c && f) { + m_table->horizontalHeader()->setLabel(col, f->title()); + m_hasAssignedFields = true; + } else { + m_table->horizontalHeader()->setLabel(col, QString::number(col+1)); + } + } +} + +void CSVImporter::slotFieldChanged(int idx_) { + // only care if it's the last item -> add new field + if(idx_ < m_comboField->count()-1) { + return; + } + + Data::CollPtr c = m_existingCollection ? 
m_existingCollection : m_coll; + uint count = c->fieldTitles().count(); + CollectionFieldsDialog dlg(c, m_widget); +// dlg.setModal(true); + if(dlg.exec() == QDialog::Accepted) { + m_comboField->clear(); + m_comboField->insertStringList(c->fieldTitles()); + m_comboField->insertItem('<' + i18n("New Field") + '>'); + if(count != c->fieldTitles().count()) { + fillTable(); + } + m_comboField->setCurrentItem(0); + } +} + +void CSVImporter::slotActionChanged(int action_) { + Data::CollPtr currColl = Data::Document::self()->collection(); + if(!currColl) { + m_existingCollection = 0; + return; + } + + switch(action_) { + case Import::Replace: + { + int currType = m_comboColl->currentType(); + m_comboColl->reset(); + m_comboColl->setCurrentType(currType); + m_existingCollection = 0; + } + break; + + case Import::Append: + case Import::Merge: + { + m_comboColl->clear(); + QString name = CollectionFactory::nameMap()[currColl->type()]; + m_comboColl->insertItem(name, currColl->type()); + m_existingCollection = currColl; + } + break; + } + slotTypeChanged(); +} + +void CSVImporter::slotCancel() { + m_cancelled = true; +} + +#include "csvimporter.moc" diff --git a/src/translators/csvimporter.h b/src/translators/csvimporter.h new file mode 100644 index 0000000..6561584 --- /dev/null +++ b/src/translators/csvimporter.h @@ -0,0 +1,107 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef CSVIMPORTER_H +#define CSVIMPORTER_H + 
+class CSVImporterWidget; + +class KLineEdit; +class KComboBox; +class KIntSpinBox; +class KPushButton; + +class QButtonGroup; +class QCheckBox; +class QRadioButton; +class QTable; + +#include "textimporter.h" +#include "../datavectors.h" + +namespace Tellico { + namespace GUI { + class CollectionTypeCombo; + } + namespace Import { + +/** + * @author Robby Stephenson + */ +class CSVImporter : public TextImporter { +Q_OBJECT + +public: + class Parser; + + /** + */ + CSVImporter(const KURL& url); + ~CSVImporter(); + + /** + * @return A pointer to a @ref Data::Collection, or 0 if none can be created. + */ + virtual Data::CollPtr collection(); + /** + */ + virtual QWidget* widget(QWidget* parent, const char* name=0); + + virtual bool validImport() const; + +public slots: + void slotActionChanged(int action); + void slotCancel(); + +private slots: + void slotTypeChanged(); + void slotFieldChanged(int idx); + void slotFirstRowHeader(bool b); + void slotDelimiter(); + void slotCurrentChanged(int row, int col); + void slotHeaderClicked(int col); + void slotSelectColumn(int col); + void slotSetColumnTitle(); + +private: + void fillTable(); + void updateHeader(bool force); + + Data::CollPtr m_coll; + Data::CollPtr m_existingCollection; // used to grab fields from current collection in window + bool m_firstRowHeader; + QString m_delimiter; + bool m_cancelled; + + QWidget* m_widget; + GUI::CollectionTypeCombo* m_comboColl; + QCheckBox* m_checkFirstRowHeader; + QButtonGroup* m_delimiterGroup; + QRadioButton* m_radioComma; + QRadioButton* m_radioSemicolon; + QRadioButton* m_radioTab; + QRadioButton* m_radioOther; + KLineEdit* m_editOther; + QTable* m_table; + KIntSpinBox* m_colSpinBox; + KComboBox* m_comboField; + KPushButton* m_setColumnBtn; + bool m_hasAssignedFields; + + friend class Parser; + Parser* m_parser; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/dataimporter.h b/src/translators/dataimporter.h new file mode 100644 index 
0000000..4d21a53 --- /dev/null +++ b/src/translators/dataimporter.h @@ -0,0 +1,71 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef DATAIMPORTER_H +#define DATAIMPORTER_H + +#include "importer.h" +#include "../filehandler.h" + +namespace Tellico { + namespace Import { + +/** + * @author Robby Stephenson + */ +class DataImporter : public Importer { +Q_OBJECT + +public: + enum Source { URL, Text }; + + /** + * @param url The URL of the file to import + */ +// DataImporter(const KURL& url) : Importer(url), m_data(FileHandler::readDataFile(url)), m_source(URL) {} + DataImporter(const KURL& url) : Importer(url), m_source(URL) { m_fileRef = FileHandler::fileRef(url); } + /** + * Since the conversion to a QCString appends a \0 character at the end, remove it. + * + * @param text The text. It MUST be in UTF-8. 
+ */ + DataImporter(const QString& text) : Importer(text), m_data(text.utf8()), m_source(Text), m_fileRef(0) + { m_data.truncate(m_data.size()-1); } + /** + */ + virtual ~DataImporter() { delete m_fileRef; m_fileRef = 0; } + + Source source() const { return m_source; } + + virtual void setText(const QString& text) { + Importer::setText(text); m_data = text.utf8(); m_data.truncate(m_data.size()-1); m_source = Text; + } + +protected: + /** + * Return the data in the imported file + * + * @return the file data + */ + const QByteArray& data() const { return m_data; } + FileHandler::FileRef& fileRef() const { return *m_fileRef; } + +private: + QByteArray m_data; + Source m_source; + FileHandler::FileRef* m_fileRef; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/dcimporter.cpp b/src/translators/dcimporter.cpp new file mode 100644 index 0000000..c8bb59f --- /dev/null +++ b/src/translators/dcimporter.cpp @@ -0,0 +1,128 @@ +/*************************************************************************** + copyright : (C) 2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "dcimporter.h" +#include "../collections/bookcollection.h" +#include "tellico_xml.h" +#include "../tellico_debug.h" + +using Tellico::Import::DCImporter; + +DCImporter::DCImporter(const KURL& url_) : XMLImporter(url_) { +} + +DCImporter::DCImporter(const QString& text_) : XMLImporter(text_) { +} + +DCImporter::DCImporter(const QDomDocument& dom_) : XMLImporter(dom_) { +} + +Tellico::Data::CollPtr 
DCImporter::collection() { + const QString& dc = XML::nsDublinCore; + const QString& zing = XML::nsZing; + + Data::CollPtr c = new Data::BookCollection(true); + + QDomDocument doc = domDocument(); + + QRegExp authorDateRX(QString::fromLatin1(",?(\\s+\\d{4}-?(?:\\d{4})?\\.?)(.*)$")); + QRegExp dateRX(QString::fromLatin1("\\d{4}")); + + QDomNodeList recordList = doc.elementsByTagNameNS(zing, QString::fromLatin1("recordData")); + myDebug() << "DCImporter::collection() - number of records: " << recordList.count() << endl; + + enum { UnknownNS, UseNS, NoNS } useNS = UnknownNS; + +#define GETELEMENTS(s) (useNS == NoNS) \ + ? elem.elementsByTagName(QString::fromLatin1(s)) \ + : elem.elementsByTagNameNS(dc, QString::fromLatin1(s)) + + for(uint i = 0; i < recordList.count(); ++i) { + Data::EntryPtr e = new Data::Entry(c); + + QDomElement elem = recordList.item(i).toElement(); + + QDomNodeList nodeList = GETELEMENTS("title"); + if(nodeList.count() == 0) { // no title, skip + if(useNS == UnknownNS) { + nodeList = elem.elementsByTagName(QString::fromLatin1("title")); + if(nodeList.count() > 0) { + useNS = NoNS; + } else { + myDebug() << "DCImporter::collection() - no title, skipping" << endl; + continue; + } + } else { + myDebug() << "DCImporter::collection() - no title, skipping" << endl; + continue; + } + } else if(useNS == UnknownNS) { + useNS = UseNS; + } + QString s = nodeList.item(0).toElement().text(); + s.replace('\n', ' '); + s = s.simplifyWhiteSpace(); + e->setField(QString::fromLatin1("title"), s); + + nodeList = GETELEMENTS("creator"); + QStringList creators; + for(uint j = 0; j < nodeList.count(); ++j) { + QString s = nodeList.item(j).toElement().text(); + if(authorDateRX.search(s) > -1) { + // check if anything after date like [publisher] + if(authorDateRX.cap(2).stripWhiteSpace().isEmpty()) { + s.remove(authorDateRX); + s = s.simplifyWhiteSpace(); + creators << s; + } else { + myDebug() << "DCImporter::collection() - weird creator, skipping: " << s << endl; + } 
+ } else { + creators << s; + } + } + e->setField(QString::fromLatin1("author"), creators.join(QString::fromLatin1("; "))); + + nodeList = GETELEMENTS("publisher"); + QStringList publishers; + for(uint j = 0; j < nodeList.count(); ++j) { + publishers << nodeList.item(j).toElement().text(); + } + e->setField(QString::fromLatin1("publisher"), publishers.join(QString::fromLatin1("; "))); + + nodeList = GETELEMENTS("subject"); + QStringList keywords; + for(uint j = 0; j < nodeList.count(); ++j) { + keywords << nodeList.item(j).toElement().text(); + } + e->setField(QString::fromLatin1("keyword"), keywords.join(QString::fromLatin1("; "))); + + nodeList = GETELEMENTS("date"); + if(nodeList.count() > 0) { + QString s = nodeList.item(0).toElement().text(); + if(dateRX.search(s) > -1) { + e->setField(QString::fromLatin1("pub_year"), dateRX.cap()); + } + } + + nodeList = GETELEMENTS("description"); + if(nodeList.count() > 0) { // no title, skip + e->setField(QString::fromLatin1("comments"), nodeList.item(0).toElement().text()); + } + + c->addEntries(e); + } +#undef GETELEMENTS + + return c; +} diff --git a/src/translators/dcimporter.h b/src/translators/dcimporter.h new file mode 100644 index 0000000..03eaedf --- /dev/null +++ b/src/translators/dcimporter.h @@ -0,0 +1,34 @@ +/*************************************************************************** + copyright : (C) 2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_IMPORT_DCIMPORTER_H +#define TELLICO_IMPORT_DCIMPORTER_H + +#include "xmlimporter.h" + +namespace 
Tellico { + namespace Import { + +class DCImporter : public XMLImporter { +public: + DCImporter(const KURL& url); + DCImporter(const QString& text); + DCImporter(const QDomDocument& dom); + ~DCImporter() {} + + virtual Data::CollPtr collection(); +}; + + } +} +#endif diff --git a/src/translators/deliciousimporter.cpp b/src/translators/deliciousimporter.cpp new file mode 100644 index 0000000..5c434cd --- /dev/null +++ b/src/translators/deliciousimporter.cpp @@ -0,0 +1,87 @@ +/*************************************************************************** + copyright : (C) 2007 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "deliciousimporter.h" +#include "../collection.h" +#include "../rtf2html/rtf2html.h" +#include "../imagefactory.h" +#include "../tellico_debug.h" + +#include <kstandarddirs.h> + +#include <qfile.h> + +using Tellico::Import::DeliciousImporter; + +DeliciousImporter::DeliciousImporter(const KURL& url_) : XSLTImporter(url_) { + QString xsltFile = locate("appdata", QString::fromLatin1("delicious2tellico.xsl")); + if(!xsltFile.isEmpty()) { + KURL u; + u.setPath(xsltFile); + XSLTImporter::setXSLTURL(u); + } else { + kdWarning() << "DeliciousImporter() - unable to find delicious2tellico.xml!" 
<< endl; + } +} + +bool DeliciousImporter::canImport(int type) const { + return type == Data::Collection::Book; +} + +Tellico::Data::CollPtr DeliciousImporter::collection() { + Data::CollPtr coll = XSLTImporter::collection(); + if(!coll) { + return 0; + } + + KURL libraryDir = url(); + libraryDir.setPath(url().directory() + "Images/"); + const QStringList imageDirs = QStringList() + << QString::fromLatin1("Large Covers/") + << QString::fromLatin1("Medium Covers/") + << QString::fromLatin1("Small Covers/") + << QString::fromLatin1("Plain Covers/"); + const QString commField = QString::fromLatin1("comments"); + const QString uuidField = QString::fromLatin1("uuid"); + const QString coverField = QString::fromLatin1("cover"); + const bool isLocal = url().isLocalFile(); + + Data::EntryVec entries = coll->entries(); + for(Data::EntryVecIt entry = entries.begin(); entry != entries.end(); ++entry) { + QString comments = entry->field(commField); + if(!comments.isEmpty()) { + RTF2HTML rtf2html(comments); + entry->setField(commField, rtf2html.toHTML()); + } + + //try to add images + QString uuid = entry->field(uuidField); + if(!uuid.isEmpty() && isLocal) { + for(QStringList::ConstIterator it = imageDirs.begin(); it != imageDirs.end(); ++it) { + QString imgPath = libraryDir.path() + *it + uuid; + if(!QFile::exists(imgPath)) { + continue; + } + QString imgID = ImageFactory::addImage(imgPath, true); + if(!imgID.isEmpty()) { + entry->setField(coverField, imgID); + } + break; + } + } + } + coll->removeField(uuidField); + return coll; +} + +#include "deliciousimporter.moc" diff --git a/src/translators/deliciousimporter.h b/src/translators/deliciousimporter.h new file mode 100644 index 0000000..657160e --- /dev/null +++ b/src/translators/deliciousimporter.h @@ -0,0 +1,49 @@ +/*************************************************************************** + copyright : (C) 2007 by Robby Stephenson + email : robby@periapsis.org + 
***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of version 2 of the GNU General Public License as  *
+ *   published by the Free Software Foundation;                            *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef TELLICO_IMPORT_DELICIOUSIMPORTER_H
+#define TELLICO_IMPORT_DELICIOUSIMPORTER_H
+
+#include "xsltimporter.h"
+#include "../datavectors.h"
+
+namespace Tellico {
+  namespace Import {
+
+/**
+ * XSLT based importer that uses the delicious2tellico.xsl stylesheet.
+ *
+ * @author Robby Stephenson
+*/
+class DeliciousImporter : public XSLTImporter {
+Q_OBJECT
+
+public:
+  /**
+   * @param url The URL of the file to import
+   */
+  DeliciousImporter(const KURL& url);
+
+  /**
+   * @return The imported collection, or 0 if the import failed
+   */
+  virtual Data::CollPtr collection();
+  /**
+   * This importer has no options widget.
+   *
+   * @return Always 0
+   */
+  virtual QWidget* widget(QWidget*, const char*) { return 0; }
+  /**
+   * @param type The collection type to check
+   * @return Whether a collection of that type can be imported
+   */
+  virtual bool canImport(int type) const;
+
+private:
+  // private so it can't be changed accidentally
+  void setXSLTURL(const KURL& url);
+};
+
+  } // end namespace
+} // end namespace
+#endif
diff --git a/src/translators/exporter.cpp b/src/translators/exporter.cpp
new file mode 100644
index 0000000..2fe78b7
--- /dev/null
+++ b/src/translators/exporter.cpp
@@ -0,0 +1,36 @@
+/***************************************************************************
+    copyright            : (C) 2005-2006 by Robby Stephenson
+    email                : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of version 2 of the GNU General Public License as  *
+ *   published by the Free Software Foundation;                            *
+ *                                                                         *
+ ***************************************************************************/
+
+#include "exporter.h"
+#include "../document.h"
+#include "../collection.h"
+
+using Tellico::Export::Exporter;
+
+// No explicit collection: collection() falls back to the current document's
+// collection. Starts with the ExportUTF8 and ExportComplete options set.
+Exporter::Exporter() : QObject(), m_options(Export::ExportUTF8 | Export::ExportComplete), m_coll(0) {
+}
+
+// Exports the given collection. Starts with only the ExportUTF8 option set.
+Exporter::Exporter(Data::CollPtr coll) : QObject(), m_options(Export::ExportUTF8), m_coll(coll) {
+}
+
+Exporter::~Exporter() {
+}
+
+// Returns the collection handed to the constructor if there is one,
+// otherwise the collection of the current document.
+Tellico::Data::CollPtr Exporter::collection() const {
+  if(m_coll) {
+    return m_coll;
+  }
+  return Data::Document::self()->collection();
+}
+
+#include "exporter.moc"
diff --git a/src/translators/exporter.h b/src/translators/exporter.h
new file mode 100644
index 0000000..2ffc13b
--- /dev/null
+++ b/src/translators/exporter.h
@@ -0,0 +1,89 @@
+/***************************************************************************
+    copyright            : (C) 2003-2006 by Robby Stephenson
+    email                : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of version 2 of the GNU General Public License as  *
+ *   published by the Free Software Foundation;                            *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef TELLICO_EXPORTER_H
+#define TELLICO_EXPORTER_H
+
+class KConfig;
+
+class QWidget;
+class QString;
+
+#include "../entry.h"
+#include "../datavectors.h"
+
+#include <kurl.h>
+
+#include <qobject.h>
+
+namespace Tellico {
+  namespace Export {
+    // bit flags, meant to be or'ed together and passed to Exporter::setOptions()
+    enum Options {
+      ExportFormatted = 1 << 0, // format entries when exported
+      ExportUTF8 = 1 << 1, // valid for some text files, export as utf-8
+      ExportImages = 1 << 2, // should the images be included?
+      ExportForce = 1 << 3, // force the export, no confirmation of overwriting
+      ExportComplete = 1 << 4, // export complete document, including loans, etc.
+      ExportProgress = 1 << 5, // show progress bar
+      ExportClean = 1 << 6, // specifically for bibliographies, remove latex commands
+      ExportVerifyImages= 1 << 7, // don't put in an image link that's not in the cache
+      ExportImageSize = 1 << 8 // include image size in the generated XML
+    };
+
+/**
+ * Abstract base class of all exporters. It holds the export options, the
+ * target URL, the entries to export and, optionally, the collection they
+ * belong to.
+ *
+ * @author Robby Stephenson
+ */
+class Exporter : public QObject {
+Q_OBJECT
+
+public:
+  /**
+   * Creates an exporter without an explicit collection, see @ref collection().
+   */
+  Exporter();
+  /**
+   * Creates an exporter for the given collection.
+   */
+  Exporter(Data::CollPtr coll);
+  virtual ~Exporter();
+
+  /**
+   * @return The collection given to the constructor, or the current
+   * document's collection if none was given
+   */
+  Data::CollPtr collection() const;
+
+  void setURL(const KURL& url_) { m_url = url_; }
+  void setEntries(const Data::EntryVec& entries) { m_entries = entries; }
+  /**
+   * Stores the new options (a combination of Export::Options flags) and
+   * then calls @ref reset().
+   */
+  void setOptions(long options) { m_options = options; reset(); }
+
+  virtual QString formatString() const = 0;
+  virtual QString fileFilter() const = 0;
+  const KURL& url() const { return m_url; }
+  const Data::EntryVec& entries() const { return m_entries; }
+  long options() const { return m_options; }
+
+  /**
+   * Do the export
+   */
+  virtual bool exec() = 0;
+  /**
+   * If changing options in the exporter should cause member variables to reset, implement
+   * that here
+   */
+  virtual void reset() {}
+
+  virtual QWidget* widget(QWidget* parent, const char* name=0) = 0;
+  // the default implementations do nothing
+  virtual void readOptions(KConfig*) {}
+  virtual void saveOptions(KConfig*) {}
+
+private:
+  long m_options;
+  Data::CollPtr m_coll;
+  Data::EntryVec m_entries;
+  KURL m_url;
+};
+
+  } // end namespace
+} // end namespace
+#endif
diff --git a/src/translators/filelistingimporter.cpp b/src/translators/filelistingimporter.cpp
new file mode 100644
index 0000000..bef9288
--- /dev/null
+++ b/src/translators/filelistingimporter.cpp
@@ -0,0 +1,274 @@
+/***************************************************************************
+    copyright            : (C) 2005-2006 by Robby Stephenson
+    email                : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software;
you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "filelistingimporter.h" +#include "../collections/filecatalog.h" +#include "../entry.h" +#include "../field.h" +#include "../latin1literal.h" +#include "../imagefactory.h" +#include "../tellico_utils.h" +#include "../tellico_kernel.h" +#include "../progressmanager.h" +#include "../core/netaccess.h" +#include "../tellico_debug.h" + +#include <kapplication.h> +#include <kmountpoint.h> +#include <kio/job.h> +#include <kio/netaccess.h> + +#include <qcheckbox.h> +#include <qvgroupbox.h> +#include <qlayout.h> +#include <qwhatsthis.h> +#include <qfile.h> +#include <qfileinfo.h> + +#include <stdio.h> + +namespace { + static const int FILE_PREVIEW_SIZE = 128; + // volume name starts at 16*2048+40 bytes into the header + static const int VOLUME_NAME_POS = 32808; + static const int VOLUME_NAME_SIZE = 32; +} + +using Tellico::Import::FileListingImporter; + +FileListingImporter::FileListingImporter(const KURL& url_) : Importer(url_), m_coll(0), m_widget(0), + m_job(0), m_cancelled(false) { + m_files.setAutoDelete(true); +} + +bool FileListingImporter::canImport(int type) const { + return type == Data::Collection::File; +} + +Tellico::Data::CollPtr FileListingImporter::collection() { + if(m_coll) { + return m_coll; + } + + ProgressItem& item = ProgressManager::self()->newProgressItem(this, i18n("Scanning files..."), true); + item.setTotalSteps(100); + connect(&item, SIGNAL(signalCancelled(ProgressItem*)), SLOT(slotCancel())); + ProgressItem::Done done(this); + + // going to assume only one volume will ever be imported + QString volume = volumeName(); + + m_job = m_recursive->isChecked() + ? 
KIO::listRecursive(url(), true, false) + : KIO::listDir(url(), true, false); + connect(m_job, SIGNAL(entries(KIO::Job*, const KIO::UDSEntryList&)), + SLOT(slotEntries(KIO::Job*, const KIO::UDSEntryList&))); + + if(!KIO::NetAccess::synchronousRun(m_job, Kernel::self()->widget()) || m_cancelled) { + return 0; + } + + const bool usePreview = m_filePreview->isChecked(); + + const QString title = QString::fromLatin1("title"); + const QString url = QString::fromLatin1("url"); + const QString desc = QString::fromLatin1("description"); + const QString vol = QString::fromLatin1("volume"); + const QString folder = QString::fromLatin1("folder"); + const QString type = QString::fromLatin1("mimetype"); + const QString size = QString::fromLatin1("size"); + const QString perm = QString::fromLatin1("permissions"); + const QString owner = QString::fromLatin1("owner"); + const QString group = QString::fromLatin1("group"); + const QString created = QString::fromLatin1("created"); + const QString modified = QString::fromLatin1("modified"); + const QString metainfo = QString::fromLatin1("metainfo"); + const QString icon = QString::fromLatin1("icon"); + + m_coll = new Data::FileCatalog(true); + QString tmp; + const uint stepSize = QMAX(1, m_files.count()/100); + const bool showProgress = options() & ImportProgress; + + item.setTotalSteps(m_files.count()); + uint j = 0; + for(KFileItemListIterator it(m_files); !m_cancelled && it.current(); ++it, ++j) { + Data::EntryPtr entry = new Data::Entry(m_coll); + + const KURL u = it.current()->url(); + entry->setField(title, u.fileName()); + entry->setField(url, u.url()); + entry->setField(desc, it.current()->mimeComment()); + entry->setField(vol, volume); + tmp = KURL::relativePath(this->url().path(), u.directory()); + // remove "./" from the string + entry->setField(folder, tmp.right(tmp.length()-2)); + entry->setField(type, it.current()->mimetype()); + entry->setField(size, KIO::convertSize(it.current()->size())); + entry->setField(perm, 
it.current()->permissionsString()); + entry->setField(owner, it.current()->user()); + entry->setField(group, it.current()->group()); + + time_t t = it.current()->time(KIO::UDS_CREATION_TIME); + if(t > 0) { + QDateTime dt; + dt.setTime_t(t); + entry->setField(created, dt.toString(Qt::ISODate)); + } + t = it.current()->time(KIO::UDS_MODIFICATION_TIME); + if(t > 0) { + QDateTime dt; + dt.setTime_t(t); + entry->setField(modified, dt.toString(Qt::ISODate)); + } + const KFileMetaInfo& meta = it.current()->metaInfo(); + if(meta.isValid() && !meta.isEmpty()) { + const QStringList keys = meta.supportedKeys(); + QStringList strings; + for(QStringList::ConstIterator it2 = keys.begin(); it2 != keys.end(); ++it2) { + KFileMetaInfoItem item = meta.item(*it2); + if(item.isValid()) { + QString s = item.string(); + if(!s.isEmpty()) { + strings << item.key() + "::" + s; + } + } + } + entry->setField(metainfo, strings.join(QString::fromLatin1("; "))); + } + + if(!m_cancelled && usePreview) { + m_pixmap = NetAccess::filePreview(it.current(), FILE_PREVIEW_SIZE); + if(m_pixmap.isNull()) { + m_pixmap = it.current()->pixmap(0); + } + } else { + m_pixmap = it.current()->pixmap(0); + } + + if(!m_pixmap.isNull()) { + // is png best option? 
+ QString id = ImageFactory::addImage(m_pixmap, QString::fromLatin1("PNG")); + if(!id.isEmpty()) { + entry->setField(icon, id); + } + } + + m_coll->addEntries(entry); + + if(showProgress && j%stepSize == 0) { + ProgressManager::self()->setProgress(this, j); + kapp->processEvents(); + } + } + + if(m_cancelled) { + m_coll = 0; + return 0; + } + + return m_coll; +} + +QWidget* FileListingImporter::widget(QWidget* parent_, const char* name_) { + if(m_widget) { + return m_widget; + } + + m_widget = new QWidget(parent_, name_); + QVBoxLayout* l = new QVBoxLayout(m_widget); + + QVGroupBox* box = new QVGroupBox(i18n("File Listing Options"), m_widget); + + m_recursive = new QCheckBox(i18n("Recursive folder search"), box); + QWhatsThis::add(m_recursive, i18n("If checked, folders are recursively searched for all files.")); + // by default, make it checked + m_recursive->setChecked(true); + + m_filePreview = new QCheckBox(i18n("Generate file previews"), box); + QWhatsThis::add(m_filePreview, i18n("If checked, previews of the file contents are generated, which can slow down " + "the folder listing.")); + // by default, make it no previews + m_filePreview->setChecked(false); + + l->addWidget(box); + l->addStretch(1); + return m_widget; +} + +void FileListingImporter::slotEntries(KIO::Job* job_, const KIO::UDSEntryList& list_) { + if(m_cancelled) { + job_->kill(); + m_job = 0; + return; + } + + for(KIO::UDSEntryList::ConstIterator it = list_.begin(); it != list_.end(); ++it) { + KFileItem* item = new KFileItem(*it, url(), false, true); + if(item->isFile()) { + m_files.append(item); + } else { + delete item; + } + } +} + +QString FileListingImporter::volumeName() const { + // this functions turns /media/cdrom into /dev/hdc, then reads 32 bytes after the 16 x 2048 header + QString volume; + const KMountPoint::List mountPoints = KMountPoint::currentMountPoints(KMountPoint::NeedRealDeviceName); + for(KMountPoint::List::ConstIterator it = mountPoints.begin(), end = mountPoints.end(); 
it != end; ++it) { + // path() could be /media/cdrom + // which could be the mount point of the device + // I know it works for iso9660 (cdrom) and udf (dvd) + if(url().path() == (*it)->mountPoint() + && ((*it)->mountType() == Latin1Literal("iso9660") + || (*it)->mountType() == Latin1Literal("udf"))) { + volume = (*it)->mountPoint(); + if(!(*it)->realDeviceName().isEmpty()) { + QString devName = (*it)->realDeviceName(); + if(devName.endsWith(QChar('/'))) { + devName.truncate(devName.length()-1); + } + // QFile can't do a sequential seek, and I don't want to do a 32808x loop on getch() + FILE* dev = 0; + if((dev = fopen(devName.latin1(), "rb")) != 0) { + // returns 0 on success + if(fseek(dev, VOLUME_NAME_POS, SEEK_SET) == 0) { + char buf[VOLUME_NAME_SIZE]; + int ret = fread(buf, 1, VOLUME_NAME_SIZE, dev); + if(ret == VOLUME_NAME_SIZE) { + volume = QString::fromLatin1(buf, VOLUME_NAME_SIZE).stripWhiteSpace(); + } + } else { + myDebug() << "FileListingImporter::volumeName() - can't seek " << devName << endl; + } + fclose(dev); + } else { + myDebug() << "FileListingImporter::volumeName() - can't read " << devName << endl; + } + } + break; + } + } + return volume; +} + +void FileListingImporter::slotCancel() { + m_cancelled = true; + if(m_job) { + m_job->kill(); + } +} + +#include "filelistingimporter.moc" diff --git a/src/translators/filelistingimporter.h b/src/translators/filelistingimporter.h new file mode 100644 index 0000000..aca4602 --- /dev/null +++ b/src/translators/filelistingimporter.h @@ -0,0 +1,72 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License 
as  *
+ *   published by the Free Software Foundation;                            *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef TELLICO_IMPORT_FILELISTINGIMPORTER_H
+#define TELLICO_IMPORT_FILELISTINGIMPORTER_H
+
+#include "importer.h"
+#include "../datavectors.h"
+
+#include <kio/global.h>
+#include <kfileitem.h>
+
+#include <qguardedptr.h>
+
+class QCheckBox;
+namespace KIO {
+  class Job;
+}
+
+namespace Tellico {
+  namespace Import {
+
+/**
+ * Importer that lists the files in a folder and creates a file catalog
+ * from them.
+ *
+ * @author Robby Stephenson
+ */
+class FileListingImporter : public Importer {
+Q_OBJECT
+
+public:
+  /**
+   * @param url The folder to list
+   */
+  FileListingImporter(const KURL& url);
+
+  /**
+   * @return A pointer to a @ref Data::Collection, or 0 if none can be created.
+   */
+  virtual Data::CollPtr collection();
+  /**
+   * @return The widget holding the import options
+   */
+  virtual QWidget* widget(QWidget*, const char*);
+  virtual bool canImport(int type) const;
+
+public slots:
+  /**
+   * Cancels the import.
+   */
+  void slotCancel();
+
+private slots:
+  /**
+   * Receives the entries reported by the listing job.
+   */
+  void slotEntries(KIO::Job* job, const KIO::UDSEntryList& list);
+
+private:
+  /**
+   * @return The name of the volume the listed folder belongs to
+   */
+  QString volumeName() const;
+
+  Data::CollPtr m_coll;
+  QWidget* m_widget;
+  // the two check boxes are created in widget()
+  QCheckBox* m_recursive;
+  QCheckBox* m_filePreview;
+  QGuardedPtr<KIO::Job> m_job;
+  KFileItemList m_files;
+  QPixmap m_pixmap;
+  bool m_cancelled : 1;
+};
+
+  } // end namespace
+} // end namespace
+#endif
diff --git a/src/translators/freedb_util.cpp b/src/translators/freedb_util.cpp
new file mode 100644
index 0000000..6640ef6
--- /dev/null
+++ b/src/translators/freedb_util.cpp
@@ -0,0 +1,376 @@
+/***************************************************************************
+ *                                                                         *
+ *   Modified from cd-discid.c, found at http://lly.org/~rcw/cd-discid/    *
+ *                                                                         *
+ *   Copyright (c) 1999-2003 Robert Woodcock <rcw@debian.org>              *
+ *                                                                         *
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of version 2 of the GNU General Public License
as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "freedbimporter.h" +#include "../tellico_debug.h" + +using Tellico::Import::FreeDBImporter; + +extern "C" { +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <fcntl.h> +#include <sys/ioctl.h> + +/* Porting credits: + * Solaris: David Champion <dgc@uchicago.edu> + * FreeBSD: Niels Bakker <niels@bakker.net> + * OpenBSD: Marcus Daniel <danielm@uni-muenster.de> + * NetBSD: Chris Gilbert <chris@NetBSD.org> + * MacOSX: Evan Jones <ejones@uwaterloo.ca> http://www.eng.uwaterloo.ca/~ejones/ + */ + +#if defined(__linux__) + +// see http://bugs.kde.org/show_bug.cgi?id=86188 +#ifdef __STRICT_ANSI__ +#undef __STRICT_ANSI__ +#define _ANSI_WAS_HERE_ +#endif +#include <linux/types.h> +#include <linux/cdrom.h> +#ifdef _ANSI_WAS_HERE_ +#define __STRICT_ANSI__ +#undef _ANSI_WAS_HERE_ +#endif +#define cdte_track_address cdte_addr.lba + +#elif defined(sun) && defined(unix) && defined(__SVR4) + +#include <sys/cdio.h> +#define CD_MSF_OFFSET 150 +#define CD_FRAMES 75 +/* According to David Schweikert <dws@ee.ethz.ch>, cd-discid needs this + * to compile on Solaris */ +#define cdte_track_address cdte_addr.lba + +#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) + +#include <netinet/in.h> +#include <sys/cdio.h> +#define CDROM_LBA CD_LBA_FORMAT /* first frame is 0 */ +#define CD_MSF_OFFSET 150 /* MSF offset of first frame */ +#define CD_FRAMES 75 /* per second */ +#define CDROM_LEADOUT 0xAA /* leadout track */ +#define CDROMREADTOCHDR CDIOREADTOCHEADER +#define CDROMREADTOCENTRY CDIOREADTOCENTRY +#define cdrom_tochdr ioc_toc_header +#define cdth_trk0 starting_track +#define cdth_trk1 ending_track +#define cdrom_tocentry ioc_read_toc_single_entry +#define cdte_track track +#define cdte_format address_format +#define cdte_track_address entry.addr.lba + +#elif defined(__OpenBSD__) || 
defined(__NetBSD__) + +#include <netinet/in.h> +#include <sys/cdio.h> +#define CDROM_LBA CD_LBA_FORMAT /* first frame is 0 */ +#define CD_MSF_OFFSET 150 /* MSF offset of first frame */ +#define CD_FRAMES 75 /* per second */ +#define CDROM_LEADOUT 0xAA /* leadout track */ +#define CDROMREADTOCHDR CDIOREADTOCHEADER +#define cdrom_tochdr ioc_toc_header +#define cdth_trk0 starting_track +#define cdth_trk1 ending_track +#define cdrom_tocentry cd_toc_entry +#define cdte_track track +#define cdte_track_address addr.lba + +#elif defined(__APPLE__) + +#include <sys/types.h> +#include <IOKit/storage/IOCDTypes.h> +#include <IOKit/storage/IOCDMediaBSDClient.h> +#define CD_FRAMES 75 /* per second */ +#define CD_MSF_OFFSET 150 /* MSF offset of first frame */ +#define cdrom_tochdr CDDiscInfo +#define cdth_trk0 numberOfFirstTrack +/* NOTE: Judging by the name here, we might have to do this: + * hdr.lastTrackNumberInLastSessionMSB << 8 * + * sizeof(hdr.lastTrackNumberInLastSessionLSB) + * | hdr.lastTrackNumberInLastSessionLSB; */ +#define cdth_trk1 lastTrackNumberInLastSessionLSB +#define cdrom_tocentry CDTrackInfo +#define cdte_track_address trackStartAddress + +#else +# warning "Your OS isn't supported yet for CDDB lookup." 
+#endif /* os selection */ + +} + +#include <config.h> + +namespace { + class CloseDrive { + public: + CloseDrive(int d) : drive(d) {} + ~CloseDrive() { ::close(drive); } + private: + int drive; + }; +} + +QValueList<uint> FreeDBImporter::offsetList(const QCString& drive_, QValueList<uint>& trackLengths_) { + QValueList<uint> list; + + int drive = ::open(drive_.data(), O_RDONLY | O_NONBLOCK); + CloseDrive closer(drive); + if(drive < 0) { + return list; + } + + cdrom_tochdr hdr; +#if defined(__APPLE__) + dk_cd_read_disc_info_t discInfoParams; + ::memset(&discInfoParams, 0, sizeof(discInfoParams)); + discInfoParams.buffer = &hdr; + discInfoParams.bufferLength = sizeof(hdr); + if(ioctl(drive, DKIOCCDREADDISCINFO, &discInfoParams) < 0 + || discInfoParams.bufferLength != sizeof(hdr)) { + return list; + } +#else + if(ioctl(drive, CDROMREADTOCHDR, &hdr) < 0) { + return list; + } +#endif + +// uchar first = hdr.cdth_trk0; + uchar last = hdr.cdth_trk1; + + cdrom_tocentry* TocEntry = new cdrom_tocentry[last+1]; +#if defined(__OpenBSD__) + ioc_read_toc_entry t; + t.starting_track = 0; +#elif defined(__NetBSD__) + ioc_read_toc_entry t; + t.starting_track = 1; +#endif +#if defined(__OpenBSD__) || defined(__NetBSD__) + t.address_format = CDROM_LBA; + t.data_len = (last + 1) * sizeof(cdrom_tocentry); + t.data = TocEntry; + + if (::ioctl(drive, CDIOREADTOCENTRYS, (char *) &t) < 0) + return list; + +#elif defined(__APPLE__) + dk_cd_read_track_info_t trackInfoParams; + ::memset(&trackInfoParams, 0, sizeof(trackInfoParams)); + trackInfoParams.addressType = kCDTrackInfoAddressTypeTrackNumber; + trackInfoParams.bufferLength = sizeof(*TocEntry); + + for(int i = 0; i < last; ++i) { + trackInfoParams.address = i + 1; + trackInfoParams.buffer = &TocEntry[i]; + ::ioctl(drive, DKIOCCDREADTRACKINFO, &trackInfoParams); + } + + /* MacOS X on G5-based systems does not report valid info for + * TocEntry[last-1].lastRecordedAddress + 1, so we compute the start + * of leadout from the start+length 
of the last track instead + */ + TocEntry[last].cdte_track_address = TocEntry[last-1].trackSize + TocEntry[last-1].trackStartAddress; +#else /* FreeBSD, Linux, Solaris */ + for(uint i = 0; i < last; ++i) { + /* tracks start with 1, but I must start with 0 on OpenBSD */ + TocEntry[i].cdte_track = i + 1; + TocEntry[i].cdte_format = CDROM_LBA; + ::ioctl(drive, CDROMREADTOCENTRY, &TocEntry[i]); + } + + TocEntry[last].cdte_track = CDROM_LEADOUT; + TocEntry[last].cdte_format = CDROM_LBA; + ::ioctl(drive, CDROMREADTOCENTRY, &TocEntry[last]); +#endif + +#if defined(__FreeBSD__) + TocEntry[last].cdte_track_address = ntohl(TocEntry[last].cdte_track_address); +#endif + + for(uint i = 0; i < last; ++i) { +#if defined(__FreeBSD__) + TocEntry[i].cdte_track_address = ntohl(TocEntry[i].cdte_track_address); +#endif + list.append(TocEntry[i].cdte_track_address + CD_MSF_OFFSET); + } + + list.append(TocEntry[0].cdte_track_address + CD_MSF_OFFSET); + list.append(TocEntry[last].cdte_track_address + CD_MSF_OFFSET); + + // hey, these are track lengths! 
:P + trackLengths_.clear(); + for(uint i = 0; i < last; ++i) { + trackLengths_.append((TocEntry[i+1].cdte_track_address - TocEntry[i].cdte_track_address) / CD_FRAMES); + } + + delete[] TocEntry; + return list; +} + +inline +ushort from2Byte(uchar* d) { + return (d[0] << 8 & 0xFF00) | (d[1] & 0xFF); +} + +#define SIZE 61 +// mostly taken from kover and k3b +// licensed under GPL +FreeDBImporter::CDText FreeDBImporter::getCDText(const QCString& drive_) { + CDText cdtext; +#ifdef USE_CDTEXT +// only works for linux ATM +#if defined(__linux__) + int drive = ::open(drive_.data(), O_RDONLY | O_NONBLOCK); + CloseDrive closer(drive); + if(drive < 0) { + return cdtext; + } + + cdrom_generic_command m_cmd; + ::memset(&m_cmd, 0, sizeof(cdrom_generic_command)); + + int dataLen; + + int format = 5; + uint track = 0; + uchar buffer[2048]; + + m_cmd.cmd[0] = 0x43; + m_cmd.cmd[1] = 0x0; + m_cmd.cmd[2] = format & 0x0F; + m_cmd.cmd[6] = track; + m_cmd.cmd[8] = 2; // we only read the length first + + m_cmd.buffer = buffer; + m_cmd.buflen = 2; + m_cmd.data_direction = CGC_DATA_READ; + + if(ioctl(drive, CDROM_SEND_PACKET, &m_cmd) != 0) { + myDebug() << "FreeDBImporter::getCDText() - access error" << endl; + return cdtext; + } + + dataLen = from2Byte(buffer) + 2; + m_cmd.cmd[7] = 2048 >> 8; + m_cmd.cmd[8] = 2048 & 0xFF; + m_cmd.buflen = 2048; + ::ioctl(drive, CDROM_SEND_PACKET, &m_cmd); + dataLen = from2Byte(buffer) + 2; + + ::memset(buffer, 0, dataLen); + + m_cmd.cmd[7] = dataLen >> 8; + m_cmd.cmd[8] = dataLen; + m_cmd.buffer = buffer; + m_cmd.buflen = dataLen; + ::ioctl(drive, CDROM_SEND_PACKET, &m_cmd); + + bool rc = false; + int buffer_size = (buffer[0] << 8) | buffer[1]; + buffer_size -= 2; + + char data[SIZE]; + short pos_data = 0; + char old_block_no = 0xff; + for(uchar* bufptr = buffer + 4; buffer_size >= 18; bufptr += 18, buffer_size -= 18) { + char code = *bufptr; + + if((code & 0x80) != 0x80) { + continue; + } + + char block_no = *(bufptr + 3); + if(block_no & 0x80) { + 
myDebug() << "FreeDBImporter::readCDText() - double byte code not supported" << endl; + continue; + } + block_no &= 0x70; + + if(block_no != old_block_no) { + if(rc) { + break; + } + pos_data = 0; + old_block_no = block_no; + } + + track = *(bufptr + 1); + if(track & 0x80) { + continue; + } + + uchar* txtstr = bufptr + 4; + + int length = 11; + while(length >= 0 && *(txtstr + length) == '\0') { + --length; + } + + ++length; + if(length < 12) { + ++length; + } + + for(int j = 0; j < length; ++j) { + char c = *(txtstr + j); + if(c == '\0') { + data[pos_data] = c; + if(track == 0) { + if(code == (char)0xFFFFFF80) { + cdtext.title = QString::fromUtf8(data); + } else if(code == (char)0xFFFFFF81) { + cdtext.artist = QString::fromUtf8(data); + } else if (code == (char)0xFFFFFF85) { + cdtext.message = QString::fromUtf8(data); + } + } else { + if(code == (char)0xFFFFFF80) { + if(cdtext.trackTitles.size() < track) { + cdtext.trackTitles.resize(track); + } + cdtext.trackTitles[track-1] = QString::fromUtf8(data); + } else if(code == (char)0xFFFFFF81) { + if(cdtext.trackArtists.size() < track) { + cdtext.trackArtists.resize(track); + } + cdtext.trackArtists[track-1] = QString::fromUtf8(data); + } + } + rc = true; + pos_data = 0; + ++track; + } else if(pos_data < (SIZE - 1)) { + data[pos_data++] = c; + } + } + } + if(cdtext.trackTitles.size() != cdtext.trackArtists.size()) { + int size = QMAX(cdtext.trackTitles.size(), cdtext.trackArtists.size()); + cdtext.trackTitles.resize(size); + cdtext.trackArtists.resize(size); + } +#endif +#endif + return cdtext; +} +#undef SIZE diff --git a/src/translators/freedbimporter.cpp b/src/translators/freedbimporter.cpp new file mode 100644 index 0000000..14d92d8 --- /dev/null +++ b/src/translators/freedbimporter.cpp @@ -0,0 +1,556 @@ +/*************************************************************************** + copyright : (C) 2004-2006 by Robby Stephenson + email : robby@periapsis.org + 
***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "freedbimporter.h" +#include "../collections/musiccollection.h" +#include "../entry.h" +#include "../field.h" +#include "../latin1literal.h" +#include "../tellico_utils.h" +#include "../tellico_debug.h" +#include "../tellico_kernel.h" +#include "../progressmanager.h" + +#include <config.h> + +#ifdef HAVE_KCDDB +#ifdef QT_NO_CAST_ASCII +#define HAD_QT_NO_CAST_ASCII +#undef QT_NO_CAST_ASCII +#endif +#include <libkcddb/client.h> +#ifdef HAD_QT_NO_CAST_ASCII +#define QT_NO_CAST_ASCII +#undef HAD_QT_NO_CAST_ASCII +#endif +#endif + +#include <kcombobox.h> +#include <kconfig.h> +#include <kapplication.h> +#include <kinputdialog.h> + +#include <qfile.h> +#include <qdir.h> +#include <qlabel.h> +#include <qlayout.h> +#include <qgroupbox.h> +#include <qwhatsthis.h> +#include <qradiobutton.h> +#include <qbuttongroup.h> +#include <qhbox.h> +#include <qcheckbox.h> + +using Tellico::Import::FreeDBImporter; + +FreeDBImporter::FreeDBImporter() : Tellico::Import::Importer(), m_coll(0), m_widget(0), m_cancelled(false) { +} + +bool FreeDBImporter::canImport(int type) const { + return type == Data::Collection::Album; +} + +Tellico::Data::CollPtr FreeDBImporter::collection() { + if(m_coll) { + return m_coll; + } + + m_cancelled = false; + if(m_radioCDROM->isChecked()) { + readCDROM(); + } else { + readCache(); + } + if(m_cancelled) { + m_coll = 0; + } + return m_coll; +} + +void FreeDBImporter::readCDROM() { +#ifdef HAVE_KCDDB + QString drivePath = m_driveCombo->currentText(); + if(drivePath.isEmpty()) { + 
setStatusMessage(i18n("<qt>Tellico was unable to access the CD-ROM device - <i>%1</i>.</qt>").arg(drivePath)); + myDebug() << "FreeDBImporter::readCDROM() - no drive!" << endl; + return; + } + + // now it's ok to add device to saved list + m_driveCombo->insertItem(drivePath); + QStringList drives; + for(int i = 0; i < m_driveCombo->count(); ++i) { + if(drives.findIndex(m_driveCombo->text(i)) == -1) { + drives += m_driveCombo->text(i); + } + } + + { + KConfigGroup config(KGlobal::config(), QString::fromLatin1("ImportOptions - FreeDB")); + config.writeEntry("CD-ROM Devices", drives); + config.writeEntry("Last Device", drivePath); + config.writeEntry("Cache Files Only", false); + } + + QCString drive = QFile::encodeName(drivePath); + QValueList<uint> lengths; + KCDDB::TrackOffsetList list; +#if 0 + // a1107d0a - Kruder & Dorfmeister - The K&D Sessions - Disc One. +/* list + << 150 // First track start. + << 29462 + << 66983 + << 96785 + << 135628 + << 168676 + << 194147 + << 222158 + << 247076 + << 278203 // Last track start. + << 10 // Disc start. + << 316732; // Disc end. +*/ + list + << 150 // First track start. + << 3296 + << 14437 + << 41279 + << 51362 + << 56253 + << 59755 + << 61324 + << 66059 + << 69073 + << 77790 + << 83214 + << 89726 + << 92078 + << 106325 + << 113117 + << 116040 + << 119877 + << 124377 + << 145466 + << 157583 + << 167208 + << 173486 + << 180120 + << 185279 + << 193270 + << 206451 + << 217303 // Last track start. + << 10 // Disc start. + << 224925; // Disc end. 
+/* + list + << 150 + << 106965 + << 127220 + << 151925 + << 176085 + << 5 + << 234500; +*/ +#else + list = offsetList(drive, lengths); +#endif + + if(list.isEmpty()) { + setStatusMessage(i18n("<qt>Tellico was unable to access the CD-ROM device - <i>%1</i>.</qt>").arg(drivePath)); + return; + } +// myDebug() << KCDDB::CDDB::trackOffsetListToId(list) << endl; +// for(KCDDB::TrackOffsetList::iterator it = list.begin(); it != list.end(); ++it) { +// myDebug() << *it << endl; +// } + + // the result info, could be multiple ones + KCDDB::CDInfo info; + KCDDB::Client client; + client.setBlockingMode(true); + KCDDB::CDDB::Result r = client.lookup(list); + // KCDDB doesn't return MultipleRecordFound properly, so check outselves + if(r == KCDDB::CDDB::MultipleRecordFound || client.lookupResponse().count() > 1) { + QStringList list; + KCDDB::CDInfoList infoList = client.lookupResponse(); + for(KCDDB::CDInfoList::iterator it = infoList.begin(); it != infoList.end(); ++it) { + list.append(QString::fromLatin1("%1, %2, %3").arg((*it).artist) + .arg((*it).title) + .arg((*it).genre)); + } + + // switch back to pointer cursor + GUI::CursorSaver cs(Qt::arrowCursor); + bool ok; + QString res = KInputDialog::getItem(i18n("Select CDDB Entry"), + i18n("Select a CDDB entry:"), + list, 0, false, &ok, + Kernel::self()->widget()); + if(ok) { + uint i = 0; + for(QStringList::ConstIterator it = list.begin(); it != list.end(); ++it, ++i) { + if(*it == res) { + break; + } + } + if(i < infoList.size()) { + info = infoList[i]; + } + } else { // cancelled dialog + m_cancelled = true; + } + } else if(r == KCDDB::CDDB::Success) { + info = client.bestLookupResponse(); + } else { +// myDebug() << "FreeDBImporter::readCDROM() - no success! 
Return value = " << r << endl; + QString s; + switch(r) { + case KCDDB::CDDB::NoRecordFound: + s = i18n("<qt>No records were found to match the CD.</qt>"); + break; + case KCDDB::CDDB::ServerError: + myDebug() << "Server Error" << endl; + break; + case KCDDB::CDDB::HostNotFound: + myDebug() << "Host Not Found" << endl; + break; + case KCDDB::CDDB::NoResponse: + myDebug() << "No Response" << endl; + break; + case KCDDB::CDDB::UnknownError: + myDebug() << "Unknown Error" << endl; + break; + default: + break; + } + if(s.isEmpty()) { + s = i18n("<qt>Tellico was unable to complete the CD lookup.</qt>"); + } + setStatusMessage(s); + return; + } + + if(!info.isValid()) { + // go ahead and try to read cd-text if we weren't cancelled + // could be the case we don't have net access + if(!m_cancelled) { + readCDText(drive); + } + return; + } + + m_coll = new Data::MusicCollection(true); + + Data::EntryPtr entry = new Data::Entry(m_coll); + // obviously a CD + entry->setField(QString::fromLatin1("medium"), i18n("Compact Disc")); + entry->setField(QString::fromLatin1("title"), info.title); + entry->setField(QString::fromLatin1("artist"), info.artist); + entry->setField(QString::fromLatin1("genre"), info.genre); + if(info.year > 0) { + entry->setField(QString::fromLatin1("year"), QString::number(info.year)); + } + entry->setField(QString::fromLatin1("keyword"), info.category); + QString extd = info.extd; + extd.replace('\n', QString::fromLatin1("<br/>")); + entry->setField(QString::fromLatin1("comments"), extd); + + QStringList trackList; + KCDDB::TrackInfoList t = info.trackInfoList; + for(uint i = 0; i < t.count(); ++i) { +#if KDE_IS_VERSION(3,4,90) + QString s = t[i].get(QString::fromLatin1("title")).toString() + "::" + info.artist; +#else + QString s = t[i].title + "::" + info.artist; +#endif + if(i < lengths.count()) { + s += "::" + Tellico::minutes(lengths[i]); + } + trackList << s; + // TODO: KDE4 will probably have track length too + } + 
entry->setField(QString::fromLatin1("track"), trackList.join(QString::fromLatin1("; "))); + + m_coll->addEntries(entry); + readCDText(drive); +#endif +} + +void FreeDBImporter::readCache() { +#ifdef HAVE_KCDDB + { + // remember the import options + KConfigGroup config(KGlobal::config(), QString::fromLatin1("ImportOptions - FreeDB")); + config.writeEntry("Cache Files Only", true); + } + + KCDDB::Config cfg; + cfg.readConfig(); + + QStringList dirs = cfg.cacheLocations(); + for(QStringList::ConstIterator it = dirs.begin(); it != dirs.end(); ++it) { + dirs += Tellico::findAllSubDirs(*it); + } + + // using a QMap is a lazy man's way of getting unique keys + // the cddb info may be in multiple files, all with the same filename, the cddb id + QMap<QString, QString> files; + for(QStringList::ConstIterator it = dirs.begin(); it != dirs.end(); ++it) { + if((*it).isEmpty()) { + continue; + } + + QDir dir(*it); + dir.setFilter(QDir::Files | QDir::Readable | QDir::Hidden); // hidden since I want directory files + const QStringList list = dir.entryList(); + for(QStringList::ConstIterator it2 = list.begin(); it2 != list.end(); ++it2) { + files.insert(*it2, dir.absFilePath(*it2), false); + } +// kapp->processEvents(); // really needed ? 
+ } + + const QString title = QString::fromLatin1("title"); + const QString artist = QString::fromLatin1("artist"); + const QString year = QString::fromLatin1("year"); + const QString genre = QString::fromLatin1("genre"); + const QString track = QString::fromLatin1("track"); + const QString comments = QString::fromLatin1("comments"); + uint numFiles = files.count(); + + if(numFiles == 0) { + myDebug() << "FreeDBImporter::readCache() - no files found" << endl; + return; + } + + m_coll = new Data::MusicCollection(true); + + const uint stepSize = QMAX(1, numFiles / 100); + const bool showProgress = options() & ImportProgress; + + ProgressItem& item = ProgressManager::self()->newProgressItem(this, progressLabel(), true); + item.setTotalSteps(numFiles); + connect(&item, SIGNAL(signalCancelled(ProgressItem*)), SLOT(slotCancel())); + ProgressItem::Done done(this); + + uint step = 1; + + KCDDB::CDInfo info; + for(QMap<QString, QString>::Iterator it = files.begin(); !m_cancelled && it != files.end(); ++it, ++step) { + // open file and read content + QFileInfo fileinfo(it.data()); // skip files larger than 10 kB + if(!fileinfo.exists() || !fileinfo.isReadable() || fileinfo.size() > 10*1024) { + myDebug() << "FreeDBImporter::readCache() - skipping " << it.data() << endl; + continue; + } + QFile file(it.data()); + if(!file.open(IO_ReadOnly)) { + continue; + } + QTextStream ts(&file); + // libkcddb always writes the cache files in utf-8 + ts.setEncoding(QTextStream::UnicodeUTF8); + QString cddbData = ts.read(); + file.close(); + + if(cddbData.isEmpty() || !info.load(cddbData) || !info.isValid()) { + myDebug() << "FreeDBImporter::readCache() - Error - CDDB record is not valid" << endl; + myDebug() << "FreeDBImporter::readCache() - File = " << it.data() << endl; + continue; + } + + // create a new entry and set fields + Data::EntryPtr entry = new Data::Entry(m_coll); + // obviously a CD + entry->setField(QString::fromLatin1("medium"), i18n("Compact Disc")); + 
entry->setField(title, info.title); + entry->setField(artist, info.artist); + entry->setField(genre, info.genre); + if(info.year > 0) { + entry->setField(QString::fromLatin1("year"), QString::number(info.year)); + } + entry->setField(QString::fromLatin1("keyword"), info.category); + QString extd = info.extd; + extd.replace('\n', QString::fromLatin1("<br/>")); + entry->setField(QString::fromLatin1("comments"), extd); + + // step through trackList + QStringList trackList; + KCDDB::TrackInfoList t = info.trackInfoList; + for(uint i = 0; i < t.count(); ++i) { +#if KDE_IS_VERSION(3,4,90) + trackList << t[i].get(QString::fromLatin1("title")).toString(); +#else + trackList << t[i].title; +#endif + } + entry->setField(track, trackList.join(QString::fromLatin1("; "))); + +#if 0 + // add CDDB info + const QString br = QString::fromLatin1("<br/>"); + QString comment; + if(!info.extd.isEmpty()) { + comment.append(info.extd + br); + } + if(!info.id.isEmpty()) { + comment.append(QString::fromLatin1("CDDB-ID: ") + info.id + br); + } + if(info.length > 0) { + comment.append("Length: " + QString::number(info.length) + br); + } + if(info.revision > 0) { + comment.append("Revision: " + QString::number(info.revision) + br); + } + entry->setField(comments, comment); +#endif + + // add this entry to the music collection + m_coll->addEntries(entry); + + if(showProgress && step%stepSize == 0) { + ProgressManager::self()->setProgress(this, step); + kapp->processEvents(); + } + } +#endif +} + +#define SETFIELD(name,value) \ + if(entry->field(QString::fromLatin1(name)).isEmpty()) { \ + entry->setField(QString::fromLatin1(name), value); \ + } + +void FreeDBImporter::readCDText(const QCString& drive_) { +#ifdef USE_CDTEXT + Data::EntryPtr entry; + if(m_coll) { + if(m_coll->entryCount() > 0) { + entry = m_coll->entries().front(); + } + } else { + m_coll = new Data::MusicCollection(true); + } + if(!entry) { + entry = new Data::Entry(m_coll); + entry->setField(QString::fromLatin1("medium"), 
i18n("Compact Disc")); + m_coll->addEntries(entry); + } + + CDText cdtext = getCDText(drive_); +/* + myDebug() << "CDText - title: " << cdtext.title << endl; + myDebug() << "CDText - title: " << cdtext.artist << endl; + for(int i = 0; i < cdtext.trackTitles.size(); ++i) { + myDebug() << i << "::" << cdtext.trackTitles[i] << " - " << cdtext.trackArtists[i] << endl; + } +*/ + + QString artist = cdtext.artist; + SETFIELD("title", cdtext.title); + SETFIELD("artist", artist); + SETFIELD("comments", cdtext.message); + QStringList tracks; + for(uint i = 0; i < cdtext.trackTitles.size(); ++i) { + tracks << cdtext.trackTitles[i] + "::" + cdtext.trackArtists[i]; + if(artist.isEmpty()) { + artist = cdtext.trackArtists[i]; + } + if(!artist.isEmpty() && artist.lower() != cdtext.trackArtists[i].lower()) { + artist = i18n("Various"); + } + } + SETFIELD("track", tracks.join(QString::fromLatin1("; "))); + + // something special for compilations and such + SETFIELD("title", i18n(Data::Collection::s_emptyGroupTitle)); + SETFIELD("artist", artist); +#endif +} +#undef SETFIELD + +QWidget* FreeDBImporter::widget(QWidget* parent_, const char* name_/*=0*/) { + if(m_widget) { + return m_widget; + } + m_widget = new QWidget(parent_, name_); + QVBoxLayout* l = new QVBoxLayout(m_widget); + + QGroupBox* bigbox = new QGroupBox(1, Qt::Horizontal, i18n("Audio CD Options"), m_widget); + + // cdrom stuff + QHBox* box = new QHBox(bigbox); + m_radioCDROM = new QRadioButton(i18n("Read data from CD-ROM device"), box); + m_driveCombo = new KComboBox(true, box); + m_driveCombo->setDuplicatesEnabled(false); + QString w = i18n("Select or input the CD-ROM device location."); + QWhatsThis::add(m_radioCDROM, w); + QWhatsThis::add(m_driveCombo, w); + + /********************************************************************************/ + + m_radioCache = new QRadioButton(i18n("Read all CDDB cache files only"), bigbox); + QWhatsThis::add(m_radioCache, i18n("Read data recursively from all the CDDB cache files " + 
"contained in the default cache folders.")); + + // cddb cache stuff + m_buttonGroup = new QButtonGroup(m_widget); + m_buttonGroup->hide(); // only use as button parent + m_buttonGroup->setExclusive(true); + m_buttonGroup->insert(m_radioCDROM); + m_buttonGroup->insert(m_radioCache); + connect(m_buttonGroup, SIGNAL(clicked(int)), SLOT(slotClicked(int))); + + l->addWidget(bigbox); + l->addStretch(1); + + // now read config options + KConfigGroup config(KGlobal::config(), QString::fromLatin1("ImportOptions - FreeDB")); + QStringList devices = config.readListEntry("CD-ROM Devices"); + if(devices.isEmpty()) { +#if defined(__OpenBSD__) + devices += QString::fromLatin1("/dev/rcd0c"); +#endif + devices += QString::fromLatin1("/dev/cdrom"); + devices += QString::fromLatin1("/dev/dvd"); + } + m_driveCombo->insertStringList(devices); + QString device = config.readEntry("Last Device"); + if(!device.isEmpty()) { + m_driveCombo->setCurrentText(device); + } + if(config.readBoolEntry("Cache Files Only", false)) { + m_radioCache->setChecked(true); + } else { + m_radioCDROM->setChecked(true); + } + // set enabled widgets + slotClicked(m_buttonGroup->selectedId()); + + return m_widget; +} + +void FreeDBImporter::slotClicked(int id_) { + QButton* button = m_buttonGroup->find(id_); + if(!button) { + return; + } + + m_driveCombo->setEnabled(button == m_radioCDROM); +} + +void FreeDBImporter::slotCancel() { + m_cancelled = true; +} + +#include "freedbimporter.moc" diff --git a/src/translators/freedbimporter.h b/src/translators/freedbimporter.h new file mode 100644 index 0000000..263f89d --- /dev/null +++ b/src/translators/freedbimporter.h @@ -0,0 +1,85 @@ +/*************************************************************************** + copyright : (C) 2004-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program 
is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef FREEDBIMPORTER_H +#define FREEDBIMPORTER_H + +#include "importer.h" +#include "../datavectors.h" + +#include <qvaluevector.h> + +class QButtonGroup; +class QRadioButton; +class KComboBox; + +namespace Tellico { + namespace Import { + +/** + * The FreeDBImporter class takes care of importing audio files. + * + * @author Robby Stephenson + */ +class FreeDBImporter : public Importer { +Q_OBJECT + +public: + /** + */ + FreeDBImporter(); + + /** + */ + virtual Data::CollPtr collection(); + /** + */ + virtual QWidget* widget(QWidget* parent, const char* name=0); + virtual bool canImport(int type) const; + +public slots: + void slotCancel(); + +private slots: + void slotClicked(int id); + +private: + typedef QValueVector<QString> StringVector; + struct CDText { + friend class FreeDBImporter; + QString title; + QString artist; + QString message; + StringVector trackTitles; + StringVector trackArtists; + }; + + static QValueList<uint> offsetList(const QCString& drive, QValueList<uint>& trackLengths); + static CDText getCDText(const QCString& drive); + + void readCDROM(); + void readCache(); + void readCDText(const QCString& drive); + + Data::CollPtr m_coll; + QWidget* m_widget; + QButtonGroup* m_buttonGroup; + QRadioButton* m_radioCDROM; + QRadioButton* m_radioCache; + KComboBox* m_driveCombo; + bool m_cancelled : 1; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/gcfilmsexporter.cpp b/src/translators/gcfilmsexporter.cpp new file mode 100644 index 0000000..b172996 --- /dev/null +++ b/src/translators/gcfilmsexporter.cpp @@ -0,0 +1,235 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + 
email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "gcfilmsexporter.h" +#include "../collection.h" +#include "../document.h" +#include "../filehandler.h" +#include "../latin1literal.h" +#include "../tellico_utils.h" +#include "../stringset.h" +#include "../tellico_kernel.h" +#include "../imagefactory.h" + +#include <klocale.h> +#include <kio/netaccess.h> + +namespace { + char GCFILMS_DELIMITER = '|'; +} + +using Tellico::Export::GCfilmsExporter; + +GCfilmsExporter::GCfilmsExporter() : Tellico::Export::Exporter() { +} + +QString GCfilmsExporter::formatString() const { + return i18n("GCfilms"); +} + +QString GCfilmsExporter::fileFilter() const { + return i18n("*.gcf|GCfilms Data Files (*.gcf)") + QChar('\n') + i18n("*|All Files"); +#if 0 + i18n("*.gcs|GCstar Data Files (*.gcs)") +#endif +} + +bool GCfilmsExporter::exec() { + Data::CollPtr coll = collection(); + if(!coll) { + return false; + } + + QString text; + QTextOStream ts(&text); + + ts << "GCfilms|" << coll->entryCount() << "|"; + if(options() & Export::ExportUTF8) { + ts << "UTF8" << endl; + } + + char d = GCFILMS_DELIMITER; + bool format = options() & Export::ExportFormatted; + // when importing GCfilms, a url field is added + bool hasURL = coll->hasField(QString::fromLatin1("url")) + && coll->fieldByName(QString::fromLatin1("url"))->type() == Data::Field::URL; + + uint minRating = 1; + uint maxRating = 5; + Data::FieldPtr f = coll->fieldByName(QString::fromLatin1("rating")); + if(f) { + bool ok; + uint n = Tellico::toUInt(f->property(QString::fromLatin1("minimum")), &ok); + 
if(ok) { + minRating = n; + } + n = Tellico::toUInt(f->property(QString::fromLatin1("maximum")), &ok); + if(ok) { + maxRating = n; + } + } + + // only going to export images if it's a local path + KURL imageDir; + if(url().isLocalFile()) { + imageDir = url(); + imageDir.cd(QString::fromLatin1("..")); + imageDir.addPath(url().fileName().section('.', 0, 0) + QString::fromLatin1("_images/")); + if(!KIO::NetAccess::exists(imageDir, false, 0)) { + bool success = KIO::NetAccess::mkdir(imageDir, Kernel::self()->widget()); + if(!success) { + imageDir = KURL(); // means don't write images + } + } + } + + QStringList images; + for(Data::EntryVec::ConstIterator entry = entries().begin(); entry != entries().end(); ++entry) { + ts << entry->id() << d; + push(ts, "title", entry, format); + push(ts, "year", entry, format); + push(ts, "running-time", entry, format); + push(ts, "director", entry, format); + push(ts, "nationality", entry, format); + push(ts, "genre", entry, format); + // do image + QString tmp = entry->field(QString::fromLatin1("cover")); + if(!tmp.isEmpty() && !imageDir.isEmpty()) { + images << tmp; + ts << imageDir.path() << tmp; + } + ts << d; + + // do not format cast since the commas could get mixed up + const QStringList cast = entry->fields(QString::fromLatin1("cast"), false); + for(QStringList::ConstIterator it = cast.begin(); it != cast.end(); ++it) { + ts << (*it).section(QString::fromLatin1("::"), 0, 0); + if(it != cast.fromLast()) { + ts << ", "; + } + } + ts << d; + + // values[9] is the original title + ts << d; + + push(ts, "plot", entry, format); + + if(hasURL) { + push(ts, "url", entry, format); + } else { + ts << d; + } + + // values[12] is whether the film has been viewed or not + ts << d; + + push(ts, "medium", entry, format); + // values[14] is number of DVDS? + ts << d; + // values[15] is place? 
+ ts << d; + + // gcfilms's ratings go 0-10, just multiply by two + bool ok; + int rat = Tellico::toUInt(entry->field(QString::fromLatin1("rating"), format), &ok); + if(ok) { + ts << rat * 10/(maxRating-minRating); + } + ts << d; + + push(ts, "comments", entry, format); + push(ts, "language", entry, format); // ignoring audio-tracks + + push(ts, "subtitle", entry, format); + + // values[20] is borrower name, values[21] is loan date + if(entry->field(QString::fromLatin1("loaned")).isEmpty()) { + ts << d << d; + } else { + // find loan + bool found = false; + const Data::BorrowerVec& borrowers = Data::Document::self()->collection()->borrowers(); + for(Data::BorrowerVec::ConstIterator b = borrowers.begin(); b != borrowers.end() && !found; ++b) { + const Data::LoanVec& loans = b->loans(); + for(Data::LoanVec::ConstIterator loan = loans.begin(); loan != loans.end(); ++loan) { + if(entry.data() == loan->entry()) { + ts << b->name() << d; + ts << loan->loanDate().day() << '/' + << loan->loanDate().month() << '/' + << loan->loanDate().year() << d; + found = true; + break; + } + } + } + } + + // values[22] is history ? 
+ ts << d; + + // for certification, only thing we can do is assume default american ratings + tmp = entry->field(QString::fromLatin1("certification"), format); + int age = 0; + if(tmp == Latin1Literal("U (USA)")) { + age = 1; + } else if(tmp == Latin1Literal("G (USA)")) { + age = 2; + } else if(tmp == Latin1Literal("PG (USA)")) { + age = 5; + } else if(tmp == Latin1Literal("PG-13 (USA)")) { + age = 13; + } else if(tmp == Latin1Literal("R (USA)")) { + age = 17; + } + if(age > 0) { + ts << age << d; + } + ts << d; + + // all done + ts << endl; + } + + StringSet imageSet; + for(QStringList::ConstIterator it = images.begin(); it != images.end(); ++it) { + if(imageSet.has(*it)) { + continue; + } + if(ImageFactory::writeImage(*it, imageDir)) { + imageSet.add(*it); + } else { + kdWarning() << "GCfilmsExporter::exec() - unable to write image file: " + << imageDir << *it << endl; + } + } + + return FileHandler::writeTextURL(url(), text, options() & Export::ExportUTF8, options() & Export::ExportForce); +} + +void GCfilmsExporter::push(QTextOStream& ts_, QCString fieldName_, Data::EntryVec::ConstIterator entry_, bool format_) { + Data::FieldPtr f = collection()->fieldByName(QString::fromLatin1(fieldName_)); + // don't format multiple names cause commas will cause problems + if(f->formatFlag() == Data::Field::FormatName && (f->flags() & Data::Field::AllowMultiple)) { + format_ = false; + } + QString s = entry_->field(QString::fromLatin1(fieldName_), format_); + if(f->flags() & Data::Field::AllowMultiple) { + ts_ << s.replace(QString::fromLatin1("; "), QChar(',')); + } else { + ts_ << s; + } + ts_ << GCFILMS_DELIMITER; +} + +#include "gcfilmsexporter.moc" diff --git a/src/translators/gcfilmsexporter.h b/src/translators/gcfilmsexporter.h new file mode 100644 index 0000000..50ee31c --- /dev/null +++ b/src/translators/gcfilmsexporter.h @@ -0,0 +1,46 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + 
email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_EXPORT_GCFILMSEXPORTER_H +#define TELLICO_EXPORT_GCFILMSEXPORTER_H + +class QTextOStream; + +#include "exporter.h" + +namespace Tellico { + namespace Export { + +/** + * @author Robby Stephenson + */ +class GCfilmsExporter : public Exporter { +Q_OBJECT + +public: + GCfilmsExporter(); + + virtual bool exec(); + virtual QString formatString() const; + virtual QString fileFilter() const; + + // no options + virtual QWidget* widget(QWidget*, const char*) { return 0; } + +private: + void push(QTextOStream& ts, QCString fieldName, Data::EntryVec::ConstIterator entry, bool format); +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/gcfilmsimporter.cpp b/src/translators/gcfilmsimporter.cpp new file mode 100644 index 0000000..e2ff9ca --- /dev/null +++ b/src/translators/gcfilmsimporter.cpp @@ -0,0 +1,273 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "gcfilmsimporter.h" +#include 
"../collections/videocollection.h" +#include "../latin1literal.h" +#include "../tellico_utils.h" +#include "../imagefactory.h" +#include "../borrower.h" +#include "../progressmanager.h" +#include "xslthandler.h" +#include "tellicoimporter.h" + +#include <kapplication.h> +#include <kstandarddirs.h> + +#include <qtextcodec.h> + +#define CHECKLIMITS(n) if(values.count() <= n) continue + +using Tellico::Import::GCfilmsImporter; + +GCfilmsImporter::GCfilmsImporter(const KURL& url_) : TextImporter(url_), m_coll(0), m_cancelled(false) { +} + +bool GCfilmsImporter::canImport(int type) const { + return type == Data::Collection::Video + || type == Data::Collection::Book + || type == Data::Collection::Album + || type == Data::Collection::Game + || type == Data::Collection::Wine + || type == Data::Collection::Coin; +} + +Tellico::Data::CollPtr GCfilmsImporter::collection() { + if(m_coll) { + return m_coll; + } + + ProgressItem& item = ProgressManager::self()->newProgressItem(this, progressLabel(), true); + item.setTotalSteps(100); + connect(&item, SIGNAL(signalCancelled(ProgressItem*)), SLOT(slotCancel())); + ProgressItem::Done done(this); + + QString str = text(); + QTextIStream t(&str); + QString line = t.readLine(); + if(line.startsWith(QString::fromLatin1("GCfilms"))) { + readGCfilms(str); + } else { + // need to reparse the string if it's in utf-8 + if(line.lower().find(QString::fromLatin1("utf-8")) > 0) { + str = QString::fromUtf8(str.local8Bit()); + } + readGCstar(str); + } + return m_coll; +} + +void GCfilmsImporter::readGCfilms(const QString& text_) { + m_coll = new Data::VideoCollection(true); + bool hasURL = false; + if(m_coll->hasField(QString::fromLatin1("url"))) { + hasURL = m_coll->fieldByName(QString::fromLatin1("url"))->type() == Data::Field::URL; + } else { + Data::FieldPtr field = new Data::Field(QString::fromLatin1("url"), i18n("URL"), Data::Field::URL); + field->setCategory(i18n("General")); + m_coll->addField(field); + hasURL = true; + } + + bool 
convertUTF8 = false; + QMap<QString, Data::BorrowerPtr> borrowers; + const QRegExp rx(QString::fromLatin1("\\s*,\\s*")); + QRegExp year(QString::fromLatin1("\\d{4}")); + QRegExp runTimeHr(QString::fromLatin1("(\\d+)\\s?hr?")); + QRegExp runTimeMin(QString::fromLatin1("(\\d+)\\s?mi?n?")); + + bool gotFirstLine = false; + uint total = 0; + + QTextIStream t(&text_); + + const uint length = text_.length(); + const uint stepSize = QMAX(s_stepSize, length/100); + const bool showProgress = options() & ImportProgress; + + ProgressManager::self()->setTotalSteps(this, length); + uint j = 0; + for(QString line = t.readLine(); !m_cancelled && !line.isNull(); line = t.readLine(), j += line.length()) { + // string was wrongly converted + QStringList values = QStringList::split('|', (convertUTF8 ? QString::fromUtf8(line.local8Bit()) : line), true); + if(values.empty()) { + continue; + } + + if(!gotFirstLine) { + if(values[0] != Latin1Literal("GCfilms")) { + setStatusMessage(i18n("<qt>The file is not a valid GCstar data file.</qt>")); + m_coll = 0; + return; + } + total = Tellico::toUInt(values[1], 0)+1; // number of lines really + if(values.size() > 2 && values[2] == Latin1Literal("UTF8")) { + // if locale encoding isn't utf8, need to do a reconversion + QTextCodec* codec = QTextCodec::codecForLocale(); + if(QCString(codec->name()).find("utf-8", 0, false) == -1) { + convertUTF8 = true; + } + } + gotFirstLine = true; + continue; + } + + bool ok; + + Data::EntryPtr entry = new Data::Entry(m_coll); + entry->setId(Tellico::toUInt(values[0], &ok)); + entry->setField(QString::fromLatin1("title"), values[1]); + if(year.search(values[2]) > -1) { + entry->setField(QString::fromLatin1("year"), year.cap()); + } + + uint time = 0; + if(runTimeHr.search(values[3]) > -1) { + time = Tellico::toUInt(runTimeHr.cap(1), &ok) * 60; + } + if(runTimeMin.search(values[3]) > -1) { + time += Tellico::toUInt(runTimeMin.cap(1), &ok); + } + if(time > 0) { + 
entry->setField(QString::fromLatin1("running-time"), QString::number(time)); + } + + entry->setField(QString::fromLatin1("director"), splitJoin(rx, values[4])); + entry->setField(QString::fromLatin1("nationality"), splitJoin(rx, values[5])); + entry->setField(QString::fromLatin1("genre"), splitJoin(rx, values[6])); + KURL u = KURL::fromPathOrURL(values[7]); + if(!u.isEmpty()) { + QString id = ImageFactory::addImage(u, true /* quiet */); + if(!id.isEmpty()) { + entry->setField(QString::fromLatin1("cover"), id); + } + } + entry->setField(QString::fromLatin1("cast"), splitJoin(rx, values[8])); + // values[9] is the original title + entry->setField(QString::fromLatin1("plot"), values[10]); + if(hasURL) { + entry->setField(QString::fromLatin1("url"), values[11]); + } + + CHECKLIMITS(12); + + // values[12] is whether the film has been viewed or not + entry->setField(QString::fromLatin1("medium"), values[13]); + // values[14] is number of DVDS? + // values[15] is place? + // gcfilms's ratings go 0-10, just divide by two + entry->setField(QString::fromLatin1("rating"), QString::number(int(Tellico::toUInt(values[16], &ok)/2))); + entry->setField(QString::fromLatin1("comments"), values[17]); + + CHECKLIMITS(18); + + QStringList s = QStringList::split(',', values[18]); + QStringList tracks, langs; + for(QStringList::ConstIterator it = s.begin(); it != s.end(); ++it) { + langs << (*it).section(';', 0, 0); + tracks << (*it).section(';', 1, 1); + } + entry->setField(QString::fromLatin1("language"), langs.join(QString::fromLatin1("; "))); + entry->setField(QString::fromLatin1("audio-track"), tracks.join(QString::fromLatin1("; "))); + + entry->setField(QString::fromLatin1("subtitle"), splitJoin(rx, values[19])); + + CHECKLIMITS(20); + + // values[20] is borrower name + if(!values[20].isEmpty()) { + QString tmp = values[20]; + Data::BorrowerPtr b = borrowers[tmp]; + if(!b) { + b = new Data::Borrower(tmp, QString()); + borrowers.insert(tmp, b); + } + // values[21] is loan date + 
if(!values[21].isEmpty()) { + tmp = values[21]; // assume date is dd/mm/yyyy + int d = Tellico::toUInt(tmp.section('/', 0, 0), &ok); + int m = Tellico::toUInt(tmp.section('/', 1, 1), &ok); + int y = Tellico::toUInt(tmp.section('/', 2, 2), &ok); + b->addLoan(new Data::Loan(entry, QDate(y, m, d), QDate(), QString())); + entry->setField(QString::fromLatin1("loaned"), QString::fromLatin1("true")); + } + } + // values[22] is history ? + // for certification, only thing we can do is assume default american ratings + // they're not translated one for one + CHECKLIMITS(23); + + int age = Tellico::toUInt(values[23], &ok); + if(age < 2) { + entry->setField(QString::fromLatin1("certification"), QString::fromLatin1("U (USA)")); + } else if(age < 3) { + entry->setField(QString::fromLatin1("certification"), QString::fromLatin1("G (USA)")); + } else if(age < 6) { + entry->setField(QString::fromLatin1("certification"), QString::fromLatin1("PG (USA)")); + } else if(age < 14) { + entry->setField(QString::fromLatin1("certification"), QString::fromLatin1("PG-13 (USA)")); + } else { + entry->setField(QString::fromLatin1("certification"), QString::fromLatin1("R (USA)")); + } + + m_coll->addEntries(entry); + + if(showProgress && j%stepSize == 0) { + ProgressManager::self()->setProgress(this, j); + kapp->processEvents(); + } + } + + if(m_cancelled) { + m_coll = 0; + return; + } + + for(QMap<QString, Data::BorrowerPtr>::Iterator it = borrowers.begin(); it != borrowers.end(); ++it) { + if(!it.data()->isEmpty()) { + m_coll->addBorrower(it.data()); + } + } +} + +void GCfilmsImporter::readGCstar(const QString& text_) { + QString xsltFile = locate("appdata", QString::fromLatin1("gcstar2tellico.xsl")); + XSLTHandler handler(xsltFile); + if(!handler.isValid()) { + setStatusMessage(i18n("Tellico encountered an error in XSLT processing.")); + return; + } + + QString str = handler.applyStylesheet(text_); + + if(str.isEmpty()) { + setStatusMessage(i18n("<qt>The file is not a valid GCstar data 
file.</qt>")); + return; + } + + Import::TellicoImporter imp(str); + m_coll = imp.collection(); + setStatusMessage(imp.statusMessage()); +} + +inline +QString GCfilmsImporter::splitJoin(const QRegExp& rx, const QString& s) { + return QStringList::split(rx, s, false).join(QString::fromLatin1("; ")); +} + +void GCfilmsImporter::slotCancel() { + m_cancelled = true; +} + +#undef CHECKLIMITS +#include "gcfilmsimporter.moc" diff --git a/src/translators/gcfilmsimporter.h b/src/translators/gcfilmsimporter.h new file mode 100644 index 0000000..8fa9a0d --- /dev/null +++ b/src/translators/gcfilmsimporter.h @@ -0,0 +1,60 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_IMPORT_GCFILMSIMPORTER_H +#define TELLICO_IMPORT_GCFILMSIMPORTER_H + +#include "textimporter.h" +#include "../datavectors.h" + +class QRegExp; + +namespace Tellico { + namespace Import { + +/** + * @author Robby Stephenson +*/ +class GCfilmsImporter : public TextImporter { +Q_OBJECT + +public: + /** + */ + GCfilmsImporter(const KURL& url); + + /** + * + */ + virtual Data::CollPtr collection(); + /** + */ + virtual QWidget* widget(QWidget*, const char*) { return 0; } + virtual bool canImport(int type) const; + +public slots: + void slotCancel(); + +private: + static QString splitJoin(const QRegExp& rx, const QString& s); + + void readGCfilms(const QString& text); + void readGCstar(const QString& text); + + Data::CollPtr m_coll; + bool 
m_cancelled; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/griffith2tellico.py b/src/translators/griffith2tellico.py new file mode 100755 index 0000000..24bfb41 --- /dev/null +++ b/src/translators/griffith2tellico.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 -*- + +# *************************************************************************** +# copyright : (C) 2007 by Robby Stephenson +# email : robby@periapsis.org +# based on : fr.allocine.py by Mathias Monnerville +# *************************************************************************** +# +# *************************************************************************** +# * * +# * This program is free software; you can redistribute it and/or modify * +# * it under the terms of version 2 of the GNU General Public License as * +# * published by the Free Software Foundation; * +# * * +# *************************************************************************** + +import os, sys +import base64 +import xml.dom.minidom +try: + import sqlite3 +except: + print sys.stderr, "The Python sqlite3 module is required to import Griffith databases." 
+ exit(1) + +DB_PATH = os.environ['HOME'] + '/.griffith/griffith.db' +POSTERS_PATH = os.environ['HOME'] + '/.griffith/posters/' + +XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>""" +DOCTYPE = """<!DOCTYPE tellico PUBLIC "-//Robby Stephenson/DTD Tellico V9.0//EN" "http://periapsis.org/tellico/dtd/v9/tellico.dtd">""" + +class BasicTellicoDOM: + def __init__(self): + self.__doc = xml.dom.minidom.Document() + self.__root = self.__doc.createElement('tellico') + self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/') + self.__root.setAttribute('syntaxVersion', '9') + + self.__collection = self.__doc.createElement('collection') + self.__collection.setAttribute('title', 'Griffith Import') + self.__collection.setAttribute('type', '3') + + self.__fields = self.__doc.createElement('fields') + # Add all default (standard) fields + self.__dfltField = self.__doc.createElement('field') + self.__dfltField.setAttribute('name', '_default') + + # change the rating to have a maximum of 10 + self.__ratingField = self.__doc.createElement('field') + self.__ratingField.setAttribute('name', 'rating') + self.__ratingField.setAttribute('title', 'Personal Rating') + self.__ratingField.setAttribute('flags', '2') + self.__ratingField.setAttribute('category', 'Personal') + self.__ratingField.setAttribute('format', '4') + self.__ratingField.setAttribute('type', '14') + self.__ratingField.setAttribute('i18n', 'yes') + propNode = self.__doc.createElement('prop') + propNode.setAttribute('name', 'maximum') + propNode.appendChild(self.__doc.createTextNode('10')) + self.__ratingField.appendChild(propNode); + propNode = self.__doc.createElement('prop') + propNode.setAttribute('name', 'minimum') + propNode.appendChild(self.__doc.createTextNode('1')) + self.__ratingField.appendChild(propNode); + + # Add a custom 'Original Title' field + self.__titleField = self.__doc.createElement('field') + self.__titleField.setAttribute('name', 'orig-title') + self.__titleField.setAttribute('title', 
'Original Title') + self.__titleField.setAttribute('flags', '8') + self.__titleField.setAttribute('category', 'General') + self.__titleField.setAttribute('format', '1') + self.__titleField.setAttribute('type', '1') + self.__titleField.setAttribute('i18n', 'yes') + + self.__keywordField = self.__doc.createElement('field') + self.__keywordField.setAttribute('name', 'keyword') + self.__keywordField.setAttribute('title', 'Keywords') + self.__keywordField.setAttribute('flags', '7') + self.__keywordField.setAttribute('category', 'Personal') + self.__keywordField.setAttribute('format', '4') + self.__keywordField.setAttribute('type', '1') + self.__keywordField.setAttribute('i18n', 'yes') + + self.__urlField = self.__doc.createElement('field') + self.__urlField.setAttribute('name', 'url') + self.__urlField.setAttribute('title', 'URL') + self.__urlField.setAttribute('flags', '0') + self.__urlField.setAttribute('category', 'General') + self.__urlField.setAttribute('format', '4') + self.__urlField.setAttribute('type', '7') + self.__urlField.setAttribute('i18n', 'yes') + + self.__fields.appendChild(self.__dfltField) + self.__fields.appendChild(self.__ratingField) + self.__fields.appendChild(self.__titleField) + self.__fields.appendChild(self.__keywordField) + self.__fields.appendChild(self.__urlField) + self.__collection.appendChild(self.__fields) + + self.__images = self.__doc.createElement('images') + + self.__root.appendChild(self.__collection) + self.__doc.appendChild(self.__root) + self.__fieldsMap = dict(country='nationality', + classification='certification', + runtime='running-time', + o_title='orig-title', + notes='comments', + image='cover', + tag='keyword', + site='url') + + + def addMedia(self, media): + if len(media) == 0: return + # add default Tellico values + orig_media = 'DVD;VHS;VCD;DivX;Blu-ray;HD DVD'.split(';') + orig_media.extend(media) + # make sure unique + set = {} + media = [set.setdefault(e,e) for e in orig_media if e not in set] + + mediaField = 
self.__doc.createElement('field') + mediaField.setAttribute('name', 'medium') + mediaField.setAttribute('title', 'Medium') + mediaField.setAttribute('flags', '2') + mediaField.setAttribute('category', 'General') + mediaField.setAttribute('format', '4') + mediaField.setAttribute('type', '3') + mediaField.setAttribute('i18n', 'yes') + mediaField.setAttribute('allowed', ';'.join(media)) + self.__fields.appendChild(mediaField) + + def addEntry(self, movieData): + """ + Add a movie entry + """ + entryNode = self.__doc.createElement('entry') + entryNode.setAttribute('id', movieData['id']) + + for key, values in movieData.iteritems(): + if key == 'id': + continue + + if self.__fieldsMap.has_key(key): + field = self.__fieldsMap[key] + else: + field = key + + parentNode = self.__doc.createElement(field + 's') + + for value in values: + if len(value) == 0: continue + node = self.__doc.createElement(field) + if field == 'certification': value += " (USA)" + elif field == 'region': value = "Region " + value + elif field == 'cover': + imageNode = self.__doc.createElement('image') + imageNode.setAttribute('format', 'JPEG') + imageNode.setAttribute('id', value[0]) + imageNode.appendChild(self.__doc.createTextNode(value[1])) + self.__images.appendChild(imageNode) + value = value[0] # value was (id, md5) + + if field == 'cast': + for v in value: + columnNode = self.__doc.createElement('column') + columnNode.appendChild(self.__doc.createTextNode(v.strip())) + node.appendChild(columnNode) + + else: + node.appendChild(self.__doc.createTextNode(value.strip())) + + if node.hasChildNodes(): parentNode.appendChild(node) + + if parentNode.hasChildNodes(): entryNode.appendChild(parentNode) + + self.__collection.appendChild(entryNode) + + def printXML(self): + """ + Outputs XML content to stdout + """ + self.__collection.appendChild(self.__images) + print XML_HEADER; print DOCTYPE + print self.__root.toxml() + + +class GriffithParser: + def __init__(self): + self.__dbPath = DB_PATH + 
self.__domTree = BasicTellicoDOM() + + def run(self): + """ + Runs the parser: fetch movie ids, then fills and prints the DOM tree + to stdout (in tellico format) so that tellico can use it. + """ + self.__conn = sqlite3.connect(self.__dbPath) + self.__loadDatabase() + # Print results to stdout + self.__domTree.printXML() + + def __addMediaValues(self): + c = self.__conn.cursor() + c.execute("SELECT name FROM media") + + media = list([row[0].encode('utf-8') for row in c.fetchall()]) + self.__domTree.addMedia(media) + + + def __fetchMovieIds(self): + """ + Retrieve all movie ids + """ + c = self.__conn.cursor() + c.execute("SELECT movie_id FROM movies") + data = c.fetchall() + dataList = [row[0] for row in data] + return dataList + + def __fetchMovieInfo(self, id): + """ + Fetches movie information + """ + #cast is a reserved word + columns = ('title','director','rating','year','region', + 'country','genre','classification','plot', + 'runtime','o_title','studio','notes','image', + '[cast]','loaned','color','site') + + c = self.__conn.cursor() + c.execute("SELECT %s FROM movies WHERE movie_id=%s" % (','.join(columns),id)) + row = c.fetchone() + + data = {} + data['id'] = str(id) + + for i in range(len(columns)): + if row[i] == None : continue + + try: + value = row[i].encode('utf-8') + except: + value = str(row[i]) + + col = columns[i].replace('[','').replace(']','') + + if col == 'genre' or col == 'studio': + values = value.split('/') + elif col == 'plot' or col == 'notes': + value = value.replace('\n', '\n<br/>') + values = (value,) + elif col == 'cast': + values = [] + lines = value.split('\n') + for line in lines: + cast = line.split('as') + values.append(cast) + elif col == 'image': + imgfile = POSTERS_PATH + value + '.jpg' + img = file(imgfile,'rb').read() + values = ((value + '.jpg', base64.encodestring(img)),) + elif col == 'loaned': + if value == '0': value = '' + values = (value,) + elif col == 'color': + if value == '1': value = 'Color' + elif value == 
'2': value = 'Black & White' + values = (value,) + else: + values = (value,) + col = col.replace('"','') + data[col] = values + + # get medium + c.execute("SELECT name FROM media WHERE medium_id IN (SELECT medium_id FROM movies WHERE movie_id=%s)" % id) + + media = list([row[0].encode('utf-8') for row in c.fetchall()]) + if len(media) > 0: data['medium'] = media + + # get all tags + c.execute("SELECT name FROM tags WHERE tag_id IN (SELECT tag_id FROM movie_tag WHERE movie_id=%s)" % id) + + tags = list([row[0].encode('utf-8') for row in c.fetchall()]) + if len(tags) > 0: data['tag'] = tags + + # get all languages + c.execute("SELECT name FROM languages WHERE lang_id IN (SELECT lang_id FROM movie_lang WHERE movie_id=%s)" % id) + + langs = list([row[0].encode('utf-8') for row in c.fetchall()]) + if len(langs) > 0: data['language'] = langs + + return data + + + def __loadDatabase(self): + # Get all ids + self.__addMediaValues(); + ids = self.__fetchMovieIds() + + # Now retrieve data + if ids: + for entry in ids: + data = self.__fetchMovieInfo(entry) + self.__domTree.addEntry(data) + else: + return None + + + +def main(): + parser = GriffithParser() + parser.run() + +if __name__ == '__main__': + main() diff --git a/src/translators/griffithimporter.cpp b/src/translators/griffithimporter.cpp new file mode 100644 index 0000000..8b0394f --- /dev/null +++ b/src/translators/griffithimporter.cpp @@ -0,0 +1,107 @@ +/*************************************************************************** + copyright : (C) 2007 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + 
***************************************************************************/ + +#include "griffithimporter.h" +#include "../collections/videocollection.h" +#include "tellicoimporter.h" +#include "../tellico_debug.h" + +#include <kglobal.h> +#include <kstandarddirs.h> +#include <kprocess.h> + +#include <qdir.h> +#include <qfile.h> + +using Tellico::Import::GriffithImporter; + +GriffithImporter::~GriffithImporter() { + if(m_process) { + m_process->kill(); + delete m_process; + m_process = 0; + } +} + +Tellico::Data::CollPtr GriffithImporter::collection() { + QString filename = QDir::homeDirPath() + QString::fromLatin1("/.griffith/griffith.db"); + if(!QFile::exists(filename)) { + myWarning() << "GriffithImporter::collection() - database not found: " << filename << endl; + return 0; + } + + QString python = KStandardDirs::findExe(QString::fromLatin1("python")); + if(python.isEmpty()) { + myWarning() << "GriffithImporter::collection() - python not found!" << endl; + return 0; + } + + QString griffith = KGlobal::dirs()->findResource("appdata", QString::fromLatin1("griffith2tellico.py")); + if(griffith.isEmpty()) { + myWarning() << "GriffithImporter::collection() - griffith2tellico.py not found!" 
<< endl; + return 0; + } + + m_process = new KProcess(); + connect(m_process, SIGNAL(receivedStdout(KProcess*, char*, int)), SLOT(slotData(KProcess*, char*, int))); + connect(m_process, SIGNAL(receivedStderr(KProcess*, char*, int)), SLOT(slotError(KProcess*, char*, int))); + connect(m_process, SIGNAL(processExited(KProcess*)), SLOT(slotProcessExited(KProcess*))); + *m_process << python << griffith; + if(!m_process->start(KProcess::Block, KProcess::AllOutput)) { + myDebug() << "ExecExternalFetcher::startSearch() - process failed to start" << endl; + return 0; + } + + return m_coll; +} + +void GriffithImporter::slotData(KProcess*, char* buffer_, int len_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(buffer_, len_); +} + +void GriffithImporter::slotError(KProcess*, char* buffer_, int len_) { + QString msg = QString::fromLocal8Bit(buffer_, len_); + myDebug() << "GriffithImporter::slotError() - " << msg << endl; + setStatusMessage(msg); +} + + +void GriffithImporter::slotProcessExited(KProcess*) { +// myDebug() << "GriffithImporter::slotProcessExited()" << endl; + if(!m_process->normalExit() || m_process->exitStatus()) { + myDebug() << "GriffithImporter::slotProcessExited() - process did not exit successfully" << endl; + return; + } + + if(m_data.isEmpty()) { + myDebug() << "GriffithImporter::slotProcessExited() - no data" << endl; + return; + } + + QString text = QString::fromUtf8(m_data, m_data.size()); + TellicoImporter imp(text); + + m_coll = imp.collection(); + if(!m_coll) { + myDebug() << "GriffithImporter::slotProcessExited() - no collection pointer" << endl; + } else { + myLog() << "GriffithImporter::slotProcessExited() - results found: " << m_coll->entryCount() << endl; + } +} + +bool GriffithImporter::canImport(int type) const { + return type == Data::Collection::Video; +} + +#include "griffithimporter.moc" diff --git a/src/translators/griffithimporter.h b/src/translators/griffithimporter.h new file mode 100644 index 
0000000..60bae07 --- /dev/null +++ b/src/translators/griffithimporter.h @@ -0,0 +1,63 @@ +/*************************************************************************** + copyright : (C) 2007 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef GRIFFITHIMPORTER_H +#define GRIFFITHIMPORTER_H + +#include "importer.h" +#include "../datavectors.h" + +class KProcess; + +namespace Tellico { + namespace Import { + +/** + * An importer for importing collections used by Griffith, a movie colleciton manager. + * + * The database is assumed to be $HOME/.griffith/griffith.db. 
The file format is sqlite3, + * and a python script, depending on pysqlite, i sused to import the database + * + * @author Robby Stephenson + */ +class GriffithImporter : public Importer { +Q_OBJECT + +public: + /** + */ + GriffithImporter() : Importer(), m_coll(0), m_process(0) {} + /** + */ + virtual ~GriffithImporter(); + + /** + */ + virtual Data::CollPtr collection(); + virtual bool canImport(int type) const; + +private slots: + void slotData(KProcess* proc, char* buffer, int len); + void slotError(KProcess* proc, char* buffer, int len); + void slotProcessExited(KProcess* proc); + +private: + Data::CollPtr m_coll; + + KProcess* m_process; + QByteArray m_data; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/grs1importer.cpp b/src/translators/grs1importer.cpp new file mode 100644 index 0000000..7eca9e3 --- /dev/null +++ b/src/translators/grs1importer.cpp @@ -0,0 +1,130 @@ +/*************************************************************************** + copyright : (C) 2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "grs1importer.h" +#include "../collections/bibtexcollection.h" +#include "../entry.h" +#include "../field.h" +#include "../latin1literal.h" +#include "../tellico_debug.h" + +using Tellico::Import::GRS1Importer; +GRS1Importer::TagMap* GRS1Importer::s_tagMap = 0; + +// static +void GRS1Importer::initTagMap() { + if(!s_tagMap) { + s_tagMap = new TagMap(); + // BT is special and is handled separately + s_tagMap->insert(TagPair(2, 1), 
QString::fromLatin1("title")); + s_tagMap->insert(TagPair(2, 2), QString::fromLatin1("author")); + s_tagMap->insert(TagPair(2, 4), QString::fromLatin1("year")); + s_tagMap->insert(TagPair(2, 7), QString::fromLatin1("publisher")); + s_tagMap->insert(TagPair(2, 31), QString::fromLatin1("publisher")); + s_tagMap->insert(TagPair(2, 20), QString::fromLatin1("language")); + s_tagMap->insert(TagPair(2, 21), QString::fromLatin1("keyword")); + s_tagMap->insert(TagPair(3, QString::fromLatin1("isbn/issn")), QString::fromLatin1("isbn")); + s_tagMap->insert(TagPair(3, QString::fromLatin1("isbn")), QString::fromLatin1("isbn")); + s_tagMap->insert(TagPair(3, QString::fromLatin1("notes")), QString::fromLatin1("note")); + s_tagMap->insert(TagPair(3, QString::fromLatin1("note")), QString::fromLatin1("note")); + s_tagMap->insert(TagPair(3, QString::fromLatin1("series")), QString::fromLatin1("series")); + s_tagMap->insert(TagPair(3, QString::fromLatin1("physical description")), QString::fromLatin1("note")); + s_tagMap->insert(TagPair(3, QString::fromLatin1("subtitle")), QString::fromLatin1("subtitle")); + } +} + +GRS1Importer::GRS1Importer(const QString& text_) : TextImporter(text_) { + initTagMap(); +} + +bool GRS1Importer::canImport(int type) const { + return type == Data::Collection::Bibtex; +} + +Tellico::Data::CollPtr GRS1Importer::collection() { + Data::CollPtr coll = new Data::BibtexCollection(true); + + Data::FieldPtr f = new Data::Field(QString::fromLatin1("isbn"), i18n("ISBN#")); + f->setCategory(i18n("Publishing")); + f->setDescription(i18n("International Standard Book Number")); + coll->addField(f); + + f = new Data::Field(QString::fromLatin1("language"), i18n("Language")); + f->setCategory(i18n("Publishing")); + f->setFlags(Data::Field::AllowCompletion | Data::Field::AllowGrouped | Data::Field::AllowMultiple); + coll->addField(f); + + Data::EntryPtr e = new Data::Entry(coll); + bool empty = true; + + // in format "(tag, tag) value" + QRegExp 
rx(QString::fromLatin1("\\s*\\((\\d+),\\s*(.+)\\s*\\)\\s*(.+)\\s*")); +// rx.setMinimal(true); + QRegExp dateRx(QString::fromLatin1(",[^,]*\\d{3,4}[^,]*")); // remove dates from authors + QRegExp pubRx(QString::fromLatin1("([^:]+):([^,]+),?")); // split location and publisher + + bool ok; + int n; + QVariant v; + QString tmp, field, val, str = text(); + if(str.isEmpty()) { + return 0; + } + QTextStream t(&str, IO_ReadOnly); + for(QString line = t.readLine(); !line.isNull(); line = t.readLine()) { +// myDebug() << line << endl; + if(!rx.exactMatch(line)) { + continue; + } + n = rx.cap(1).toInt(); + v = rx.cap(2).toInt(&ok); + if(!ok) { + v = rx.cap(2).lower(); + } + field = (*s_tagMap)[TagPair(n, v)]; + if(field.isEmpty()) { + continue; + } +// myDebug() << "field is " << field << endl; + // assume if multiple values, it's allowed + val = rx.cap(3).stripWhiteSpace(); + if(val.isEmpty()) { + continue; + } + empty = false; + if(field == Latin1Literal("title")) { + val = val.section('/', 0, 0).stripWhiteSpace(); // only take portion of title before slash + } else if(field == Latin1Literal("author")) { + val.replace(dateRx, QString::null); + } else if(field == Latin1Literal("publisher")) { + int pos = val.find(pubRx); + if(pos > -1) { + e->setField(QString::fromLatin1("address"), pubRx.cap(1)); + val = pubRx.cap(2); + } + } + + tmp = e->field(field); + if(!tmp.isEmpty()) { + tmp += QString::fromLatin1("; "); + } + e->setField(field, tmp + val); + } + + if(!empty) { + coll->addEntries(e); + } + return coll; +} + +#include "grs1importer.moc" diff --git a/src/translators/grs1importer.h b/src/translators/grs1importer.h new file mode 100644 index 0000000..a4929a4 --- /dev/null +++ b/src/translators/grs1importer.h @@ -0,0 +1,65 @@ +/*************************************************************************** + copyright : (C) 2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + 
+/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_IMPORT_GRS1IMPORTER_H +#define TELLICO_IMPORT_GRS1IMPORTER_H + +#include "textimporter.h" +#include "../datavectors.h" + +#include <qvariant.h> +#include <qmap.h> +#include <qpair.h> + +namespace Tellico { + namespace Import { + +/** + * @author Robby Stephenson + */ +class GRS1Importer : public TextImporter { +Q_OBJECT + +public: + GRS1Importer(const QString& text); + virtual ~GRS1Importer() {} + + /** + * @return A pointer to a @ref Data::Collection, or 0 if none can be created. + */ + virtual Data::CollPtr collection(); + /** + */ + virtual QWidget* widget(QWidget*, const char*) { return 0; } + virtual bool canImport(int type) const; + +private: + static void initTagMap(); + + class TagPair : public QPair<int, QVariant> { + public: + TagPair() : QPair<int, QVariant>(-1, QVariant()) {} + TagPair(int n, const QVariant& v) : QPair<int, QVariant>(n, v) {} + QString toString() const { return QString::number(first) + second.toString(); } + bool operator< (const TagPair& p) const { + return toString() < p.toString(); + } + }; + + typedef QMap<TagPair, QString> TagMap; + static TagMap* s_tagMap; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/htmlexporter.cpp b/src/translators/htmlexporter.cpp new file mode 100644 index 0000000..e947793 --- /dev/null +++ b/src/translators/htmlexporter.cpp @@ -0,0 +1,815 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + 
+/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "htmlexporter.h" +#include "xslthandler.h" +#include "tellicoxmlexporter.h" +#include "../document.h" +#include "../collection.h" +#include "../filehandler.h" +#include "../imagefactory.h" +#include "../latin1literal.h" +#include "../tellico_kernel.h" +#include "../tellico_utils.h" +#include "../progressmanager.h" +#include "../core/tellico_config.h" +#include "../tellico_debug.h" + +#include <kstandarddirs.h> +#include <kconfig.h> +#include <kglobal.h> +#include <kio/netaccess.h> +#include <kapplication.h> +#include <klocale.h> + +#include <qdom.h> +#include <qgroupbox.h> +#include <qlayout.h> +#include <qcheckbox.h> +#include <qwhatsthis.h> +#include <qfile.h> +#include <qhbox.h> +#include <qlabel.h> + +extern "C" { +#include <libxml/HTMLparser.h> +#include <libxml/HTMLtree.h> +} + +using Tellico::Export::HTMLExporter; + +HTMLExporter::HTMLExporter() : Tellico::Export::Exporter(), + m_handler(0), + m_printHeaders(true), + m_printGrouped(false), + m_exportEntryFiles(false), + m_cancelled(false), + m_parseDOM(true), + m_checkCreateDir(true), + m_imageWidth(0), + m_imageHeight(0), + m_widget(0), + m_xsltFile(QString::fromLatin1("tellico2html.xsl")) { +} + +HTMLExporter::HTMLExporter(Data::CollPtr coll_) : Tellico::Export::Exporter(coll_), + m_handler(0), + m_printHeaders(true), + m_printGrouped(false), + m_exportEntryFiles(false), + m_cancelled(false), + m_parseDOM(true), + m_checkCreateDir(true), + m_imageWidth(0), + m_imageHeight(0), + m_widget(0), + m_xsltFile(QString::fromLatin1("tellico2html.xsl")) { +} + +HTMLExporter::~HTMLExporter() { + delete m_handler; + m_handler = 0; +} + +QString 
HTMLExporter::formatString() const { + return i18n("HTML"); +} + +QString HTMLExporter::fileFilter() const { + return i18n("*.html|HTML Files (*.html)") + QChar('\n') + i18n("*|All Files"); +} + +void HTMLExporter::reset() { + // since the ExportUTF8 option may have changed, need to delete handler + delete m_handler; + m_handler = 0; + m_files.clear(); + m_links.clear(); + m_copiedFiles.clear(); +} + +bool HTMLExporter::exec() { + if(url().isEmpty() || !url().isValid()) { + kdWarning() << "HTMLExporter::exec() - trying to export to invalid URL" << endl; + return false; + } + + // check file exists first + // if we're not forcing, ask use + bool force = (options() & Export::ExportForce) || FileHandler::queryExists(url()); + if(!force) { + return false; + } + + if(!m_parseDOM) { + return FileHandler::writeTextURL(url(), text(), options() & Export::ExportUTF8, force); + } + + m_cancelled = false; + // TODO: maybe need label? + if(options() & ExportProgress) { + ProgressItem& item = ProgressManager::self()->newProgressItem(this, QString::null, true); + item.setTotalSteps(100); + connect(&item, SIGNAL(signalCancelled(ProgressItem*)), SLOT(slotCancel())); + } + // ok if not ExportProgress, no worries + ProgressItem::Done done(this); + + htmlDocPtr htmlDoc = htmlParseDoc(reinterpret_cast<xmlChar*>(text().utf8().data()), NULL); + xmlNodePtr root = xmlDocGetRootElement(htmlDoc); + if(root == 0) { + myDebug() << "HTMLExporter::exec() - no root" << endl; + return false; + } + parseDOM(root); + + if(m_cancelled) { + return true; // intentionally cancelled + } + ProgressManager::self()->setProgress(this, 15); + + xmlChar* c; + int bytes; + htmlDocDumpMemory(htmlDoc, &c, &bytes); + QString allText; + if(bytes > 0) { + allText = QString::fromUtf8(reinterpret_cast<const char*>(c), bytes); + xmlFree(c); + } + + if(m_cancelled) { + return true; // intentionally cancelled + } + ProgressManager::self()->setProgress(this, 20); + + bool success = FileHandler::writeTextURL(url(), 
allText, options() & Export::ExportUTF8, force); + success &= copyFiles() && (!m_exportEntryFiles || writeEntryFiles()); + return success; +} + +bool HTMLExporter::loadXSLTFile() { + QString xsltfile = locate("appdata", m_xsltFile); + if(xsltfile.isNull()) { + myDebug() << "HTMLExporter::loadXSLTFile() - no xslt file for " << m_xsltFile << endl; + return false; + } + + KURL u; + u.setPath(xsltfile); + // do NOT do namespace processing, it messes up the XSL declaration since + // QDom thinks there are no elements in the Tellico namespace and as a result + // removes the namespace declaration + QDomDocument dom = FileHandler::readXMLFile(u, false); + if(dom.isNull()) { + myDebug() << "HTMLExporter::loadXSLTFile() - error loading xslt file: " << xsltfile << endl; + return false; + } + + // notes about utf-8 encoding: + // all params should be passed to XSLTHandler in utf8 + // input string to XSLTHandler should be in utf-8, EVEN IF DOM STRING SAYS OTHERWISE + + // the stylesheet prints utf-8 by default, if using locale encoding, need + // to change the encoding attribute on the xsl:output element + if(!(options() & Export::ExportUTF8)) { + XSLTHandler::setLocaleEncoding(dom); + } + + delete m_handler; + m_handler = new XSLTHandler(dom, QFile::encodeName(xsltfile), true /*translate*/); + if(!m_handler->isValid()) { + delete m_handler; + m_handler = 0; + return false; + } + + if(m_exportEntryFiles) { + // export entries to same place as all the other date files + m_handler->addStringParam("entrydir", QFile::encodeName(fileDir().fileName())+ '/'); + // be sure to link all the entries + m_handler->addParam("link-entries", "true()"); + } + + if(!m_collectionURL.isEmpty()) { + QString s = QString::fromLatin1("../") + m_collectionURL.fileName(); + m_handler->addStringParam("collection-file", s.utf8()); + } + + // look for a file that gets installed to know the installation directory + // if parseDOM, that means we want the locations to be the actual location + // otherwise, 
we assume it'll be relative + if(m_parseDOM && m_dataDir.isEmpty()) { + m_dataDir = KGlobal::dirs()->findResourceDir("appdata", QString::fromLatin1("pics/tellico.png")); + } else if(!m_parseDOM) { + m_dataDir.truncate(0); + } + if(!m_dataDir.isEmpty()) { + m_handler->addStringParam("datadir", QFile::encodeName(m_dataDir)); + } + + setFormattingOptions(collection()); + + return m_handler->isValid(); +} + +QString HTMLExporter::text() { + if((!m_handler || !m_handler->isValid()) && !loadXSLTFile()) { + kdWarning() << "HTMLExporter::text() - error loading xslt file: " << m_xsltFile << endl; + return QString::null; + } + + Data::CollPtr coll = collection(); + if(!coll) { + myDebug() << "HTMLExporter::text() - no collection pointer!" << endl; + return QString::null; + } + + if(m_groupBy.isEmpty()) { + m_printGrouped = false; // can't group if no groups exist + } + + GUI::CursorSaver cs; + writeImages(coll); + + // now grab the XML + TellicoXMLExporter exporter(coll); + exporter.setURL(url()); + exporter.setEntries(entries()); + exporter.setIncludeGroups(m_printGrouped); +// yes, this should be in utf8, always + exporter.setOptions(options() | Export::ExportUTF8 | Export::ExportImages); + QDomDocument output = exporter.exportXML(); +#if 0 + QFile f(QString::fromLatin1("/tmp/test.xml")); + if(f.open(IO_WriteOnly)) { + QTextStream t(&f); + t << output.toString(); + } + f.close(); +#endif + + QString text = m_handler->applyStylesheet(output.toString()); +#if 0 + QFile f2(QString::fromLatin1("/tmp/test.html")); + if(f2.open(IO_WriteOnly)) { + QTextStream t(&f2); + t << text; +// t << "\n\n-------------------------------------------------------\n\n"; +// t << Tellico::i18nReplace(text); + } + f2.close(); +#endif + // the XSLT file gets translated instead +// return Tellico::i18nReplace(text); + return text; +} + +void HTMLExporter::setFormattingOptions(Data::CollPtr coll) { + QString file = Kernel::self()->URL().fileName(); + if(file != i18n("Untitled")) { + 
m_handler->addStringParam("filename", QFile::encodeName(file)); + } + m_handler->addStringParam("cdate", KGlobal::locale()->formatDate(QDate::currentDate()).utf8()); + m_handler->addParam("show-headers", m_printHeaders ? "true()" : "false()"); + m_handler->addParam("group-entries", m_printGrouped ? "true()" : "false()"); + + QStringList sortTitles; + if(!m_sort1.isEmpty()) { + sortTitles << m_sort1; + } + if(!m_sort2.isEmpty()) { + sortTitles << m_sort2; + } + + // the third sort column may be same as first + if(!m_sort3.isEmpty() && sortTitles.findIndex(m_sort3) == -1) { + sortTitles << m_sort3; + } + + if(sortTitles.count() > 0) { + m_handler->addStringParam("sort-name1", coll->fieldNameByTitle(sortTitles[0]).utf8()); + if(sortTitles.count() > 1) { + m_handler->addStringParam("sort-name2", coll->fieldNameByTitle(sortTitles[1]).utf8()); + if(sortTitles.count() > 2) { + m_handler->addStringParam("sort-name3", coll->fieldNameByTitle(sortTitles[2]).utf8()); + } + } + } + + // no longer showing "sorted by..." 
since the column headers are clickable + // but still use "grouped by" + QString sortString; + if(m_printGrouped) { + QString s; + // if more than one, then it's the People pseudo-group + if(m_groupBy.count() > 1) { + s = i18n("People"); + } else { + s = coll->fieldTitleByName(m_groupBy[0]); + } + sortString = i18n("(grouped by %1)").arg(s); + + QString groupFields; + for(QStringList::ConstIterator it = m_groupBy.begin(); it != m_groupBy.end(); ++it) { + Data::FieldPtr f = coll->fieldByName(*it); + if(!f) { + continue; + } + if(f->flags() & Data::Field::AllowMultiple) { + groupFields += QString::fromLatin1("tc:") + *it + QString::fromLatin1("s/tc:") + *it; + } else { + groupFields += QString::fromLatin1("tc:") + *it; + } + int ncols = 0; + if(f->type() == Data::Field::Table) { + bool ok; + ncols = Tellico::toUInt(f->property(QString::fromLatin1("columns")), &ok); + if(!ok) { + ncols = 1; + } + } + if(ncols > 1) { + groupFields += QString::fromLatin1("/tc:column[1]"); + } + if(*it != m_groupBy.last()) { + groupFields += '|'; + } + } +// myDebug() << groupFields << endl; + m_handler->addStringParam("group-fields", groupFields.utf8()); + m_handler->addStringParam("sort-title", sortString.utf8()); + } + + QString pageTitle = coll->title(); + pageTitle += QChar(' ') + sortString; + m_handler->addStringParam("page-title", pageTitle.utf8()); + + QStringList showFields; + for(QStringList::ConstIterator it = m_columns.begin(); it != m_columns.end(); ++it) { + showFields << coll->fieldNameByTitle(*it); + } + m_handler->addStringParam("column-names", showFields.join(QChar(' ')).utf8()); + + if(m_imageWidth > 0 && m_imageHeight > 0) { + m_handler->addParam("image-width", QCString().setNum(m_imageWidth)); + m_handler->addParam("image-height", QCString().setNum(m_imageHeight)); + } + + // add system colors to stylesheet + const int type = coll->type(); + m_handler->addStringParam("font", Config::templateFont(type).family().latin1()); + m_handler->addStringParam("fontsize", 
QCString().setNum(Config::templateFont(type).pointSize())); + m_handler->addStringParam("bgcolor", Config::templateBaseColor(type).name().latin1()); + m_handler->addStringParam("fgcolor", Config::templateTextColor(type).name().latin1()); + m_handler->addStringParam("color1", Config::templateHighlightedTextColor(type).name().latin1()); + m_handler->addStringParam("color2", Config::templateHighlightedBaseColor(type).name().latin1()); + + // add locale code to stylesheet (for sorting) + m_handler->addStringParam("lang", KGlobal::locale()->languagesTwoAlpha().first().utf8()); +} + +void HTMLExporter::writeImages(Data::CollPtr coll_) { + // keep track of which image fields to write, this is for field names + StringSet imageFields; + for(QStringList::ConstIterator it = m_columns.begin(); it != m_columns.end(); ++it) { + if(coll_->fieldByTitle(*it)->type() == Data::Field::Image) { + imageFields.add(*it); + } + } + + // all the images potentially used in the HTML export need to be written to disk + // if we're exporting entry files, then we'll certainly want all the image fields written + // if we're not exporting to a file, then we might be exporting an entry template file + // and so we need to write all of them too. + if(m_exportEntryFiles || url().isEmpty()) { + // add all image fields to string list + Data::FieldVec fields = coll_->imageFields(); + for(Data::FieldVec::Iterator fieldIt = fields.begin(); fieldIt != fields.end(); ++fieldIt) { + imageFields.add(fieldIt->name()); + } + } + + // all of them are going to get written to tmp file + bool useTemp = url().isEmpty(); + KURL imgDir; + QString imgDirRelative; + // really some convoluted logic here + // basically, four cases. 
1) we're writing to a tmp file, for printing probably + // so then write all the images to the tmp directory, 2) we're exporting to HTML, and + // this is the main collection file, in which case m_parseDOM is always true; + // 3) we're exporting HTML, and this is the first entry file, for which parseDOM is true + // and exportEntryFiles is false. Then the image file will get copied in copyFiles() and is + // probably an image in the entry template. 4) we're exporting HTML, and this is not the + // first entry file, in which case, we want to refer directly to the target dir + if(useTemp) { // everything goes in the tmp dir + imgDir.setPath(ImageFactory::tempDir()); + imgDirRelative = imgDir.path(); + } else if(m_parseDOM) { + imgDir = fileDir(); // copy to fileDir + imgDirRelative = Data::Document::self()->allImagesOnDisk() ? ImageFactory::dataDir() : ImageFactory::tempDir(); + createDir(); + } else { + imgDir = fileDir(); + imgDirRelative = KURL::relativeURL(url(), imgDir); + createDir(); + } + m_handler->addStringParam("imgdir", QFile::encodeName(imgDirRelative)); + + int count = 0; + const int processCount = 100; // process after every 100 events + + QStringList fieldsList = imageFields.toList(); + StringSet imageSet; // track which images are written + for(QStringList::ConstIterator fieldName = fieldsList.begin(); fieldName != fieldsList.end(); ++fieldName) { + for(Data::EntryVec::ConstIterator entryIt = entries().begin(); entryIt != entries().end(); ++entryIt) { + QString id = entryIt->field(*fieldName); + // if no id or is already writen, continue + if(id.isEmpty() || imageSet.has(id)) { + continue; + } + imageSet.add(id); + // try writing + bool success = useTemp ? 
ImageFactory::writeCachedImage(id, ImageFactory::TempDir) + : ImageFactory::writeImage(id, imgDir, true); + if(!success) { + kdWarning() << "HTMLExporter::writeImages() - unable to write image file: " + << imgDir.path() << id << endl; + } + + if(++count == processCount) { + kapp->processEvents(); + count = 0; + } + } + } +} + +QWidget* HTMLExporter::widget(QWidget* parent_, const char* name_/*=0*/) { + if(m_widget && m_widget->parent() == parent_) { + return m_widget; + } + + m_widget = new QWidget(parent_, name_); + QVBoxLayout* l = new QVBoxLayout(m_widget); + + QGroupBox* box = new QGroupBox(1, Qt::Horizontal, i18n("HTML Options"), m_widget); + l->addWidget(box); + + m_checkPrintHeaders = new QCheckBox(i18n("Print field headers"), box); + QWhatsThis::add(m_checkPrintHeaders, i18n("If checked, the field names will be " + "printed as table headers.")); + m_checkPrintHeaders->setChecked(m_printHeaders); + + m_checkPrintGrouped = new QCheckBox(i18n("Group the entries"), box); + QWhatsThis::add(m_checkPrintGrouped, i18n("If checked, the entries will be grouped by " + "the selected field.")); + m_checkPrintGrouped->setChecked(m_printGrouped); + + m_checkExportEntryFiles = new QCheckBox(i18n("Export individual entry files"), box); + QWhatsThis::add(m_checkExportEntryFiles, i18n("If checked, individual files will be created for each entry.")); + m_checkExportEntryFiles->setChecked(m_exportEntryFiles); + + l->addStretch(1); + return m_widget; +} + +void HTMLExporter::readOptions(KConfig* config_) { + KConfigGroup exportConfig(config_, QString::fromLatin1("ExportOptions - %1").arg(formatString())); + m_printHeaders = exportConfig.readBoolEntry("Print Field Headers", m_printHeaders); + m_printGrouped = exportConfig.readBoolEntry("Print Grouped", m_printGrouped); + m_exportEntryFiles = exportConfig.readBoolEntry("Export Entry Files", m_exportEntryFiles); + + // read current entry export template + m_entryXSLTFile = Config::templateName(collection()->type()); + 
m_entryXSLTFile = locate("appdata", QString::fromLatin1("entry-templates/") + + m_entryXSLTFile + QString::fromLatin1(".xsl")); +} + +void HTMLExporter::saveOptions(KConfig* config_) { + KConfigGroup cfg(config_, QString::fromLatin1("ExportOptions - %1").arg(formatString())); + m_printHeaders = m_checkPrintHeaders->isChecked(); + cfg.writeEntry("Print Field Headers", m_printHeaders); + m_printGrouped = m_checkPrintGrouped->isChecked(); + cfg.writeEntry("Print Grouped", m_printGrouped); + m_exportEntryFiles = m_checkExportEntryFiles->isChecked(); + cfg.writeEntry("Export Entry Files", m_exportEntryFiles); +} + +void HTMLExporter::setXSLTFile(const QString& filename_) { + if(m_xsltFile == filename_) { + return; + } + + m_xsltFile = filename_; + m_xsltFilePath = QString::null; + reset(); +} + +KURL HTMLExporter::fileDir() const { + if(url().isEmpty()) { + return KURL(); + } + KURL fileDir = url(); + // cd to directory of target URL + fileDir.cd(QString::fromLatin1("..")); + fileDir.addPath(fileDirName()); + return fileDir; +} + +QString HTMLExporter::fileDirName() const { + if(!m_collectionURL.isEmpty()) { + return QString::fromLatin1("/"); + } + return url().fileName().section('.', 0, 0) + QString::fromLatin1("_files/"); +} + +// how ugly is this? 
+const xmlChar* HTMLExporter::handleLink(const xmlChar* link_) { + return reinterpret_cast<xmlChar*>(qstrdup(handleLink(QString::fromUtf8(reinterpret_cast<const char*>(link_))).utf8())); +} + +QString HTMLExporter::handleLink(const QString& link_) { + if(m_links.contains(link_)) { + return m_links[link_]; + } + // assume that if the link_ is not relative, then we don't need to copy it + if(!KURL::isRelativeURL(link_)) { + return link_; + } + + if(m_xsltFilePath.isEmpty()) { + m_xsltFilePath = locate("appdata", m_xsltFile); + if(m_xsltFilePath.isNull()) { + kdWarning() << "HTMLExporter::handleLink() - no xslt file for " << m_xsltFile << endl; + } + } + + KURL u; + u.setPath(m_xsltFilePath); + u = KURL(u, link_); + + // one of the "quirks" of the html export is that img src urls are set to point to + // the tmpDir() when exporting entry files from a collection, but those images + // don't actually exist, and they get copied in writeImages() instead. + // so we only need to keep track of the url if it exists + const bool exists = KIO::NetAccess::exists(u, false, 0); + if(exists) { + m_files.append(u); + } + + // if we're exporting entry files, we want pics/ to + // go in pics/ + const bool isPic = link_.startsWith(m_dataDir + QString::fromLatin1("pics/")); + QString midDir; + if(m_exportEntryFiles && isPic) { + midDir = QString::fromLatin1("pics/"); + } + // pictures are special since they might not exist when the HTML is exported, since they might get copied later + // on the other hand, don't change the file location if it doesn't exist + if(isPic || exists) { + m_links.insert(link_, fileDirName() + midDir + u.fileName()); + } else { + m_links.insert(link_, link_); + } + return m_links[link_]; +} + +const xmlChar* HTMLExporter::analyzeInternalCSS(const xmlChar* str_) { + return reinterpret_cast<xmlChar*>(qstrdup(analyzeInternalCSS(QString::fromUtf8(reinterpret_cast<const char*>(str_))).utf8())); +} + +QString HTMLExporter::analyzeInternalCSS(const QString& str_) { + 
QString str = str_; + int start = 0; + int end = 0; + const QString url = QString::fromLatin1("url("); + for(int pos = str.find(url); pos >= 0; pos = str.find(url, pos+1)) { + pos += 4; // url( + if(str[pos] == '"' || str[pos] == '\'') { + ++pos; + } + + start = pos; + pos = str.find(')', start); + end = pos; + if(str[pos-1] == '"' || str[pos-1] == '\'') { + --end; + } + + str.replace(start, end-start, handleLink(str.mid(start, end-start))); + } + return str; +} + +void HTMLExporter::createDir() { + if(!m_checkCreateDir) { + return; + } + KURL dir = fileDir(); + if(dir.isEmpty()) { + myDebug() << "HTMLExporter::createDir() - called on empty URL!" << endl; + return; + } + if(KIO::NetAccess::exists(dir, false, 0)) { + m_checkCreateDir = false; + } else { + m_checkCreateDir = !KIO::NetAccess::mkdir(dir, m_widget); + } +} + +bool HTMLExporter::copyFiles() { + if(m_files.isEmpty()) { + return true; + } + const uint start = 20; + const uint maxProgress = m_exportEntryFiles ? 40 : 80; + const uint stepSize = QMAX(1, m_files.count()/maxProgress); + uint j = 0; + + createDir(); + KURL target; + for(KURL::List::ConstIterator it = m_files.begin(); it != m_files.end() && !m_cancelled; ++it, ++j) { + if(m_copiedFiles.has((*it).url())) { + continue; + } + + if(target.isEmpty()) { + target = fileDir(); + } + target.setFileName((*it).fileName()); + bool success = KIO::NetAccess::file_copy(*it, target, -1, true /* overwrite */, false /* resume */, m_widget); + if(success) { + m_copiedFiles.add((*it).url()); + } else { + kdWarning() << "HTMLExporter::copyFiles() - can't copy " << target << endl; + kdWarning() << KIO::NetAccess::lastErrorString() << endl; + } + if(j%stepSize == 0) { + if(options() & ExportProgress) { + ProgressManager::self()->setProgress(this, QMIN(start+j/stepSize, 99)); + } + kapp->processEvents(); + } + } + return true; +} + +bool HTMLExporter::writeEntryFiles() { + if(m_entryXSLTFile.isEmpty()) { + kdWarning() << "HTMLExporter::writeEntryFiles() - no entry XSLT 
file" << endl; + return false; + } + + const uint start = 60; + const uint stepSize = QMAX(1, entries().count()/40); + uint j = 0; + + // now worry about actually exporting entry files + // I can't reliable encode a string as a URI, so I'm punting, and I'll just replace everything but + // a-zA-Z0-9 with an underscore. This MUST match the filename template in tellico2html.xsl + // the id is used so uniqueness is guaranteed + const QRegExp badChars(QString::fromLatin1("[^-a-zA-Z0-9]")); + bool formatted = options() & Export::ExportFormatted; + + KURL outputFile = fileDir(); + + GUI::CursorSaver cs(Qt::waitCursor); + + HTMLExporter exporter(collection()); + long opt = options() | Export::ExportForce; + opt &= ~ExportProgress; + exporter.setOptions(opt); + exporter.setXSLTFile(m_entryXSLTFile); + exporter.setCollectionURL(url()); + bool parseDOM = true; + + const QString title = QString::fromLatin1("title"); + const QString html = QString::fromLatin1(".html"); + bool multipleTitles = collection()->fieldByName(title)->flags() & Data::Field::AllowMultiple; + Data::EntryVec entries = this->entries(); // not const since the pointer has to be copied + for(Data::EntryVecIt entryIt = entries.begin(); entryIt != entries.end() && !m_cancelled; ++entryIt, ++j) { + QString file = entryIt->field(title, formatted); + + // but only use the first title if it has multiple + if(multipleTitles) { + file = file.section(';', 0, 0); + } + file.replace(badChars, QChar('_')); + file += QChar('-') + QString::number(entryIt->id()) + html; + outputFile.setFileName(file); + + exporter.setEntries(Data::EntryVec(entryIt)); + exporter.setURL(outputFile); + exporter.exec(); + + // no longer need to parse DOM + if(parseDOM) { + parseDOM = false; + exporter.setParseDOM(false); + // this is rather stupid, but I'm too lazy to figure out the better way + // since we parsed the DOM for the first entry file to grab any + // images used in the template, need to resave it so the image links + // get written 
correctly + exporter.exec(); + } + + if(j%stepSize == 0) { + if(options() & ExportProgress) { + ProgressManager::self()->setProgress(this, QMIN(start+j/stepSize, 99)); + } + kapp->processEvents(); + } + } + // the images in "pics/" are special data images, copy them always + // since the entry files may refer to them, but we don't know that + QStringList dataImages; + dataImages << QString::fromLatin1("checkmark.png"); + for(uint i = 1; i <= 10; ++i) { + dataImages << QString::fromLatin1("stars%1.png").arg(i); + } + KURL dataDir; + dataDir.setPath(KGlobal::dirs()->findResourceDir("appdata", QString::fromLatin1("pics/tellico.png")) + "pics/"); + KURL target = fileDir(); + target.addPath(QString::fromLatin1("pics/")); + KIO::NetAccess::mkdir(target, m_widget); + for(QStringList::ConstIterator it = dataImages.begin(); it != dataImages.end(); ++it) { + dataDir.setFileName(*it); + target.setFileName(*it); + KIO::NetAccess::copy(dataDir, target, m_widget); + } + + return true; +} + +void HTMLExporter::slotCancel() { + m_cancelled = true; +} + +void HTMLExporter::parseDOM(xmlNode* node_) { + if(node_ == 0) { + myDebug() << "HTMLExporter::parseDOM() - no node" << endl; + return; + } + + bool parseChildren = true; + + if(node_->type == XML_ELEMENT_NODE) { + const QCString nodeName = QCString(reinterpret_cast<const char*>(node_->name)).upper(); + xmlElement* elem = reinterpret_cast<xmlElement*>(node_); + // to speed up things, check now for nodename + if(nodeName == "IMG" || nodeName == "SCRIPT" || nodeName == "LINK") { + for(xmlAttribute* attr = elem->attributes; attr; attr = reinterpret_cast<xmlAttribute*>(attr->next)) { + QCString attrName = QCString(reinterpret_cast<const char*>(attr->name)).upper(); + + if( (attrName == "SRC" && (nodeName == "IMG" || nodeName == "SCRIPT")) || + (attrName == "HREF" && nodeName == "LINK")) { +/* (attrName == "BACKGROUND" && (nodeName == "BODY" || + nodeName == "TABLE" || + nodeName == "TH" || + nodeName == "TD"))) */ + xmlChar* value = 
xmlGetProp(node_, attr->name); + if(value) { + xmlSetProp(node_, attr->name, handleLink(value)); + xmlFree(value); + } + // each node only has one significant attribute, so break now + break; + } + } + } else if(nodeName == "STYLE") { + // if the first child is a CDATA, use it, otherwise replace complete node + xmlNode* nodeToReplace = node_; + xmlNode* child = node_->children; + if(child && child->type == XML_CDATA_SECTION_NODE) { + nodeToReplace = child; + } + xmlChar* value = xmlNodeGetContent(nodeToReplace); + if(value) { + xmlNodeSetContent(nodeToReplace, analyzeInternalCSS(value)); + xmlFree(value); + } + // no longer need to parse child text nodes + parseChildren = false; + } + } + + if(parseChildren) { + xmlNode* child = node_->children; + while(child) { + parseDOM(child); + child = child->next; + } + } +} + +#include "htmlexporter.moc" diff --git a/src/translators/htmlexporter.h b/src/translators/htmlexporter.h new file mode 100644 index 0000000..be89bbf --- /dev/null +++ b/src/translators/htmlexporter.h @@ -0,0 +1,124 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef HTMLEXPORTER_H +#define HTMLEXPORTER_H + +class QCheckBox; + +#include "exporter.h" +#include "../stringset.h" + +#include <qstringlist.h> + +#include <libxml/xmlstring.h> + +extern "C" { + struct _xmlNode; +} + +namespace Tellico { + namespace Data { + class Collection; + } + class XSLTHandler; + + namespace Export { + +/** + * 
@author Robby Stephenson + */ +class HTMLExporter : public Exporter { +Q_OBJECT + +public: + HTMLExporter(); + HTMLExporter(Data::CollPtr coll); + ~HTMLExporter(); + + virtual bool exec(); + virtual void reset(); + virtual QString formatString() const; + virtual QString fileFilter() const; + + virtual QWidget* widget(QWidget* parent, const char* name=0); + virtual void readOptions(KConfig*); + virtual void saveOptions(KConfig*); + + void setCollectionURL(const KURL& url) { m_collectionURL = url; m_links.clear(); } + void setXSLTFile(const QString& filename); + void setPrintHeaders(bool printHeaders) { m_printHeaders = printHeaders; } + void setPrintGrouped(bool printGrouped) { m_printGrouped = printGrouped; } + void setMaxImageSize(int w, int h) { m_imageWidth = w; m_imageHeight = h; } + void setGroupBy(const QStringList& groupBy) { m_groupBy = groupBy; } + void setSortTitles(const QStringList& l) + { m_sort1 = l[0]; m_sort2 = l[1]; m_sort3 = l[2]; } + void setColumns(const QStringList& columns) { m_columns = columns; } + void setParseDOM(bool parseDOM) { m_parseDOM = parseDOM; reset(); } + + QString text(); + +public slots: + void slotCancel(); + +private: + void setFormattingOptions(Data::CollPtr coll); + void writeImages(Data::CollPtr coll); + bool writeEntryFiles(); + KURL fileDir() const; + QString fileDirName() const; + + void parseDOM(_xmlNode* node); + QString handleLink(const QString& link); + const xmlChar* handleLink(const xmlChar* link); + QString analyzeInternalCSS(const QString& string); + const xmlChar* analyzeInternalCSS(const xmlChar* string); + bool copyFiles(); + bool loadXSLTFile(); + void createDir(); + + XSLTHandler* m_handler; + bool m_printHeaders : 1; + bool m_printGrouped : 1; + bool m_exportEntryFiles : 1; + bool m_cancelled : 1; + bool m_parseDOM : 1; + bool m_checkCreateDir : 1; + int m_imageWidth; + int m_imageHeight; + + QWidget* m_widget; + QCheckBox* m_checkPrintHeaders; + QCheckBox* m_checkPrintGrouped; + QCheckBox* 
m_checkExportEntryFiles; + QCheckBox* m_checkExportImages; + + KURL m_collectionURL; + QString m_xsltFile; + QString m_xsltFilePath; + QString m_dataDir; + QStringList m_groupBy; + QString m_sort1; + QString m_sort2; + QString m_sort3; + QStringList m_columns; + QString m_entryXSLTFile; + + KURL::List m_files; + QMap<QString, QString> m_links; + StringSet m_copiedFiles; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/importer.h b/src/translators/importer.h new file mode 100644 index 0000000..4df5ccb --- /dev/null +++ b/src/translators/importer.h @@ -0,0 +1,137 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef IMPORTER_H +#define IMPORTER_H + +class QWidget; + +#include "../datavectors.h" + +#include <klocale.h> +#include <kurl.h> + +#include <qobject.h> +#include <qstring.h> + +namespace Tellico { + namespace Import { + enum Options { + ImportProgress = 1 << 5 // show progress bar + }; + +/** + * The top-level abstract class for importing other document formats into Tellico. + * + * The Importer classes import a file, and return a pointer to a newly created + * @ref Data::Collection. Any errors or warnings are added to a status message queue. + * The calling function owns the collection pointer. 
+ * + * @author Robby Stephenson + */ +class Importer : public QObject { +Q_OBJECT + +public: + Importer() : QObject(), m_options(ImportProgress) {} + /** + * The constructor should immediately load the contents of the file to be imported. + * Any warnings or errors should be added the the status message queue. + * + * @param url The URL of the file to import + */ + Importer(const KURL& url) : QObject(), m_options(ImportProgress), m_urls(url) {} + Importer(const KURL::List& urls) : QObject(), m_options(ImportProgress), m_urls(urls) {} + Importer(const QString& text) : QObject(), m_options(ImportProgress), m_text(text) {} + /** + */ + virtual ~Importer() {} + + /** + * Returns a pointer to a @ref Data::Collection containing the contents of the imported file. + * This function should probably only be called once, but the subclasses may cache the + * collection. The collection should not be created until this function is called. + * + * @return A pointer to a @ref Collection created on the stack, or 0 if none could be created. + */ + virtual Data::CollPtr collection() = 0; + /** + * Returns a string containing all the messages added to the queue in the course of loading + * and importing the file. + * + * @return The status message + */ + const QString& statusMessage() const { return m_statusMsg; } + /** + * Returns a widget with the setting specific to this importer, or 0 if no + * options are needed. 
+ * + * @return A pointer to the setting widget + */ + virtual QWidget* widget(QWidget*, const char*) { return 0; } + /** + * Checks to see if the importer can return a collection of this type + * + * @param type The collection type to check + * @return Whether the importer could return a collection of that type + */ + virtual bool canImport(int) const { return true; } + /** + * Validate the import settings + */ + virtual bool validImport() const { return true; } + virtual void setText(const QString& text) { m_text = text; } + long options() const { return m_options; } + void setOptions(long options) { m_options = options; } + /** + * Returns a string useful for the ProgressManager + */ + QString progressLabel() const { + if(url().isEmpty()) return i18n("Loading data..."); else return i18n("Loading %1...").arg(url().fileName()); + } + +public slots: + /** + * The import action was changed in the import dialog + */ + virtual void slotActionChanged(int) {} + +protected: + /** + * Return the URL of the imported file. + * + * @return the file URL + */ + KURL url() const { return m_urls.isEmpty() ? KURL() : m_urls[0]; } + KURL::List urls() const { return m_urls; } + QString text() const { return m_text; } + /** + * Adds a message to the status queue. + * + * @param msg A string containing a warning or error. 
+ */ + void setStatusMessage(const QString& msg) { if(!msg.isEmpty()) m_statusMsg += msg + QChar(' '); } + + static const uint s_stepSize; + +private: + long m_options; + KURL::List m_urls; + QString m_text; + QString m_statusMsg; +}; + + } // end namespace +} // end namespace + +#endif diff --git a/src/translators/libcsv.c b/src/translators/libcsv.c new file mode 100644 index 0000000..4e53f63 --- /dev/null +++ b/src/translators/libcsv.c @@ -0,0 +1,490 @@ +/* +libcsv - parse and write csv data +Copyright (C) 2007 Robert Gamble + + available at http://libcsv.sf.net + + Original available under the terms of the GNU LGPL2, and according + to those terms, relicensed under the GNU GPL2 for inclusion in Tellico */ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#if ___STDC_VERSION__ >= 199901L +# include <stdint.h> +#else +# define SIZE_MAX ((size_t)-1) /* C89 doesn't have stdint.h or SIZE_MAX */ +#endif + +#include "libcsv.h" + +#define VERSION "2.0.0" + +#define ROW_NOT_BEGUN 0 +#define FIELD_NOT_BEGUN 1 +#define FIELD_BEGUN 2 +#define FIELD_MIGHT_HAVE_ENDED 3 + +/* + Explanation of states + ROW_NOT_BEGUN There have not been any fields encountered for this row + FIELD_NOT_BEGUN There have been fields but we are currently not in one + FIELD_BEGUN We are in a field + FIELD_MIGHT_HAVE_ENDED + We encountered a double quote inside a quoted field, the + field is either ended or the quote is literal +*/ + +#define MEM_BLK_SIZE 128 + +#define SUBMIT_FIELD(p) \ + do { \ + if (!quoted) \ + entry_pos -= spaces; \ + if (cb1) \ + cb1(p->entry_buf, entry_pos, data); \ + pstate = FIELD_NOT_BEGUN; \ + entry_pos = quoted = spaces = 0; \ + } while (0) + +#define 
SUBMIT_ROW(p, c) \ + do { \ + if (cb2) \ + cb2(c, data); \ + pstate = ROW_NOT_BEGUN; \ + entry_pos = quoted = spaces = 0; \ + } while (0) + +#define SUBMIT_CHAR(p, c) ((p)->entry_buf[entry_pos++] = (c)) + +static char *csv_errors[] = {"success", + "error parsing data while strict checking enabled", + "memory exhausted while increasing buffer size", + "data size too large", + "invalid status code"}; + +int +csv_error(struct csv_parser *p) +{ + return p->status; +} + +char * +csv_strerror(int status) +{ + if (status >= CSV_EINVALID || status < 0) + return csv_errors[CSV_EINVALID]; + else + return csv_errors[status]; +} + +int +csv_opts(struct csv_parser *p, unsigned char options) +{ + if (p == NULL) + return -1; + + p->options = options; + return 0; +} + +int +csv_init(struct csv_parser **p, unsigned char options) +{ + /* Initialize a csv_parser object returns 0 on success, -1 on error */ + if (p == NULL) + return -1; + + if ((*p = malloc(sizeof(struct csv_parser))) == NULL) + return -1; + + if ( ((*p)->entry_buf = malloc(MEM_BLK_SIZE)) == NULL ) { + free(*p); + return -1; + } + (*p)->pstate = ROW_NOT_BEGUN; + (*p)->quoted = 0; + (*p)->spaces = 0; + (*p)->entry_pos = 0; + (*p)->entry_size = MEM_BLK_SIZE; + (*p)->status = 0; + (*p)->options = options; + (*p)->quote_char = CSV_QUOTE; + (*p)->delim_char = CSV_COMMA; + (*p)->is_space = NULL; + (*p)->is_term = NULL; + + return 0; +} + +void +csv_free(struct csv_parser *p) +{ + /* Free the entry_buffer and the csv_parser object */ + if (p == NULL) + return; + + if (p->entry_buf) + free(p->entry_buf); + + free(p); + return; +} + +int +csv_fini(struct csv_parser *p, void (*cb1)(char *, size_t, void *), void (*cb2)(char c, void *), void *data) +{ + /* Finalize parsing. 
Needed, for example, when file does not end in a newline */ + int quoted = p->quoted; + int pstate = p->pstate; + size_t spaces = p->spaces; + size_t entry_pos = p->entry_pos; + + if (p == NULL) + return -1; + + + if (p->pstate == FIELD_BEGUN && p->quoted && p->options & CSV_STRICT && p->options & CSV_STRICT_FINI) { + p->status = CSV_EPARSE; + return -1; + } + + switch (p->pstate) { + case FIELD_MIGHT_HAVE_ENDED: + p->entry_pos -= p->spaces + 1; /* get rid of spaces and original quote */ + case FIELD_NOT_BEGUN: + case FIELD_BEGUN: + quoted = p->quoted, pstate = p->pstate; + spaces = p->spaces, entry_pos = p->entry_pos; + SUBMIT_FIELD(p); + SUBMIT_ROW(p, 0); + case ROW_NOT_BEGUN: /* Already ended properly */ + ; + } + + p->spaces = p->quoted = p->entry_pos = p->status = 0; + p->pstate = ROW_NOT_BEGUN; + + return 0; +} + +void +csv_set_delim(struct csv_parser *p, char c) +{ + if (p) p->delim_char = c; +} + +void +csv_set_quote(struct csv_parser *p, char c) +{ + if (p) p->quote_char = c; +} + +char +csv_get_delim(struct csv_parser *p) +{ + return p->delim_char; +} + +char +csv_get_quote(struct csv_parser *p) +{ + return p->quote_char; +} + +void +csv_set_space_func(struct csv_parser *p, int (*f)(char)) +{ + if (p) p->is_space = f; +} + +void +csv_set_term_func(struct csv_parser *p, int (*f)(char)) +{ + if (p) p->is_term = f; +} + +static int +csv_increase_buffer(struct csv_parser *p) +{ + size_t to_add = MEM_BLK_SIZE; + void *vp; + while ( p->entry_size >= SIZE_MAX - to_add ) + to_add /= 2; + if (!to_add) { + p->status = CSV_ETOOBIG; + return -1; + } + while ((vp = realloc(p->entry_buf, p->entry_size + to_add)) == NULL) { + to_add /= 2; + if (!to_add) { + p->status = CSV_ENOMEM; + return -1; + } + } + p->entry_buf = vp; + p->entry_size += to_add; + return 0; +} + +size_t +csv_parse(struct csv_parser *p, const char *s, size_t len, void (*cb1)(char *, size_t, void *), void (*cb2)(char c, void *), void *data) +{ + char c; /* The character we are currently processing */ + 
size_t pos = 0; /* The number of characters we have processed in this call */ + char delim = p->delim_char; + char quote = p->quote_char; + int (*is_space)(char) = p->is_space; + int (*is_term)(char) = p->is_term; + int quoted = p->quoted; + int pstate = p->pstate; + size_t spaces = p->spaces; + size_t entry_pos = p->entry_pos; + + while (pos < len) { + /* Check memory usage */ + if (entry_pos == p->entry_size) + if (csv_increase_buffer(p) != 0) { + p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; + return pos; + } + + c = s[pos++]; + switch (pstate) { + case ROW_NOT_BEGUN: + case FIELD_NOT_BEGUN: + if (is_space ? is_space(c) : c == CSV_SPACE || c == CSV_TAB) { /* Space or Tab */ + continue; + } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ + if (pstate == FIELD_NOT_BEGUN) { + SUBMIT_FIELD(p); + SUBMIT_ROW(p, c); + } else { /* ROW_NOT_BEGUN */ + /* Don't submit empty rows by default */ + if (p->options & CSV_REPALL_NL) { + SUBMIT_ROW(p, c); + } + } + continue; + } else if (c == delim) { /* Comma */ + SUBMIT_FIELD(p); + break; + } else if (c == quote) { /* Quote */ + pstate = FIELD_BEGUN; + quoted = 1; + } else { /* Anything else */ + pstate = FIELD_BEGUN; + quoted = 0; + SUBMIT_CHAR(p, c); + } + break; + case FIELD_BEGUN: + if (c == quote) { /* Quote */ + if (quoted) { + SUBMIT_CHAR(p, c); + pstate = FIELD_MIGHT_HAVE_ENDED; + } else { + /* STRICT ERROR - double quote inside non-quoted field */ + if (p->options & CSV_STRICT) { + p->status = CSV_EPARSE; + p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; + return pos-1; + } + SUBMIT_CHAR(p, c); + spaces = 0; + } + } else if (c == delim) { /* Comma */ + if (quoted) { + SUBMIT_CHAR(p, c); + } else { + SUBMIT_FIELD(p); + } + } else if (is_term ? 
is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ + if (!quoted) { + SUBMIT_FIELD(p); + SUBMIT_ROW(p, c); + } else { + SUBMIT_CHAR(p, c); + } + } else if (!quoted && (is_space? is_space(c) : c == CSV_SPACE || c == CSV_TAB)) { /* Tab or space for non-quoted field */ + SUBMIT_CHAR(p, c); + spaces++; + } else { /* Anything else */ + SUBMIT_CHAR(p, c); + spaces = 0; + } + break; + case FIELD_MIGHT_HAVE_ENDED: + /* This only happens when a quote character is encountered in a quoted field */ + if (c == delim) { /* Comma */ + entry_pos -= spaces + 1; /* get rid of spaces and original quote */ + SUBMIT_FIELD(p); + } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ + entry_pos -= spaces + 1; /* get rid of spaces and original quote */ + SUBMIT_FIELD(p); + SUBMIT_ROW(p, c); + } else if (is_space ? is_space(c) : c == CSV_SPACE || c == CSV_TAB) { /* Space or Tab */ + SUBMIT_CHAR(p, c); + spaces++; + } else if (c == quote) { /* Quote */ + if (spaces) { + /* STRICT ERROR - unescaped double quote */ + if (p->options & CSV_STRICT) { + p->status = CSV_EPARSE; + p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; + return pos-1; + } + spaces = 0; + SUBMIT_CHAR(p, c); + } else { + /* Two quotes in a row */ + pstate = FIELD_BEGUN; + } + } else { /* Anything else */ + /* STRICT ERROR - unescaped double quote */ + if (p->options & CSV_STRICT) { + p->status = CSV_EPARSE; + p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; + return pos-1; + } + pstate = FIELD_BEGUN; + spaces = 0; + SUBMIT_CHAR(p, c); + } + break; + default: + break; + } + } + p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; + return pos; +} + +size_t +csv_write (char *dest, size_t dest_size, const char *src, size_t src_size) +{ + size_t chars = 0; + + if (src == NULL) + return 0; + + if (dest == NULL) + dest_size = 0; + + if (dest_size > 0) + 
*dest++ = '"'; + chars++; + + while (src_size) { + if (*src == '"') { + if (dest_size > chars) + *dest++ = '"'; + if (chars < SIZE_MAX) chars++; + } + if (dest_size > chars) + *dest++ = *src; + if (chars < SIZE_MAX) chars++; + src_size--; + src++; + } + + if (dest_size > chars) + *dest = '"'; + if (chars < SIZE_MAX) chars++; + + return chars; +} + +int +csv_fwrite (FILE *fp, const char *src, size_t src_size) +{ + if (fp == NULL || src == NULL) + return 0; + + if (fputc('"', fp) == EOF) + return EOF; + + while (src_size) { + if (*src == '"') { + if (fputc('"', fp) == EOF) + return EOF; + } + if (fputc(*src, fp) == EOF) + return EOF; + src_size--; + src++; + } + + if (fputc('"', fp) == EOF) { + return EOF; + } + + return 0; +} + +size_t +csv_write2 (char *dest, size_t dest_size, const char *src, size_t src_size, char quote) +{ + size_t chars = 0; + + if (src == NULL) + return 0; + + if (dest == NULL) + dest_size = 0; + + if (dest_size > 0) + *dest++ = quote; + chars++; + + while (src_size) { + if (*src == quote) { + if (dest_size > chars) + *dest++ = quote; + if (chars < SIZE_MAX) chars++; + } + if (dest_size > chars) + *dest++ = *src; + if (chars < SIZE_MAX) chars++; + src_size--; + src++; + } + + if (dest_size > chars) + *dest = quote; + if (chars < SIZE_MAX) chars++; + + return chars; +} + +int +csv_fwrite2 (FILE *fp, const char *src, size_t src_size, char quote) +{ + if (fp == NULL || src == NULL) + return 0; + + if (fputc(quote, fp) == EOF) + return EOF; + + while (src_size) { + if (*src == quote) { + if (fputc(quote, fp) == EOF) + return EOF; + } + if (fputc(*src, fp) == EOF) + return EOF; + src_size--; + src++; + } + + if (fputc(quote, fp) == EOF) { + return EOF; + } + + return 0; +} diff --git a/src/translators/libcsv.h b/src/translators/libcsv.h new file mode 100644 index 0000000..9058192 --- /dev/null +++ b/src/translators/libcsv.h @@ -0,0 +1,84 @@ +/* +libcsv - parse and write csv data +Copyright (C) 2007 Robert Gamble + + available at http://libcsv.sf.net 
+ + Original available under the terms of the GNU LGPL2, and according + to those terms, relicensed under the GNU GPL2 for inclusion in Tellico */ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + + +#ifndef LIBCSV_H__ +#define LIBCSV_H__ +#include <stdlib.h> +#include <stdio.h> + +#define CSV_MAJOR 2 +#define CSV_MINOR 0 +#define CSV_RELEASE 1 + +/* Error Codes */ +#define CSV_SUCCESS 0 +#define CSV_EPARSE 1 /* Parse error in strict mode */ +#define CSV_ENOMEM 2 /* Out of memory while increasing buffer size */ +#define CSV_ETOOBIG 3 /* Buffer larger than SIZE_MAX needed */ +#define CSV_EINVALID 4 /* Invalid code,should never be received from csv_error*/ + + +/* parser options */ +#define CSV_STRICT 1 /* enable strict mode */ +#define CSV_REPALL_NL 2 /* report all unquoted carriage returns and linefeeds */ +#define CSV_STRICT_FINI 4 /* causes csv_fini to return CSV_EPARSE if last + field is quoted and doesn't containg ending + quote */ + +/* Character values */ +#define CSV_TAB 0x09 +#define CSV_SPACE 0x20 +#define CSV_CR 0x0d +#define CSV_LF 0x0a +#define CSV_COMMA 0x2c +#define CSV_QUOTE 0x22 + +struct csv_parser { + int pstate; /* Parser state */ + int quoted; /* Is the current field a quoted field? 
*/ + size_t spaces; /* Number of continious spaces after quote or in a non-quoted field */ + char * entry_buf; /* Entry buffer */ + size_t entry_pos; /* Current position in entry_buf (and current size of entry) */ + size_t entry_size; /* Size of buffer */ + int status; /* Operation status */ + unsigned char options; + char quote_char; + char delim_char; + int (*is_space)(char); + int (*is_term)(char); +}; + +int csv_init(struct csv_parser **p, unsigned char options); +int csv_fini(struct csv_parser *p, void (*cb1)(char *, size_t, void *), void (*cb2)(char, void *), void *data); +void csv_free(struct csv_parser *p); +int csv_error(struct csv_parser *p); +char * csv_strerror(int error); +size_t csv_parse(struct csv_parser *p, const char *s, size_t len, void (*cb1)(char *, size_t, void *), void (*cb2)(char, void *), void *data); +size_t csv_write(char *dest, size_t dest_size, const char *src, size_t src_size); +int csv_fwrite(FILE *fp, const char *src, size_t src_size); +size_t csv_write2(char *dest, size_t dest_size, const char *src, size_t src_size, char quote); +int csv_fwrite2(FILE *fp, const char *src, size_t src_size, char quote); +int csv_opts(struct csv_parser *p, unsigned char options); +void csv_set_delim(struct csv_parser *p, char c); +void csv_set_quote(struct csv_parser *p, char c); +char csv_get_delim(struct csv_parser *p); +char csv_get_quote(struct csv_parser *p); +void csv_set_space_func(struct csv_parser *p, int (*f)(char)); +void csv_set_term_func(struct csv_parser *p, int (*f)(char)); + +#endif diff --git a/src/translators/onixexporter.cpp b/src/translators/onixexporter.cpp new file mode 100644 index 0000000..4479b2f --- /dev/null +++ b/src/translators/onixexporter.cpp @@ -0,0 +1,199 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + 
+/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "onixexporter.h" +#include "xslthandler.h" +#include "tellicoxmlexporter.h" +#include "../collection.h" +#include "../filehandler.h" +#include "../tellico_utils.h" +#include "../imagefactory.h" +#include "../image.h" +#include "../tellico_debug.h" + +#include <config.h> + +#include <kstandarddirs.h> +#include <kapplication.h> +#include <kzip.h> +#include <kconfig.h> +#include <klocale.h> + +#include <qdom.h> +#include <qfile.h> +#include <qdatetime.h> +#include <qbuffer.h> +#include <qlayout.h> +#include <qwhatsthis.h> +#include <qcheckbox.h> +#include <qgroupbox.h> + +using Tellico::Export::ONIXExporter; + +ONIXExporter::ONIXExporter() : Tellico::Export::Exporter(), + m_handler(0), + m_xsltFile(QString::fromLatin1("tellico2onix.xsl")), + m_includeImages(true), + m_widget(0) { +} + +ONIXExporter::ONIXExporter(Data::CollPtr coll_) : Tellico::Export::Exporter(coll_), + m_handler(0), + m_xsltFile(QString::fromLatin1("tellico2onix.xsl")), + m_includeImages(true), + m_widget(0) { +} + +ONIXExporter::~ONIXExporter() { + delete m_handler; + m_handler = 0; +} + +QString ONIXExporter::formatString() const { + return i18n("ONIX Archive"); +} + +QString ONIXExporter::fileFilter() const { + return i18n("*.zip|Zip Files (*.zip)") + QChar('\n') + i18n("*|All Files"); +} + +bool ONIXExporter::exec() { + Data::CollPtr coll = collection(); + if(!coll) { + return false; + } + + QCString xml = text().utf8(); // encoded in utf-8 + + QByteArray data; + QBuffer buf(data); + + KZip zip(&buf); + zip.open(IO_WriteOnly); + zip.writeFile(QString::fromLatin1("onix.xml"), QString::null, QString::null, xml.length(), xml); 
+ + // use a dict for fast random access to keep track of which images were written to the file + if(m_includeImages) { // for now, we're ignoring (options() & Export::ExportImages) + const QString cover = QString::fromLatin1("cover"); + StringSet imageSet; + for(Data::EntryVec::ConstIterator it = entries().begin(); it != entries().end(); ++it) { + const Data::Image& img = ImageFactory::imageById(it->field(cover)); + if(!img.isNull() && !imageSet.has(img.id()) + && (img.format() == "JPEG" || img.format() == "JPG" || img.format() == "GIF")) { /// onix only understands jpeg and gif + QByteArray ba = img.byteArray(); + zip.writeFile(QString::fromLatin1("images/") + it->field(cover), + QString::null, QString::null, ba.size(), ba); + imageSet.add(img.id()); + } + } + } + + zip.close(); + return FileHandler::writeDataURL(url(), data, options() & Export::ExportForce); +// return FileHandler::writeTextURL(url(), text(), options() & Export::ExportUTF8, options() & Export::ExportForce); +} + +QString ONIXExporter::text() { + QString xsltfile = locate("appdata", m_xsltFile); + if(xsltfile.isNull()) { + myDebug() << "ONIXExporter::text() - no xslt file for " << m_xsltFile << endl; + return QString::null; + } + + Data::CollPtr coll = collection(); + if(!coll) { + myDebug() << "ONIXExporter::text() - no collection pointer!" 
<< endl; + return QString::null; + } + + // notes about utf-8 encoding: + // all params should be passed to XSLTHandler in utf8 + // input string to XSLTHandler should be in utf-8, EVEN IF DOM STRING SAYS OTHERWISE + + KURL u; + u.setPath(xsltfile); + // do NOT do namespace processing, it messes up the XSL declaration since + // QDom thinks there are no elements in the Tellico namespace and as a result + // removes the namespace declaration + QDomDocument dom = FileHandler::readXMLFile(u, false); + if(dom.isNull()) { + myDebug() << "ONIXExporter::text() - error loading xslt file: " << xsltfile << endl; + return QString::null; + } + + // the stylesheet prints utf-8 by default, if using locale encoding, need + // to change the encoding attribute on the xsl:output element + if(!(options() & Export::ExportUTF8)) { + XSLTHandler::setLocaleEncoding(dom); + } + + delete m_handler; + m_handler = new XSLTHandler(dom, QFile::encodeName(xsltfile)); + + QDateTime now = QDateTime::currentDateTime(); + m_handler->addStringParam("sentDate", now.toString(QString::fromLatin1("yyyyMMddhhmm")).utf8()); + + m_handler->addStringParam("version", VERSION); + + GUI::CursorSaver cs(Qt::waitCursor); + + // now grab the XML + TellicoXMLExporter exporter(coll); + exporter.setEntries(entries()); + exporter.setIncludeImages(false); // do not include images in XML +// yes, this should be in utf8, always + exporter.setOptions(options() | Export::ExportUTF8); + QDomDocument output = exporter.exportXML(); +#if 0 + QFile f(QString::fromLatin1("/tmp/test.xml")); + if(f.open(IO_WriteOnly)) { + QTextStream t(&f); + t << output.toString(); + } + f.close(); +#endif + return m_handler->applyStylesheet(output.toString()); +} + +QWidget* ONIXExporter::widget(QWidget* parent_, const char* name_/*=0*/) { + if(m_widget && m_widget->parent() == parent_) { + return m_widget; + } + + m_widget = new QWidget(parent_, name_); + QVBoxLayout* l = new QVBoxLayout(m_widget); + + QGroupBox* box = new QGroupBox(1, 
Qt::Horizontal, i18n("ONIX Archive Options"), m_widget); + l->addWidget(box); + + m_checkIncludeImages = new QCheckBox(i18n("Include images in archive"), box); + m_checkIncludeImages->setChecked(m_includeImages); + QWhatsThis::add(m_checkIncludeImages, i18n("If checked, the images in the document will be included " + "in the zipped ONIX archive.")); + + return m_widget; +} + +void ONIXExporter::readOptions(KConfig* config_) { + KConfigGroup group(config_, QString::fromLatin1("ExportOptions - %1").arg(formatString())); + m_includeImages = group.readBoolEntry("Include Images", m_includeImages); +} + +void ONIXExporter::saveOptions(KConfig* config_) { + m_includeImages = m_checkIncludeImages->isChecked(); + + KConfigGroup group(config_, QString::fromLatin1("ExportOptions - %1").arg(formatString())); + group.writeEntry("Include Images", m_includeImages); +} + +#include "onixexporter.moc" diff --git a/src/translators/onixexporter.h b/src/translators/onixexporter.h new file mode 100644 index 0000000..19d52dd --- /dev/null +++ b/src/translators/onixexporter.h @@ -0,0 +1,60 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef ONIXEXPORTER_H +#define ONIXEXPORTER_H + +class QCheckBox; + +#include "exporter.h" + +namespace Tellico { + namespace Data { + class Collection; + } + class XSLTHandler; + namespace Export { + +/** + * @author Robby Stephenson + */ +class ONIXExporter : public Exporter { +Q_OBJECT + +public: + 
ONIXExporter(); + ONIXExporter(Data::CollPtr coll); + ~ONIXExporter(); + + virtual bool exec(); + virtual QString formatString() const; + virtual QString fileFilter() const; + + virtual QWidget* widget(QWidget*, const char* name=0); + virtual void readOptions(KConfig*); + virtual void saveOptions(KConfig*); + + QString text(); + +private: + XSLTHandler* m_handler; + QString m_xsltFile; + bool m_includeImages; + + QWidget* m_widget; + QCheckBox* m_checkIncludeImages; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/pdfimporter.cpp b/src/translators/pdfimporter.cpp new file mode 100644 index 0000000..2d59b33 --- /dev/null +++ b/src/translators/pdfimporter.cpp @@ -0,0 +1,281 @@ +/*************************************************************************** + copyright : (C) 2007 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "pdfimporter.h" +#include "tellicoimporter.h" +#include "xslthandler.h" +#include "../collections/bibtexcollection.h" +#include "../xmphandler.h" +#include "../filehandler.h" +#include "../imagefactory.h" +#include "../tellico_kernel.h" +#include "../fetch/fetchmanager.h" +#include "../fetch/crossreffetcher.h" +#include "../tellico_utils.h" +#include "../progressmanager.h" +#include "../core/netaccess.h" +#include "../tellico_debug.h" + +#include <kstandarddirs.h> +#include <kmessagebox.h> + +#include <config.h> +#ifdef HAVE_POPPLER +#include <poppler-qt.h> +#endif + +namespace { + static const int PDF_FILE_PREVIEW_SIZE = 196; +} + +using 
Tellico::Import::PDFImporter; + +PDFImporter::PDFImporter(const KURL::List& urls_) : Importer(urls_), m_cancelled(false) { +} + +bool PDFImporter::canImport(int type_) const { + return type_ == Data::Collection::Bibtex; +} + +Tellico::Data::CollPtr PDFImporter::collection() { + QString xsltfile = ::locate("appdata", QString::fromLatin1("xmp2tellico.xsl")); + if(xsltfile.isEmpty()) { + kdWarning() << "DropHandler::handleURL() - can not locate xmp2tellico.xsl" << endl; + return 0; + } + + ProgressItem& item = ProgressManager::self()->newProgressItem(this, progressLabel(), true); + item.setTotalSteps(urls().count()); + connect(&item, SIGNAL(signalCancelled(ProgressItem*)), SLOT(slotCancel())); + ProgressItem::Done done(this); + const bool showProgress = options() & ImportProgress; + + KURL u; + u.setPath(xsltfile); + + XSLTHandler xsltHandler(u); + if(!xsltHandler.isValid()) { + kdWarning() << "DropHandler::handleURL() - invalid xslt in xmp2tellico.xsl" << endl; + return 0; + } + + bool hasDOI = false; + bool hasArxiv = false; + + uint j = 0; + + Data::CollPtr coll; + XMPHandler xmpHandler; + KURL::List list = urls(); + for(KURL::List::Iterator it = list.begin(); it != list.end() && !m_cancelled; ++it, ++j) { + FileHandler::FileRef* ref = FileHandler::fileRef(*it); + if(!ref) { + continue; + } + + Data::CollPtr newColl; + Data::EntryPtr entry; + + QString xmp = xmpHandler.extractXMP(ref->fileName()); + // myDebug() << xmp << endl; + if(xmp.isEmpty()) { + setStatusMessage(i18n("Tellico was unable to read any metadata from the PDF file.")); + } else { + setStatusMessage(QString()); + + Import::TellicoImporter importer(xsltHandler.applyStylesheet(xmp)); + newColl = importer.collection(); + if(!newColl || newColl->entryCount() == 0) { + kdWarning() << "DropHandler::handleURL() - no collection found" << endl; + setStatusMessage(i18n("Tellico was unable to read any metadata from the PDF file.")); + } else { + entry = newColl->entries().front(); + hasDOI |= 
!entry->field(QString::fromLatin1("doi")).isEmpty(); + } + } + + if(!newColl) { + newColl = new Data::BibtexCollection(true); + } + if(!entry) { + entry = new Data::Entry(newColl); + newColl->addEntries(entry); + } + +#ifdef HAVE_POPPLER + + // now load from poppler + Poppler::Document* doc = Poppler::Document::load(ref->fileName()); + if(doc && !doc->isLocked()) { + // now the question is, do we overwrite XMP data with Poppler data? + // for now, let's say yes conditionally + QString s = doc->getInfo(QString::fromLatin1("Title")).simplifyWhiteSpace(); + if(!s.isEmpty()) { + entry->setField(QString::fromLatin1("title"), s); + } + // author could be separated by commas, "and" or whatever + // we're not going to overwrite it + if(entry->field(QString::fromLatin1("author")).isEmpty()) { + QRegExp rx(QString::fromLatin1("\\s*(and|,|;)\\s*")); + QStringList authors = QStringList::split(rx, doc->getInfo(QString::fromLatin1("Author")).simplifyWhiteSpace()); + entry->setField(QString::fromLatin1("author"), authors.join(QString::fromLatin1("; "))); + } + s = doc->getInfo(QString::fromLatin1("Keywords")).simplifyWhiteSpace(); + if(!s.isEmpty()) { + // keywords are also separated by semi-colons in poppler + entry->setField(QString::fromLatin1("keyword"), s); + } + + // now parse the first page text and try to guess + Poppler::Page* page = doc->getPage(0); + if(page) { + // a null rectangle means get all text on page + QString text = page->getText(Poppler::Rectangle()); + // borrowed from Referencer + QRegExp rx(QString::fromLatin1("(?:" + "(?:[Dd][Oo][Ii]:? *)" + "|" + "(?:[Dd]igital *[Oo]bject *[Ii]dentifier:? *)" + ")" + "(" + "[^\\.\\s]+" + "\\." 
+ "[^\\/\\s]+" + "\\/" + "[^\\s]+" + ")")); + if(rx.search(text) > -1) { + QString doi = rx.cap(1); + myDebug() << "PDFImporter::collection() - in PDF file, found DOI: " << doi << endl; + entry->setField(QString::fromLatin1("doi"), doi); + hasDOI = true; + } + rx = QRegExp(QString::fromLatin1("arXiv:" + "(" + "[^\\/\\s]+" + "[\\/\\.]" + "[^\\s]+" + ")")); + if(rx.search(text) > -1) { + QString arxiv = rx.cap(1); + myDebug() << "PDFImporter::collection() - in PDF file, found arxiv: " << arxiv << endl; + if(entry->collection()->fieldByName(QString::fromLatin1("arxiv")) == 0) { + Data::FieldPtr field = new Data::Field(QString::fromLatin1("arxiv"), i18n("arXiv ID")); + field->setCategory(i18n("Publishing")); + entry->collection()->addField(field); + } + entry->setField(QString::fromLatin1("arxiv"), arxiv); + hasArxiv = true; + } + + delete page; + } + } else { + myDebug() << "PDFImporter::collection() - unable to read PDF info (poppler)" << endl; + } + delete doc; +#endif + + entry->setField(QString::fromLatin1("url"), (*it).url()); + // always an article? + entry->setField(QString::fromLatin1("entry-type"), QString::fromLatin1("article")); + + QPixmap pix = NetAccess::filePreview(ref->fileName(), PDF_FILE_PREVIEW_SIZE); + delete ref; // removes temp file + + if(!pix.isNull()) { + // is png best option? 
+ QString id = ImageFactory::addImage(pix, QString::fromLatin1("PNG")); + if(!id.isEmpty()) { + Data::FieldPtr field = newColl->fieldByName(QString::fromLatin1("cover")); + if(!field && !newColl->imageFields().isEmpty()) { + field = newColl->imageFields().front(); + } else if(!field) { + field = new Data::Field(QString::fromLatin1("cover"), i18n("Front Cover"), Data::Field::Image); + newColl->addField(field); + } + entry->setField(field, id); + } + } + if(coll) { + coll->addEntries(newColl->entries()); + } else { + coll = newColl; + } + + if(showProgress) { + ProgressManager::self()->setProgress(this, j); + kapp->processEvents(); + } + } + + if(m_cancelled) { + return 0; + } + + if(hasDOI) { + myDebug() << "looking for DOI" << endl; + Fetch::FetcherVec vec = Fetch::Manager::self()->createUpdateFetchers(coll->type(), Fetch::DOI); + if(vec.isEmpty()) { + GUI::CursorSaver cs(Qt::arrowCursor); + KMessageBox::information(Kernel::self()->widget(), + i18n("Tellico is able to download information about entries with a DOI from " + "CrossRef.org. 
However, you must create an CrossRef account and add a new " + "data source with your account information."), + QString::null, + QString::fromLatin1("CrossRefSourceNeeded")); + } else { + Data::EntryVec entries = coll->entries(); + for(Fetch::FetcherVec::Iterator fetcher = vec.begin(); fetcher != vec.end(); ++fetcher) { + for(Data::EntryVecIt entry = entries.begin(); entry != entries.end(); ++entry) { + fetcher->updateEntrySynchronous(entry); + } + } + } + } + + if(m_cancelled) { + return 0; + } + + if(hasArxiv) { + Data::EntryVec entries = coll->entries(); + Fetch::FetcherVec vec = Fetch::Manager::self()->createUpdateFetchers(coll->type(), Fetch::ArxivID); + for(Fetch::FetcherVec::Iterator fetcher = vec.begin(); fetcher != vec.end(); ++fetcher) { + for(Data::EntryVecIt entry = entries.begin(); entry != entries.end(); ++entry) { + fetcher->updateEntrySynchronous(entry); + } + } + } + +// finally + Data::EntryVec entries = coll->entries(); + for(Data::EntryVecIt entry = entries.begin(); entry != entries.end(); ++entry) { + if(entry->title().isEmpty()) { + // use file name + KURL u = entry->field(QString::fromLatin1("url")); + entry->setField(QString::fromLatin1("title"), u.fileName()); + } + } + + if(m_cancelled) { + return 0; + } + return coll; +} + +void PDFImporter::slotCancel() { + m_cancelled = true; +} + +#include "pdfimporter.moc" diff --git a/src/translators/pdfimporter.h b/src/translators/pdfimporter.h new file mode 100644 index 0000000..87da58e --- /dev/null +++ b/src/translators/pdfimporter.h @@ -0,0 +1,41 @@ +/*************************************************************************** + copyright : (C) 2007 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public 
License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_IMPORT_PDFIMPORTER_H +#define TELLICO_IMPORT_PDFIMPORTER_H + +#include "importer.h" + +namespace Tellico { + namespace Import { + +class PDFImporter : public Importer { +Q_OBJECT + +public: + PDFImporter(const KURL::List& urls); + + virtual bool canImport(int type) const; + + virtual Data::CollPtr collection(); + +public slots: + void slotCancel(); + +private: + bool m_cancelled; +}; + + } +} +#endif diff --git a/src/translators/pilotdb/Makefile.am b/src/translators/pilotdb/Makefile.am new file mode 100644 index 0000000..cf21d12 --- /dev/null +++ b/src/translators/pilotdb/Makefile.am @@ -0,0 +1,16 @@ +####### kdevelop will overwrite this part!!! (begin)########## +noinst_LIBRARIES = libpilotdb.a + +AM_CPPFLAGS = $(all_includes) + +libpilotdb_a_METASOURCES = AUTO + +libpilotdb_a_SOURCES = pilotdb.cpp strop.cpp + +SUBDIRS = libflatfile libpalm + +EXTRA_DIST = strop.cpp strop.h portability.h pilotdb.h pilotdb.cpp + +####### kdevelop will overwrite this part!!! (end)############ + +KDE_OPTIONS = noautodist diff --git a/src/translators/pilotdb/libflatfile/DB.cpp b/src/translators/pilotdb/libflatfile/DB.cpp new file mode 100644 index 0000000..40e639a --- /dev/null +++ b/src/translators/pilotdb/libflatfile/DB.cpp @@ -0,0 +1,1437 @@ +/* + * palm-db-tools: Read/write DB-format databases + * Copyright (C) 1999-2001 by Tom Dyas (tdyas@users.sourceforge.net) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifh Floor, Boston, MA 02110-1301 USA + */ + +#include <iostream> +#include <vector> +#include <string> +#include <stdexcept> +#include <sstream> +#include <time.h> + +#include <cstring> + +#include <kdebug.h> + +#include "../strop.h" + +#include "DB.h" + +#include <kdebug.h> + +#define charSeperator '/' +#define VIEWFLAG_USE_IN_EDITVIEW 0x01 + +#define INVALID_DEFAULT 0 +#define NOW_DEFAULT 1 +#define CONSTANT_DEFAULT 2 + +using namespace PalmLib::FlatFile; +using namespace PalmLib; + +namespace { + static const pi_uint16_t CHUNK_FIELD_NAMES = 0; + static const pi_uint16_t CHUNK_FIELD_TYPES = 1; + static const pi_uint16_t CHUNK_FIELD_DATA = 2; + static const pi_uint16_t CHUNK_LISTVIEW_DEFINITION = 64; + static const pi_uint16_t CHUNK_LISTVIEW_OPTIONS = 65; + static const pi_uint16_t CHUNK_LFIND_OPTIONS = 128; + static const pi_uint16_t CHUNK_ABOUT = 254; +} + +template <class Map, class Key> +static inline bool has_key(const Map& map, const Key& key) +{ + return map.find(key) != map.end(); +} + +bool PalmLib::FlatFile::DB::classify(PalmLib::Database& pdb) +{ + return (! pdb.isResourceDB()) + && (pdb.creator() == PalmLib::mktag('D','B','O','S')) + && (pdb.type() == PalmLib::mktag('D','B','0','0')); +} + +bool PalmLib::FlatFile::DB::match_name(const std::string& name) +{ + return (name == "DB") || (name == "db"); +} + +void PalmLib::FlatFile::DB::extract_chunks(const PalmLib::Block& appinfo) +{ + size_t i; + pi_uint16_t chunk_type; + pi_uint16_t chunk_size; + + if (appinfo.size() > 4) { + // Loop through each chunk in the block while data remains. + i = 4; + while (i < appinfo.size()) { + /* Stop the loop if there is not enough room for even one + * chunk header. 
+ */ + if (i + 4 >= appinfo.size()) { +// throw PalmLib::error("header is corrupt"); + kdDebug() << "header is corrupt" << endl; + } + // Copy the chunk type and size into the local buffer. + chunk_type = get_short(appinfo.data() + i); + chunk_size = get_short(appinfo.data() + i + 2); + i += 4; + + // Copy the chunk into seperate storage. + Chunk chunk(appinfo.data() + i, chunk_size); + chunk.chunk_type = chunk_type; + m_chunks[chunk.chunk_type].push_back(chunk); + + /* Advance the index by the size of the chunk. */ + i += chunk.size(); + } + + // If everything was correct, then we should be exactly at the + // end of the block. + if (i != appinfo.size()) { +// throw PalmLib::error("header is corrupt"); + kdDebug() << "header is corrupt" << endl; + } + } else { +// throw PalmLib::error("header is corrupt"); + kdDebug() << "header is corrupt" << endl; + } +} + +void PalmLib::FlatFile::DB::extract_schema(unsigned numFields) +{ + unsigned i; + + if (!has_key(m_chunks, CHUNK_FIELD_NAMES) + || !has_key(m_chunks, CHUNK_FIELD_TYPES)) { +// throw PalmLib::error("database is missing its schema"); + kdDebug() << "database is missing its schema" << endl; + return; + } + + Chunk names_chunk = m_chunks[CHUNK_FIELD_NAMES][0]; + Chunk types_chunk = m_chunks[CHUNK_FIELD_TYPES][0]; + pi_char_t* p = names_chunk.data(); + pi_char_t* q = types_chunk.data(); + + // Ensure that the types chunk has the expected size. + if (types_chunk.size() != numFields * sizeof(pi_uint16_t)) { +// throw PalmLib::error("types chunk is corrupt"); + kdDebug() << "types chunk is corrupt" << endl; + } + // Loop for each field and extract the name and type. + for (i = 0; i < numFields; ++i) { + PalmLib::FlatFile::Field::FieldType type; + int len; + + // Determine the length of the name string. Ensure that the + // string does not go beyond the end of the chunk. 
+ pi_char_t* null_p = reinterpret_cast<pi_char_t*> + (memchr(p, 0, names_chunk.size() - (p - names_chunk.data()))); + if (!null_p) { +// throw PalmLib::error("names chunk is corrupt"); + kdDebug() << "names chunk is corrupt" << endl; + } + len = null_p - p; + + switch (PalmLib::get_short(q)) { + case 0: + type = PalmLib::FlatFile::Field::STRING; + break; + + case 1: + type = PalmLib::FlatFile::Field::BOOLEAN; + break; + + case 2: + type = PalmLib::FlatFile::Field::INTEGER; + break; + + case 3: + type = PalmLib::FlatFile::Field::DATE; + break; + + case 4: + type = PalmLib::FlatFile::Field::TIME; + break; + + case 5: + type = PalmLib::FlatFile::Field::NOTE; + break; + + case 6: + type = PalmLib::FlatFile::Field::LIST; + break; + + case 7: + type = PalmLib::FlatFile::Field::LINK; + break; + + case 8: + type = PalmLib::FlatFile::Field::FLOAT; + break; + + case 9: + type = PalmLib::FlatFile::Field::CALCULATED; + break; + + case 10: + type = PalmLib::FlatFile::Field::LINKED; + break; + + default: +// throw PalmLib::error("unknown field type"); + kdDebug() << "PalmLib::FlatFile::DB::extract_schema() - unknown field type" << endl; + type = PalmLib::FlatFile::Field::STRING; + break; + } + + // Inform the superclass about this field. + appendField(std::string((char *) p, len), type, extract_fieldsdata(i, type)); + + // Advance to the information on the next field. 
+ p += len + 1; + q += 2; + } +} + +void PalmLib::FlatFile::DB::extract_listviews() +{ + if (!has_key(m_chunks, CHUNK_LISTVIEW_DEFINITION)) + return; + +/* throw PalmLib::error("no list views in database");*/ + + const std::vector<Chunk>& chunks = m_chunks[CHUNK_LISTVIEW_DEFINITION]; + + for (std::vector<Chunk>::const_iterator iter = chunks.begin(); + iter != chunks.end(); ++iter) { + const Chunk& chunk = (*iter); + PalmLib::FlatFile::ListView lv; + + if (chunk.size() < (2 + 2 + 32)) { +// throw PalmLib::error("list view is corrupt"); + kdDebug() << "list view is corrupt" << endl; + } + pi_uint16_t flags = PalmLib::get_short(chunk.data()); + pi_uint16_t num_cols = PalmLib::get_short(chunk.data() + 2); + + lv.editoruse = false; + if (flags & VIEWFLAG_USE_IN_EDITVIEW) + lv.editoruse = true; + + if (chunk.size() != static_cast<unsigned> (2 + 2 + 32 + num_cols * 4)) { +// throw PalmLib::error("list view is corrupt"); + kdDebug() << "list view is corrupt" << endl; + } + // Determine the length of the name string. 
+ pi_char_t* null_ptr = reinterpret_cast<pi_char_t*> + (memchr(chunk.data() + 4, 0, 32)); + if (null_ptr) + lv.name = std::string((char *) (chunk.data() + 4), + null_ptr - (chunk.data() + 4)); + else + lv.name = "Unknown"; + + const pi_char_t* p = chunk.data() + 2 + 2 + 32; + for (int i = 0; i < num_cols; ++i) { + pi_uint16_t field = PalmLib::get_short(p); + pi_uint16_t width = PalmLib::get_short(p + 2); + p += 2 * sizeof(pi_uint16_t); + + if (field >= getNumOfFields()) { +// throw PalmLib::error("list view is corrupt"); + kdDebug() << "list view is corrupt" << endl; + } + PalmLib::FlatFile::ListViewColumn col(field, width); + lv.push_back(col); + } + + appendListView(lv); + } +} + +std::string PalmLib::FlatFile::DB::extract_fieldsdata(pi_uint16_t field_search, PalmLib::FlatFile::Field::FieldType type) +{ + std::ostringstream theReturn; + + if (!has_key(m_chunks, CHUNK_FIELD_DATA)) + return std::string(theReturn.str()); + + std::vector<Chunk>& chunks = m_chunks[CHUNK_FIELD_DATA]; + + pi_uint16_t field_num = 0; + bool find = false; + std::vector<Chunk>::const_iterator iter = chunks.begin(); + for ( ; iter != chunks.end(); ++iter) { + const Chunk& chunk = (*iter); + + field_num = PalmLib::get_short(chunk.data()); + + if (field_num == field_search) { + find = true; + break; + } + } + + if (find) { + const Chunk& chunk = (*iter); + + switch (type) { + + case PalmLib::FlatFile::Field::STRING: + theReturn << std::string((const char *)chunk.data()+2, chunk.size() - 2); + break; + + case PalmLib::FlatFile::Field::BOOLEAN: + break; + + case PalmLib::FlatFile::Field::INTEGER: + theReturn << PalmLib::get_long(chunk.data() + sizeof(pi_uint16_t)); + theReturn << charSeperator; + theReturn << PalmLib::get_short(chunk.data() + sizeof(pi_uint16_t) + sizeof(pi_uint32_t)); + break; + + case PalmLib::FlatFile::Field::FLOAT: { + pi_double_t value; + value.words.hi = PalmLib::get_long(chunk.data() + 2); + value.words.lo = PalmLib::get_long(chunk.data() + 6); + + theReturn << 
value.number; + } + break; + + case PalmLib::FlatFile::Field::DATE: + if (*(chunk.data() + sizeof(pi_uint16_t)) == NOW_DEFAULT) + theReturn << "now"; + else if (*(chunk.data() + sizeof(pi_uint16_t)) == CONSTANT_DEFAULT) { + const pi_char_t * ptr = chunk.data() + sizeof(pi_uint16_t) + 1; + struct tm date; + date.tm_year = PalmLib::get_short(ptr) - 1900; + date.tm_mon = (static_cast<int> (*(ptr + 2))) - 1; + date.tm_mday = static_cast<int> (*(ptr + 3)); + + (void) mktime(&date); + + char buf[1024]; + + // Clear out the output buffer. + memset(buf, 0, sizeof(buf)); + + // Convert and output the date using the format. + strftime(buf, sizeof(buf), "%Y/%m/%d", &date); + + theReturn << buf; + } + break; + + case PalmLib::FlatFile::Field::TIME: + if (*(chunk.data() + sizeof(pi_uint16_t)) == NOW_DEFAULT) + theReturn << "now"; + else if (*(chunk.data() + sizeof(pi_uint16_t)) == CONSTANT_DEFAULT) { + const pi_char_t * ptr = chunk.data() + sizeof(pi_uint16_t) + 1; + struct tm t; + const struct tm * tm_ptr; + time_t now; + + time(&now); + tm_ptr = localtime(&now); + memcpy(&t, tm_ptr, sizeof(tm)); + + t.tm_hour = static_cast<int> (*(ptr)); + t.tm_min = static_cast<int> (*(ptr + 1)); + t.tm_sec = 0; + + char buf[1024]; + + // Clear out the output buffer. + memset(buf, 0, sizeof(buf)); + + // Convert and output the date using the format. 
+ strftime(buf, sizeof(buf), "%H:%M", &t); + + theReturn << buf; + } + break; + + case PalmLib::FlatFile::Field::NOTE: + break; + + case PalmLib::FlatFile::Field::LIST: { + unsigned short numItems = PalmLib::get_short(chunk.data() + sizeof(pi_uint16_t)); + int prevLength = 0; + std::string item; + + if (numItems > 0) { + for (unsigned short i = 0; i < numItems - 1; i++) { + item = std::string((const char *)chunk.data() + 3 * sizeof(pi_uint16_t) + prevLength); + theReturn << item << charSeperator; + prevLength += item.length() + 1; + } + item = std::string((const char *)chunk.data() + 3 * sizeof(pi_uint16_t) + prevLength); + theReturn << item; + } + } + break; + + case PalmLib::FlatFile::Field::LINK: + theReturn << std::string((const char *)chunk.data()+sizeof(pi_uint16_t)); +// theReturn << std::string((const char *)chunk.data()+sizeof(pi_uint16_t), chunk.size() - 2); + theReturn << charSeperator; + theReturn << PalmLib::get_short(chunk.data() + sizeof(pi_uint16_t) + 32 * sizeof(pi_char_t)); + break; + + case PalmLib::FlatFile::Field::LINKED: + theReturn << PalmLib::get_short(chunk.data() + sizeof(pi_uint16_t)); + theReturn << charSeperator; + theReturn << PalmLib::get_short(chunk.data() + 2 * sizeof(pi_uint16_t)); + break; + + case PalmLib::FlatFile::Field::CALCULATED: + break; + + default: + kdDebug() << "unknown field type" << endl; + break; + } + } + return std::string(theReturn.str()); +} + +void PalmLib::FlatFile::DB::extract_aboutinfo() +{ + if (!has_key(m_chunks, CHUNK_ABOUT)) + return; + + Chunk chunk = m_chunks[CHUNK_ABOUT][0]; + pi_char_t* header = chunk.data(); + pi_char_t* q = chunk.data() + PalmLib::get_short(header); + + setAboutInformation( (char*)q); +} + +void PalmLib::FlatFile::DB::parse_record(PalmLib::Record& record, + std::vector<pi_char_t *>& ptrs, + std::vector<size_t>& sizes) +{ + unsigned i; + + // Ensure that enough space for the offset table exists. 
+ if (record.size() < getNumOfFields() * sizeof(pi_uint16_t)) { +// throw PalmLib::error("record is corrupt"); + kdDebug() << "record is corrupt" << endl; + } + // Extract the offsets from the record. Determine field pointers. + std::vector<pi_uint16_t> offsets(getNumOfFields()); + for (i = 0; i < getNumOfFields(); ++i) { + offsets[i] = get_short(record.data() + i * sizeof(pi_uint16_t)); + if (offsets[i] >= record.size()) { +// throw PalmLib::error("record is corrupt"); + kdDebug() << "record is corrupt" << endl; + } + ptrs.push_back(record.data() + offsets[i]); + } + + // Determine the field sizes. + for (i = 0; i < getNumOfFields() - 1; ++i) { + sizes.push_back(offsets[i + 1] - offsets[i]); + } + sizes.push_back(record.size() - offsets[getNumOfFields() - 1]); +} + +PalmLib::FlatFile::DB::DB(PalmLib::Database& pdb) + : Database("db", pdb), m_flags(0) +{ + // Split the application information block into its component chunks. + extract_chunks(pdb.getAppInfoBlock()); + + // Pull the header fields and schema out of the databasse. + m_flags = get_short(pdb.getAppInfoBlock().data()); + unsigned numFields = get_short(pdb.getAppInfoBlock().data() + 2); + extract_schema(numFields); + + // Extract all of the list views. 
+ extract_listviews(); + + extract_aboutinfo(); + + for (unsigned i = 0; i < pdb.getNumRecords(); ++i) { + PalmLib::Record record = pdb.getRecord(i); + Record rec; + + std::vector<pi_char_t *> ptrs; + std::vector<size_t> sizes; + parse_record(record, ptrs, sizes); + for (unsigned j = 0; j < getNumOfFields(); ++j) { + PalmLib::FlatFile::Field f; + f.type = field_type(j); + + switch (field_type(j)) { + case PalmLib::FlatFile::Field::STRING: + f.type = PalmLib::FlatFile::Field::STRING; + f.v_string = std::string((char *) ptrs[j], sizes[j] - 1); + break; + + case PalmLib::FlatFile::Field::BOOLEAN: + f.type = PalmLib::FlatFile::Field::BOOLEAN; + if (*(ptrs[j])) + f.v_boolean = true; + else + f.v_boolean = false; + break; + + case PalmLib::FlatFile::Field::INTEGER: + f.type = PalmLib::FlatFile::Field::INTEGER; + f.v_integer = PalmLib::get_long(ptrs[j]); + break; + + case PalmLib::FlatFile::Field::FLOAT: { + // Place data from database in a union for conversion. + pi_double_t value; + value.words.hi = PalmLib::get_long(ptrs[j]); + value.words.lo = PalmLib::get_long(ptrs[j] + 4); + + // Fill out the information for this field. 
+ f.type = PalmLib::FlatFile::Field::FLOAT; + f.v_float = value.number; + } + break; + + case PalmLib::FlatFile::Field::DATE: + f.type = PalmLib::FlatFile::Field::DATE; + f.v_date.year = PalmLib::get_short(ptrs[j]); + f.v_date.month = static_cast<int> (*(ptrs[j] + 2)); + f.v_date.day = static_cast<int> (*(ptrs[j] + 3)); + break; + + case PalmLib::FlatFile::Field::TIME: + f.type = PalmLib::FlatFile::Field::TIME; + f.v_time.hour = static_cast<int> (*(ptrs[j])); + f.v_time.minute = static_cast<int> (*(ptrs[j] + 1)); + break; + + case PalmLib::FlatFile::Field::NOTE: + f.type = PalmLib::FlatFile::Field::NOTE; + f.v_string = std::string((char *) ptrs[j], sizes[j] - 3); + f.v_note = std::string((char *) (record.data() + get_short(ptrs[j] + strlen(f.v_string.c_str()) + 1))); + break; + + case PalmLib::FlatFile::Field::LIST: + f.type = PalmLib::FlatFile::Field::LIST; + if (!field(j).argument().empty()) { + std::string data = field(j).argument(); + unsigned int k; + std::string::size_type pos = 0; + pi_uint16_t itemID = *ptrs[j]; // TR: a list value is stored on 1 byte + + for (k = 0; k < itemID; k++) { + if ((pos = data.find(charSeperator, pos)) == std::string::npos) { + break; + } + pos++; + } + if (pos == std::string::npos) { + f.v_string = "N/A"; + } else { + if (data.find(charSeperator, pos) == std::string::npos) { + f.v_string = data.substr( pos, std::string::npos); + } else { + f.v_string = data.substr( pos, data.find(charSeperator, pos) - pos); + } + } + } + break; + + case PalmLib::FlatFile::Field::LINK: + f.type = PalmLib::FlatFile::Field::LINK; + f.v_integer = PalmLib::get_long(ptrs[j]); + f.v_string = std::string((char *) (ptrs[j] + 4), sizes[j] - 5); + break; + + case PalmLib::FlatFile::Field::LINKED: + f.type = PalmLib::FlatFile::Field::LINKED; + f.v_string = std::string((char *) ptrs[j], sizes[j] - 1); + break; + + case PalmLib::FlatFile::Field::CALCULATED: { + std::ostringstream value; + f.type = PalmLib::FlatFile::Field::CALCULATED; + switch (ptrs[j][0]) { + 
case 1: //string + value << std::string((char *) ptrs[j] + 1, sizes[j] - 2); + break; + case 2: //integer + value << PalmLib::get_long(ptrs[j] + 1); + break; + case 9: //float + { + pi_double_t fvalue; + fvalue.words.hi = PalmLib::get_long(ptrs[j] + 1); + fvalue.words.lo = PalmLib::get_long(ptrs[j] + 5); + + value << fvalue.number; + } + default: + value << "N/A"; + } + f.v_string = value.str(); + } break; + + default: + kdDebug() << "unknown field type" << endl; + break; + } + + // Append this field to the record. + rec.appendField(f); + } + rec.unique_id(record.unique_id()); + // Append this record to the database. + appendRecord(rec); + } +} + +void PalmLib::FlatFile::DB::make_record(PalmLib::Record& pdb_record, + const Record& record) const +{ + unsigned int i; + + // Determine the packed size of this record. + size_t size = getNumOfFields() * sizeof(pi_uint16_t); + for (i = 0; i < getNumOfFields(); i++) { +#ifdef HAVE_VECTOR_AT + const Field field = record.fields().at(i); +#else + const Field field = record.fields()[i]; +#endif + switch (field.type) { + case PalmLib::FlatFile::Field::STRING: + size += field.v_string.length() + 1; + break; + + case PalmLib::FlatFile::Field::NOTE: + size += field.v_string.length() + 3; + size += field.v_note.length() + 1; + break; + + case PalmLib::FlatFile::Field::BOOLEAN: + size += 1; + break; + + case PalmLib::FlatFile::Field::INTEGER: + size += 4; + break; + + case PalmLib::FlatFile::Field::FLOAT: + size += 8; + break; + + case PalmLib::FlatFile::Field::DATE: + size += sizeof(pi_uint16_t) + 2 * sizeof(pi_char_t); + break; + + case PalmLib::FlatFile::Field::TIME: + size += 2 * sizeof(pi_char_t); + break; + + case PalmLib::FlatFile::Field::LIST: + size += sizeof(pi_char_t); + break; + + case PalmLib::FlatFile::Field::LINK: + size += sizeof(pi_int32_t); + size += field.v_string.length() + 1; + break; + + case PalmLib::FlatFile::Field::LINKED: + size += field.v_string.length() + 1; + break; + + case 
PalmLib::FlatFile::Field::CALCULATED: + size += 1; + break; + + default: + kdDebug() << "unsupported field type" << endl; + break; + } + } + + // Allocate a block for the packed record and setup the pointers. + pi_char_t* buf = new pi_char_t[size]; + pi_char_t* p = buf + getNumOfFields() * sizeof(pi_uint16_t); + pi_char_t* offsets = buf; + + // Pack the fields into the buffer. + for (i = 0; i < getNumOfFields(); i++) { + pi_char_t* noteOffsetOffset = 0; + bool setNote = false; +#ifdef HAVE_VECTOR_AT + const Field fieldData = record.fields().at(i); +#else + const Field fieldData = record.fields()[i]; +#endif + + // Mark the offset to the start of this field in the offests table. + PalmLib::set_short(offsets, static_cast<pi_uint16_t> (p - buf)); + offsets += sizeof(pi_uint16_t); + + // Pack the field. + switch (fieldData.type) { + case PalmLib::FlatFile::Field::STRING: + memcpy(p, fieldData.v_string.c_str(), fieldData.v_string.length() + 1); + p += fieldData.v_string.length() + 1; + break; + + case PalmLib::FlatFile::Field::NOTE: + if (setNote) + kdDebug() << "unsupported field type"; + memcpy(p, fieldData.v_string.c_str(), fieldData.v_string.length() + 1); + p += fieldData.v_string.length() + 1; + noteOffsetOffset = p; + p += 2; + setNote = true; + break; + + case PalmLib::FlatFile::Field::BOOLEAN: + *p++ = ((fieldData.v_boolean) ? 1 : 0); + break; + + case PalmLib::FlatFile::Field::INTEGER: + PalmLib::set_long(p, fieldData.v_integer); + p += sizeof(pi_int32_t); + break; + + case PalmLib::FlatFile::Field::FLOAT: { + // Place data the data in a union for easy conversion. 
+ pi_double_t value; + value.number = fieldData.v_float; + PalmLib::set_long(p, value.words.hi); + p += sizeof(pi_uint32_t); + PalmLib::set_long(p, value.words.lo); + p += sizeof(pi_uint32_t); + break; + } + + case PalmLib::FlatFile::Field::DATE: + PalmLib::set_short(p, fieldData.v_date.year); + p += sizeof(pi_uint16_t); + *p++ = static_cast<pi_char_t> (fieldData.v_date.month & 0xFF); + *p++ = static_cast<pi_char_t> (fieldData.v_date.day & 0xFF); + break; + + case PalmLib::FlatFile::Field::TIME: + *p++ = static_cast<pi_char_t> (fieldData.v_time.hour & 0xFF); + *p++ = static_cast<pi_char_t> (fieldData.v_time.minute & 0xFF); + break; + + case PalmLib::FlatFile::Field::LIST: + if (!field(i).argument().empty()) { + std::string data = field(i).argument(); + std::string::size_type pos = 0, next; + unsigned int j = 0; + pi_int16_t itemID = -1; + + while ( (next = data.find(charSeperator, pos)) != std::string::npos) { + if (fieldData.v_string == data.substr( pos, next - pos)) { + itemID = j; + break; + } + j++; + pos = next + 1; + } + // TR: the following test handles the case where the field value + // equals the last item in list (bugfix) + if (itemID == -1 && fieldData.v_string == data.substr( pos, std::string::npos)) { + itemID = j; + } + p[0] = itemID; // TR: a list value is stored on 1 byte + p += sizeof(pi_char_t); + } + break; + + case PalmLib::FlatFile::Field::LINK: + PalmLib::set_long(p, fieldData.v_integer); + p += sizeof(pi_int32_t); + memcpy(p, fieldData.v_string.c_str(), fieldData.v_string.length() + 1); + p += fieldData.v_string.length() + 1; + break; + + case PalmLib::FlatFile::Field::LINKED: + memcpy(p, fieldData.v_string.c_str(), fieldData.v_string.length() + 1); + p += fieldData.v_string.length() + 1; + break; + + case PalmLib::FlatFile::Field::CALCULATED: + *p = 13; + p++; + break; + + default: + kdDebug() << "unsupported field type"; + break; + } + if (setNote) { + if (fieldData.v_note.length()) { + memcpy(p, fieldData.v_note.c_str(), 
fieldData.v_note.length() + 1); + PalmLib::set_short(noteOffsetOffset, (pi_uint16_t)(p - buf)); + p += fieldData.v_note.length() + 1; + } else { + PalmLib::set_short(noteOffsetOffset, 0); + } + } + } + + // Place the packed data into the PalmOS record. + pdb_record.set_raw(buf, size); + delete [] buf; +} + +void PalmLib::FlatFile::DB::build_fieldsdata_chunks(std::vector<DB::Chunk>& chunks) const +{ + pi_char_t * buf = 0, * p; + unsigned int size, i; + + for (i = 0; i < getNumOfFields(); ++i) { + size = 0; + switch (field_type(i)) { + case PalmLib::FlatFile::Field::STRING: + if (!field(i).argument().empty()) { + size = (field(i).argument().length() + 1) + 2; + buf = new pi_char_t[size]; + PalmLib::set_short(buf, i); + strcpy((char *) (buf + 2), field(i).argument().c_str()); + } + break; + + case PalmLib::FlatFile::Field::BOOLEAN: + break; + + case PalmLib::FlatFile::Field::INTEGER: + if (!field(i).argument().empty()) { + std::string data = field(i).argument(); + std::pair< PalmLib::pi_int32_t, PalmLib::pi_int16_t> values(0, 0); + + if ( data.find(charSeperator) != std::string::npos) { + StrOps::convert_string(data.substr( 0, data.find(charSeperator)), values.first); + StrOps::convert_string(data.substr( data.find(charSeperator) + 1, std::string::npos), values.second); + } else + StrOps::convert_string(data, values.first); + + size = 2 + sizeof(pi_uint32_t) + sizeof(pi_uint16_t); + buf = new pi_char_t[size]; + p = buf; + PalmLib::set_short(p, i); + p += sizeof(pi_uint16_t); + PalmLib::set_long(p, values.first); + p += sizeof(pi_uint32_t); + PalmLib::set_short(p, values.second); + p += sizeof(pi_uint16_t); + } + break; + + case PalmLib::FlatFile::Field::FLOAT: + if (!field(i).argument().empty()) { + std::string data = field(i).argument(); + pi_double_t value; + + StrOps::convert_string(data, value.number); + + size = 2 + 2 * sizeof(pi_uint32_t); + buf = new pi_char_t[size]; + p = buf; + PalmLib::set_short(p, i); + p += sizeof(pi_uint16_t); + PalmLib::set_long(p, 
value.words.hi); + p += sizeof(pi_uint32_t); + PalmLib::set_long(p, value.words.lo); + p += sizeof(pi_uint32_t); + } + break; + + case PalmLib::FlatFile::Field::DATE: + if (!field(i).argument().empty()) { + std::string data = field(i).argument(); + struct tm date; + pi_char_t type; + + if (data.substr(0, 3) == "now") { + type = NOW_DEFAULT; + const struct tm * tm_ptr; + time_t now; + + time(&now); + tm_ptr = localtime(&now); + memcpy(&date, tm_ptr, sizeof(tm)); + } else +#ifdef strptime + if (strptime(data.c_str(), "%Y/%m/%d", &date)) +#else + if (StrOps::strptime(data.c_str(), "%Y/%m/%d", &date)) +#endif + type = CONSTANT_DEFAULT; + else + type = INVALID_DEFAULT; + + if (type != INVALID_DEFAULT) { + size = sizeof(pi_uint16_t) + 1 + sizeof(pi_uint16_t) + 2; + buf = new pi_char_t[size]; + p = buf; + PalmLib::set_short(p, i); + p += sizeof(pi_uint16_t); + *p++ = static_cast<pi_char_t> (type & 0xFF); + PalmLib::set_short(p, date.tm_year + 1900); + p += sizeof(pi_uint16_t); + *p++ = static_cast<pi_char_t> ((date.tm_mon + 1) & 0xFF); + *p++ = static_cast<pi_char_t> (date.tm_mday & 0xFF); + } + + } + break; + + case PalmLib::FlatFile::Field::TIME: + if (!field(i).argument().empty()) { + std::string data = field(i).argument(); + struct tm t; + pi_char_t type; + + if (data == "now") { + type = NOW_DEFAULT; + const struct tm * tm_ptr; + time_t now; + + time(&now); + tm_ptr = localtime(&now); + memcpy(&t, tm_ptr, sizeof(tm)); + } else +#ifdef strptime + if (!strptime(data.c_str(), "%H/%M", &t)) +#else + if (!StrOps::strptime(data.c_str(), "%H/%M", &t)) +#endif + type = CONSTANT_DEFAULT; + else + type = INVALID_DEFAULT; + + if (type != INVALID_DEFAULT) { + size = sizeof(pi_uint16_t) + 1 + sizeof(pi_uint16_t) + 2; + buf = new pi_char_t[size]; + p = buf; + PalmLib::set_short(p, i); + p += sizeof(pi_uint16_t); + *p++ = static_cast<pi_char_t> (type & 0xFF); + *p++ = static_cast<pi_char_t> (t.tm_hour & 0xFF); + *p++ = static_cast<pi_char_t> (t.tm_min & 0xFF); + } + + } + break; + 
+ case PalmLib::FlatFile::Field::NOTE: + break; + + case PalmLib::FlatFile::Field::LIST: + if (!field(i).argument().empty()) { + std::string data = field(i).argument(); + std::vector<std::string> items; + std::string::size_type pos = 0, next; + std::vector<std::string>::iterator iter; + size = 2 + 2 * sizeof(pi_uint16_t); + while ( (next = data.find(charSeperator, pos)) != std::string::npos) { + std::string item = data.substr( pos, next - pos); + items.push_back(item); + size += item.length() + 1; + pos = next + 1; + } + if (pos != std::string::npos) { + std::string item = data.substr( pos, std::string::npos); + items.push_back(item); + size += item.length() + 1; + } + + buf = new pi_char_t[size]; + p = buf; + PalmLib::set_short(p, i); + p += sizeof(pi_uint16_t); + PalmLib::set_short(p, items.size()); + p += sizeof(pi_uint16_t); + p += sizeof(pi_uint16_t); + for (iter = items.begin(); iter != items.end(); ++iter) { + std::string& item = (*iter); + strcpy((char *) p, item.c_str()); + p[item.length()] = 0; + p += item.length() + 1; + } + + } + break; + + case PalmLib::FlatFile::Field::LINK: + if (!field(i).argument().empty()) { + std::string data = field(i).argument(); + std::string databasename; + pi_uint16_t fieldnum; + + if ( data.find(charSeperator) != std::string::npos) { + databasename = data.substr( 0, data.find(charSeperator)); + StrOps::convert_string(data.substr( data.find(charSeperator) + 1, std::string::npos), fieldnum); + } else { + databasename = data; + fieldnum = 0; + } + + size = 2 + 32 * sizeof(pi_char_t) + sizeof(pi_uint16_t); + buf = new pi_char_t[size]; + p = buf; + PalmLib::set_short(p, i); + p += sizeof(pi_uint16_t); + strcpy((char *) p, databasename.c_str()); + p += 32 * sizeof(pi_char_t); + PalmLib::set_short(p, fieldnum); + p += sizeof(pi_uint16_t); + } + break; + + case PalmLib::FlatFile::Field::LINKED: + if (!field(i).argument().empty()) { + std::string data = field(i).argument(); + pi_uint16_t linknum; + pi_uint16_t fieldnum; + + if ( 
data.find(charSeperator) != std::string::npos) { + StrOps::convert_string(data.substr( 0, data.find(charSeperator)), linknum); + StrOps::convert_string(data.substr( data.find(charSeperator) + 1, std::string::npos), fieldnum); + if (field_type(linknum) != PalmLib::FlatFile::Field::LINK) { + unsigned int j = 0; + while (field_type(j) != PalmLib::FlatFile::Field::LINK && j < i) j++; + linknum = j; + } + } else { + unsigned int j = 0; + while (field_type(j) != PalmLib::FlatFile::Field::LINK && j < i) j++; + linknum = j; + fieldnum = 0; + } + + size = 2 + 2 * sizeof(pi_uint16_t); + buf = new pi_char_t[size]; + p = buf; + PalmLib::set_short(p, i); + p += sizeof(pi_uint16_t); + PalmLib::set_short(p, linknum); + p += sizeof(pi_uint16_t); + PalmLib::set_short(p, fieldnum); + p += sizeof(pi_uint16_t); + } + break; + + case PalmLib::FlatFile::Field::CALCULATED: + break; + + default: + kdDebug() << "unknown field type" << endl; + break; + } + + if (size) { + Chunk data_chunk(buf, size); + data_chunk.chunk_type = CHUNK_FIELD_DATA; + delete [] buf; + chunks.push_back(data_chunk); + } + } +} + +void PalmLib::FlatFile::DB::build_about_chunk(std::vector<DB::Chunk>& chunks) const +{ + pi_char_t* buf; + pi_char_t* p; + int headersize = 2*sizeof(pi_uint16_t); + std::string information = getAboutInformation(); + + if (!information.length()) + return; + // Build the names chunk. + buf = new pi_char_t[headersize + information.length() + 1]; + p = buf; + + PalmLib::set_short(p, headersize); + p += 2; + PalmLib::set_short(p, 1); //about type version + p += 2; + memcpy(p, information.c_str(), information.length() + 1); + p += information.length() + 1; + Chunk chunk(buf, headersize + information.length() + 1); + chunk.chunk_type = CHUNK_ABOUT; + delete [] buf; + chunks.push_back(chunk); + +} + +void PalmLib::FlatFile::DB::build_standard_chunks(std::vector<DB::Chunk>& chunks) const +{ + pi_char_t* buf; + pi_char_t* p; + unsigned i; + + // Determine the size needed for the names chunk. 
+ size_t names_chunk_size = 0; + for (i = 0; i < getNumOfFields(); ++i) { + names_chunk_size += field_name(i).length() + 1; + } + + // Build the names chunk. + buf = new pi_char_t[names_chunk_size]; + p = buf; + for (i = 0; i < getNumOfFields(); ++i) { + const std::string name = field_name(i); + memcpy(p, name.c_str(), name.length() + 1); + p += name.length() + 1; + } + Chunk names_chunk(buf, names_chunk_size); + names_chunk.chunk_type = CHUNK_FIELD_NAMES; + delete [] buf; + + // Build the types chunk. + buf = new pi_char_t[getNumOfFields() * sizeof(pi_uint16_t)]; + p = buf; + for (i = 0; i < getNumOfFields(); ++i) { + // Pack the type of the current field. + switch (field_type(i)) { + case PalmLib::FlatFile::Field::STRING: + PalmLib::set_short(p, 0); + break; + + case PalmLib::FlatFile::Field::BOOLEAN: + PalmLib::set_short(p, 1); + break; + + case PalmLib::FlatFile::Field::INTEGER: + PalmLib::set_short(p, 2); + break; + + case PalmLib::FlatFile::Field::DATE: + PalmLib::set_short(p, 3); + break; + + case PalmLib::FlatFile::Field::TIME: + PalmLib::set_short(p, 4); + break; + + case PalmLib::FlatFile::Field::NOTE: + PalmLib::set_short(p, 5); + break; + + case PalmLib::FlatFile::Field::LIST: + PalmLib::set_short(p, 6); + break; + + case PalmLib::FlatFile::Field::LINK: + PalmLib::set_short(p, 7); + break; + + case PalmLib::FlatFile::Field::FLOAT: + PalmLib::set_short(p, 8); + break; + + case PalmLib::FlatFile::Field::CALCULATED: + PalmLib::set_short(p, 9); + break; + + case PalmLib::FlatFile::Field::LINKED: + PalmLib::set_short(p, 10); + break; + + default: + kdDebug() << "unsupported field type" << endl; + break; + } + + // Advance to the next position. + p += sizeof(pi_uint16_t); + } + Chunk types_chunk(buf, getNumOfFields() * sizeof(pi_uint16_t)); + types_chunk.chunk_type = CHUNK_FIELD_TYPES; + delete [] buf; + + // Build the list view options chunk. 
+ buf = new pi_char_t[2 * sizeof(pi_uint16_t)]; + PalmLib::set_short(buf, 0); + PalmLib::set_short(buf + sizeof(pi_uint16_t), 0); + Chunk listview_options_chunk(buf, 2 * sizeof(pi_uint16_t)); + listview_options_chunk.chunk_type = CHUNK_LISTVIEW_OPTIONS; + delete [] buf; + + // Build the local find options chunk. + buf = new pi_char_t[sizeof(pi_uint16_t)]; + PalmLib::set_short(buf, 0); + Chunk lfind_options_chunk(buf, 1 * sizeof(pi_uint16_t)); + lfind_options_chunk.chunk_type = CHUNK_LFIND_OPTIONS; + delete [] buf; + + // Add all the chunks to the chunk list. + chunks.push_back(names_chunk); + chunks.push_back(types_chunk); + chunks.push_back(listview_options_chunk); + chunks.push_back(lfind_options_chunk); +} + +void PalmLib::FlatFile::DB::build_listview_chunk(std::vector<DB::Chunk>& chunks, + const ListView& lv) const +{ + // Calculate size and allocate space for the temporary buffer. + size_t size = 2 * sizeof(pi_uint16_t) + 32 + + lv.size() * (2 * sizeof(pi_uint16_t)); + pi_char_t* buf = new pi_char_t[size]; + + // Fill in the header details. + pi_uint16_t flags = 0; + if (lv.editoruse) { + std::cout << "editoruse\n"; + flags |= VIEWFLAG_USE_IN_EDITVIEW; + } + PalmLib::set_short(buf, flags); + PalmLib::set_short(buf + sizeof(pi_uint16_t), lv.size()); + memset((char *) (buf + 4), 0, 32); + strncpy((char *) (buf + 4), lv.name.c_str(), 32); + + // Fill in the column details. + pi_char_t* p = buf + 4 + 32; + for (ListView::const_iterator i = lv.begin(); i != lv.end(); ++i) { + const ListViewColumn& col = (*i); + PalmLib::set_short(p, col.field); + PalmLib::set_short(p + sizeof(pi_uint16_t), col.width); + p += 2 * sizeof(pi_uint16_t); + } + + // Create the chunk and place it in the chunks list. 
+ Chunk chunk(buf, size); + chunk.chunk_type = CHUNK_LISTVIEW_DEFINITION; + delete [] buf; + chunks.push_back(chunk); +} + +void PalmLib::FlatFile::DB::build_appinfo_block(const std::vector<DB::Chunk>& chunks, PalmLib::Block& appinfo) const +{ + std::vector<Chunk>::const_iterator iter; + + // Determine the size of the final app info block. + size_t size = 2 * sizeof(pi_uint16_t); + for (iter = chunks.begin(); iter != chunks.end(); ++iter) { + const Chunk& chunk = (*iter); + size += 2 * sizeof(pi_uint16_t) + chunk.size(); + } + + // Allocate the temporary buffer. Fill in the header. + pi_char_t* buf = new pi_char_t[size]; + PalmLib::set_short(buf, m_flags); + PalmLib::set_short(buf + sizeof(pi_uint16_t), getNumOfFields()); + + // Pack the chunks into the buffer. + size_t i = 4; + for (iter = chunks.begin(); iter != chunks.end(); ++iter) { + const Chunk& chunk = (*iter); + // Set the chunk type and size. + PalmLib::set_short(buf + i, chunk.chunk_type); + PalmLib::set_short(buf + i + 2, chunk.size()); + i += 4; + + // Copy the chunk data into the buffer. + memcpy(buf + i, chunk.data(), chunk.size()); + i += chunk.size(); + } + + // Finally, move the buffer into the provided appinfo block. + appinfo.set_raw(buf, size); + delete [] buf; +} + +void PalmLib::FlatFile::DB::outputPDB(PalmLib::Database& pdb) const +{ + unsigned i; + + // Let the superclass have a chance. + SUPERCLASS(PalmLib::FlatFile, Database, outputPDB, (pdb)); + + // Set the database's type and creator. + pdb.type(PalmLib::mktag('D','B','0','0')); + pdb.creator(PalmLib::mktag('D','B','O','S')); + + // Create the app info block. + std::vector<Chunk> chunks; + build_standard_chunks(chunks); + for (i = 0; i < getNumOfListViews(); ++i) { + build_listview_chunk(chunks, getListView(i)); + } + build_fieldsdata_chunks(chunks); + build_about_chunk(chunks); + + PalmLib::Block appinfo; + build_appinfo_block(chunks, appinfo); + pdb.setAppInfoBlock(appinfo); + + // Output each record to the PalmOS database. 
+ for (i = 0; i < getNumRecords(); ++i) { + Record record = getRecord(i); + PalmLib::Record pdb_record; + + make_record(pdb_record, record); + pdb.appendRecord(pdb_record); + } +} + +unsigned PalmLib::FlatFile::DB::getMaxNumOfFields() const +{ + return 0; +} + +bool +PalmLib::FlatFile::DB::supportsFieldType(const Field::FieldType& type) const +{ + switch (type) { + case PalmLib::FlatFile::Field::STRING: + case PalmLib::FlatFile::Field::BOOLEAN: + case PalmLib::FlatFile::Field::INTEGER: + case PalmLib::FlatFile::Field::FLOAT: + case PalmLib::FlatFile::Field::DATE: + case PalmLib::FlatFile::Field::TIME: + case PalmLib::FlatFile::Field::NOTE: + case PalmLib::FlatFile::Field::LIST: + case PalmLib::FlatFile::Field::LINK: + case PalmLib::FlatFile::Field::LINKED: + case PalmLib::FlatFile::Field::CALCULATED: + return true; + default: + return false; + } +} + +std::vector<std::string> +PalmLib::FlatFile::DB::field_argumentf(int i, std::string& format) +{ + std::vector<std::string> vtitles(0, std::string("")); + int j; + + switch (field_type(i)) { + case PalmLib::FlatFile::Field::STRING: + format = std::string("%s"); + vtitles.push_back(std::string("default value")); + break; + case PalmLib::FlatFile::Field::INTEGER: + format = std::string("%ld/%d"); + vtitles.push_back(std::string("default value")); + vtitles.push_back(std::string("increment")); + break; + case PalmLib::FlatFile::Field::FLOAT: + format = std::string("%f"); + vtitles.push_back(std::string("default value")); + break; + case PalmLib::FlatFile::Field::DATE: + format = std::string("%d/%d/%d"); + vtitles.push_back(std::string("Year (or now)")); + vtitles.push_back(std::string("Month")); + vtitles.push_back(std::string("Day in the month")); + break; + case PalmLib::FlatFile::Field::TIME: + format = std::string("%d/%d"); + vtitles.push_back(std::string("Hour (or now)")); + vtitles.push_back(std::string("Minute")); + break; + case PalmLib::FlatFile::Field::LIST: + format = std::string(""); + for (j = 0; j < 31; i++) 
{ + format += std::string("%s/"); + std::ostringstream title; + title << "item " << j; + vtitles.push_back(title.str()); + } + format += std::string("%s"); + vtitles.push_back(std::string("item 32")); + break; + case PalmLib::FlatFile::Field::LINK: + format = std::string("%s/%d"); + vtitles.push_back(std::string("database")); + vtitles.push_back(std::string("field number")); + break; + case PalmLib::FlatFile::Field::LINKED: + format = std::string("%d/%d"); + vtitles.push_back(std::string("link field number")); + vtitles.push_back(std::string("field number")); + break; + case PalmLib::FlatFile::Field::CALCULATED: + case PalmLib::FlatFile::Field::BOOLEAN: + case PalmLib::FlatFile::Field::NOTE: + default: + format = std::string(""); + break; + } + return vtitles; +} + +unsigned PalmLib::FlatFile::DB::getMaxNumOfListViews() const +{ + return 0; +} + +void PalmLib::FlatFile::DB::doneWithSchema() +{ + // Let the superclass have a chance. + SUPERCLASS(PalmLib::FlatFile, Database, doneWithSchema, ()); +/* false from the 0.3.3 version + if (getNumOfListViews() < 1) + throw PalmLib::error("at least one list view must be specified"); +*/ +} + +void PalmLib::FlatFile::DB::setOption(const std::string& name, + const std::string& value) +{ + if (name == "find") { + if (!StrOps::string2boolean(value)) + m_flags &= ~(1); + else + m_flags |= 1; + } else if (name == "read-only" + || name == "readonly") { + if (!StrOps::string2boolean(value)) + m_flags &= ~(0x8000); + else + m_flags |= 0x8000; + } else { + SUPERCLASS(PalmLib::FlatFile, Database, setOption, (name, value)); + } +} + +PalmLib::FlatFile::Database::options_list_t +PalmLib::FlatFile::DB::getOptions(void) const +{ + typedef PalmLib::FlatFile::Database::options_list_t::value_type value; + PalmLib::FlatFile::Database::options_list_t result; + + result = SUPERCLASS(PalmLib::FlatFile, Database, getOptions, ()); + + if (m_flags & 1) + result.push_back(value("find", "true")); + + if (m_flags & 0x8000) + 
result.push_back(value("read-only", "true")); + + return result; +} diff --git a/src/translators/pilotdb/libflatfile/DB.h b/src/translators/pilotdb/libflatfile/DB.h new file mode 100644 index 0000000..dd09d36 --- /dev/null +++ b/src/translators/pilotdb/libflatfile/DB.h @@ -0,0 +1,166 @@ +/* + * This class provides access to DB-format databases. + */ + +#ifndef __PALMLIB_FLATFILE_DB_H__ +#define __PALMLIB_FLATFILE_DB_H__ + +#include <map> +#include <string> + +#include "../libpalm/Block.h" +#include "../libpalm/Database.h" +#include "Database.h" + +namespace PalmLib { + namespace FlatFile { + + class DB : public Database { + public: + /** + * Return true if this class can handle the given PalmOS + * database. + * + * @param pdb PalmOS database to check for support. + */ + static bool classify(PalmLib::Database& pdb); + + /** + * Return true if this class is the database identified by + * name. + * + * @param name A database type name to check. + */ + static bool match_name(const std::string& name); + + /** + * Default constructor for an initially empty database. + */ + DB():Database("db"), m_flags(0) { } + + /** + * Constructor which fills the flat-file structure from a + * PalmOS database. + */ + DB(PalmLib::Database&); + + // destructor + virtual ~DB() { } + + /** + * After all processing to add fields and records is done, + * outputPDB is called to create the actual file format + * used by the flat-file database product. + * + * @param pdb An instance of PalmLib::Database. + */ + virtual void outputPDB(PalmLib::Database& pdb) const; + + /** + * Return the maximum number of fields allowed in the + * database. This class returns 0 since there is no limit. + */ + virtual unsigned getMaxNumOfFields() const; + + /** + * Return true for the field types that this class + * currently supports. Returns false otherwise. + * + * @param type The field type to check for support. 
+ */ + virtual bool supportsFieldType(const Field::FieldType& type) const; + + /** + * write the format of the field's argument in format, + * and return a strings' vector with name of each argument part. + * the format use the same display as used by printf + */ + virtual std::vector<std::string> field_argumentf(int i, std::string& format); + + /** + * Return the maximum number of views supported by this + * type of flat-file database. + */ + virtual unsigned getMaxNumOfListViews() const; + + /** + * Hook the end of the schema processing. + */ + virtual void doneWithSchema(); + + /** + * Set a extra option. + * + * @param opt_name The name of the option to set. + * @param opt_value The value to assign to this option. + */ + virtual void setOption(const std::string& name, + const std::string& value); + + /** + * Get a list of extra options. + */ + virtual options_list_t getOptions(void) const; + + // Produce a PalmOS record from a flat-file record. + void make_record(PalmLib::Record& pdb_record, + const PalmLib::FlatFile::Record& record) const; + + private: + pi_uint16_t m_flags; + + class Chunk : public PalmLib::Block { + public: + Chunk() : PalmLib::Block(), chunk_type(0) { } + Chunk(const Chunk& rhs) + : PalmLib::Block(rhs), chunk_type(rhs.chunk_type) { } + Chunk(PalmLib::Block::const_pointer data, + const PalmLib::Block::size_type size) + : PalmLib::Block(data, size), chunk_type(0) { } + Chunk& operator = (const Chunk& rhs) { + Block::operator = (rhs); + chunk_type = rhs.chunk_type; + return *this; + } + + pi_uint16_t chunk_type; + }; + + typedef std::map<pi_uint16_t, std::vector<Chunk> > chunks_t; + chunks_t m_chunks; + + // Extract the chunks from an app info block to m_chunks. + void extract_chunks(const PalmLib::Block&); + + // Extract the schema. + void extract_schema(unsigned numFields); + + // Extract the list views from the app info block. 
+ void extract_listviews(); + + //extract the field data + std::string extract_fieldsdata(pi_uint16_t field_search, + PalmLib::FlatFile::Field::FieldType type); + + void extract_aboutinfo(); + + // Determine location and size of each field. + void parse_record(PalmLib::Record& record, + std::vector<pi_char_t *>& ptrs, + std::vector<size_t>& sizes); + + // The following routines build various types of chunks + // for the app info block and assemble them all. + void build_fieldsdata_chunks(std::vector<Chunk>& chunks) const; + void build_standard_chunks(std::vector<Chunk>&) const; + void build_listview_chunk(std::vector<Chunk>&, + const ListView&) const; + void build_about_chunk(std::vector<Chunk>& chunks) const; + void build_appinfo_block(const std::vector<Chunk>&, + PalmLib::Block&) const; + }; + + } +} + +#endif diff --git a/src/translators/pilotdb/libflatfile/Database.cpp b/src/translators/pilotdb/libflatfile/Database.cpp new file mode 100644 index 0000000..578b82d --- /dev/null +++ b/src/translators/pilotdb/libflatfile/Database.cpp @@ -0,0 +1,331 @@ +/* + * palm-db-tools: Abstract adaptor for flat-file databases. 
+ * Copyright (C) 1999-2000 by Tom Dyas (tdyas@users.sourceforge.net) + */ + +#include <iostream> +#include <sstream> +#include <stdexcept> +#include <sstream> +#include <utility> +#include <cctype> + +#include <kdebug.h> + +#include "Database.h" + +PalmLib::FlatFile::Database::Database(std::string p_Type, const PalmLib::Database& pdb) + : m_Type(p_Type) +{ + title(pdb.name()); + m_backup = pdb.backup(); + m_readonly = pdb.readonly(); + m_copy_prevention = pdb.copy_prevention(); +} + +void +PalmLib::FlatFile::Database::outputPDB(PalmLib::Database& pdb) const +{ + pdb.name(title()); + pdb.backup(m_backup); + pdb.readonly(m_readonly); + pdb.copy_prevention(m_copy_prevention); +} + +std::string +PalmLib::FlatFile::Database::title() const +{ + return m_title; +} + +void +PalmLib::FlatFile::Database::title(const std::string& title) +{ + m_title = title; +} + +unsigned +PalmLib::FlatFile::Database::getNumOfFields() const +{ + return m_fields.size(); +} + +std::string +PalmLib::FlatFile::Database::field_name(int i) const +{ + return m_fields[i].title(); +/* return m_fields[i].first;*/ +} + +PalmLib::FlatFile::Field::FieldType +PalmLib::FlatFile::Database::field_type(int i) const +{ + return m_fields[i].type(); +/* return m_fields[i].second;*/ +} + +PalmLib::FlatFile::FType +PalmLib::FlatFile::Database::field(int i) const +{ + return m_fields[i]; +} + +void +PalmLib::FlatFile::Database::appendField(PalmLib::FlatFile::FType field) +{ + if (! supportsFieldType(field.type())) { +// throw PalmLib::error("unsupported field type"); + kdDebug() << "unsupported field type" << endl; + return; + } + if (getMaxNumOfFields() != 0 && getNumOfFields() + 1 > getMaxNumOfFields()) { +// throw PalmLib::error("maximum number of fields reached"); + kdDebug() << "maximum number of fields reached" << endl; + return; + } + m_fields.push_back(field); +} + +void +PalmLib::FlatFile::Database::appendField(const std::string& name, + Field::FieldType type, std::string data) +{ + if (! 
supportsFieldType(type)) { + kdDebug() << "PalmLib::FlatFile::Database::appendField() - unsupported field type" << endl; + return; + } +// throw PalmLib::error("unsupported field type"); + if (getMaxNumOfFields() != 0 && getNumOfFields() + 1 > getMaxNumOfFields()) { + kdDebug() << "PalmLib::FlatFile::Database::appendField() - maximum number of fields reached" << endl; + return; + } +// throw PalmLib::error("maximum number of fields reached"); + +/* m_fields.push_back(std::make_pair(name, type));*/ +/* m_fields.push_back(PalmLib::FlatFile::make_ftype(name, type));*/ + m_fields.push_back(PalmLib::FlatFile::FType(name, type, data)); +} + +void +PalmLib::FlatFile::Database::insertField(int i, PalmLib::FlatFile::FType field) +{ + if (! supportsFieldType(field.type())) { +// throw PalmLib::error("unsupported field type"); + kdDebug() << "unsupported field type" << endl; + return; + } + if (getMaxNumOfFields() != 0 && getNumOfFields() + 1 > getMaxNumOfFields()) { +// throw PalmLib::error("maximum number of fields reached"); + kdDebug() << "maximum number of fields reached" << endl; + return; + } +/* m_fields.push_back(std::make_pair(name, type));*/ +/* m_fields.push_back(PalmLib::FlatFile::make_ftype(name, type));*/ + m_fields.insert(m_fields.begin() + i, field); +} + +void +PalmLib::FlatFile::Database::insertField(int i, const std::string& name, + Field::FieldType type, std::string data) +{ + if (! 
supportsFieldType(type)) { +// throw PalmLib::error("unsupported field type"); + kdDebug() << "unsupported field type" << endl; + return; + } + if (getMaxNumOfFields() != 0 && getNumOfFields() + 1 > getMaxNumOfFields()) { +// throw PalmLib::error("maximum number of fields reached"); + kdDebug() << "maximum number of fields reached" << endl; + return; + } +/* m_fields.push_back(std::make_pair(name, type));*/ +/* m_fields.push_back(PalmLib::FlatFile::make_ftype(name, type));*/ + m_fields.insert(m_fields.begin() + i, PalmLib::FlatFile::FType(name, type, data)); +} + +void +PalmLib::FlatFile::Database::removeField(int i) +{ + m_fields.erase(m_fields.begin() + i); +} + +unsigned +PalmLib::FlatFile::Database::getNumRecords() const +{ + return m_records.size(); +} + +PalmLib::FlatFile::Record +PalmLib::FlatFile::Database::getRecord(unsigned index) const +{ + if (index >= getNumRecords()) { + kdDebug() << "invalid index" << endl; + //throw std::out_of_range("invalid index"); + } + return m_records[index]; +} + +void +PalmLib::FlatFile::Database::appendRecord(PalmLib::FlatFile::Record rec) +{ + if (rec.fields().size() != getNumOfFields()) { +// throw PalmLib::error("the number of fields mismatch"); + kdDebug() << "the number of fields mismatch" << endl; + return; + } + for (unsigned int i = 0; i < getNumOfFields(); i++) { +#ifdef HAVE_VECTOR_AT + const Field field = rec.fields().at(i); +#else + const Field field = rec.fields()[i]; +#endif + if (field.type != field_type(i)) { + kdDebug() << "field " << i << " type " << field_type(i) << " mismatch: " << field.type << endl; + return; +// throw PalmLib::error(buffer.str()); + } + } + m_records.push_back(rec); +} + +void +PalmLib::FlatFile::Database::deleteRecord(unsigned index) +{ + m_records.erase(m_records.begin() + index); +} + +void +PalmLib::FlatFile::Database::clearRecords() +{ + m_records.clear(); +} + +unsigned +PalmLib::FlatFile::Database::getNumOfListViews() const +{ + return m_listviews.size(); +} + 
+PalmLib::FlatFile::ListView +PalmLib::FlatFile::Database::getListView(unsigned index) const +{ + return m_listviews[index]; +} + +void +PalmLib::FlatFile::Database::setListView(unsigned index, + const PalmLib::FlatFile::ListView& lv) +{ + // Ensure that the field numbers are within range. + for (PalmLib::FlatFile::ListView::const_iterator i = lv.begin(); + i != lv.end(); ++i) { + if ((*i).field >= getNumOfFields()) + return; + } + + m_listviews[index] = lv; +} + +void +PalmLib::FlatFile::Database::appendListView(const ListView& lv) +{ + // Enforce any limit of the maximum number of list views. + if (getMaxNumOfListViews() != 0 + && getNumOfListViews() + 1 > getMaxNumOfListViews()) + return; +// throw PalmLib::error("too many list views for this database type"); + + // Ensure that the field numbers are within range. + for (PalmLib::FlatFile::ListView::const_iterator i = lv.begin(); + i != lv.end(); ++i) { + if ((*i).field >= getNumOfFields()) + return; + } + m_listviews.push_back(lv); +} + +void +PalmLib::FlatFile::Database::removeListView(unsigned index) +{ + if (index < getNumOfListViews()) + m_listviews.erase( m_listviews.begin()+index); +} + +static void +strlower(std::string& str) +{ + for (std::string::iterator p = str.begin(); p != str.end(); ++p) { + if (isupper(*p)) + *p = tolower(*p); + } +} + +static bool +string2boolean(std::string str) +{ + strlower(str); + if (str == "on") + return true; + else if (str == "off") + return false; + else if (str == "true") + return true; + else if (str == "t") + return true; + else if (str == "false") + return false; + else if (str == "f") + return false; + else { + int num = 0; + + std::istringstream(str.c_str()) >> num; + return num != 0 ? 
true : false; + } +} + +void +PalmLib::FlatFile::Database::setOption(const std::string& name, + const std::string& value) +{ + if (name == "backup") + m_backup = string2boolean(value); + else if (name == "inROM") + m_readonly = string2boolean(value); + else if (name == "copy-prevention") + m_copy_prevention = string2boolean(value); +} + +PalmLib::FlatFile::Database::options_list_t +PalmLib::FlatFile::Database::getOptions() const +{ + PalmLib::FlatFile::Database::options_list_t set; + typedef PalmLib::FlatFile::Database::options_list_t::value_type value; + + if (m_backup) + set.push_back(value("backup", "true")); + else + set.push_back(value("backup", "false")); + + if (m_readonly) + set.push_back(value("inROM", "true")); + + if (m_copy_prevention) + set.push_back(value("copy-prevention", "true")); + + return set; +} + +void +PalmLib::FlatFile::Database::doneWithSchema() +{ + // Ensure that the database has at least one field. + if (getNumOfFields() == 0) + return; +// throw PalmLib::error("at least one field must be specified"); + + // Ensure that the database has a title. + if (title().empty()) + return; +// throw PalmLib::error("a title must be specified"); +} diff --git a/src/translators/pilotdb/libflatfile/Database.h b/src/translators/pilotdb/libflatfile/Database.h new file mode 100644 index 0000000..5bcba32 --- /dev/null +++ b/src/translators/pilotdb/libflatfile/Database.h @@ -0,0 +1,320 @@ +/* + * palm-db-tools: Abstract adaptor for flat-file databases. + * Copyright (C) 1999-2000 by Tom Dyas (tdyas@users.sourceforge.net) + */ + +#ifndef __PALMLIB_FLATFILE_DATABASE_H__ +#define __PALMLIB_FLATFILE_DATABASE_H__ + +#include <vector> +#include <string> +#include <utility> + +#include "../libpalm/Database.h" +#include "Field.h" +#include "Record.h" +#include "ListView.h" +#include "FType.h" + +#define NOTETITLE_LENGTH 32 + +namespace PalmLib { + namespace FlatFile { + + // This class is an in-memory representation of a typical + // PalmOS flat-file database. 
The caller can request write the + // data to a real PalmLib::Database object at any time to + // actually obtain the data in a format usable on the Palm + // Pilot. + + class Database { + public: + // convenience type for the options list parsing + typedef std::vector< std::pair< std::string, std::string> > options_list_t; + + /** + * Default constructor which creates an empty + * database. Subclasses should provide a default + * constructor and an additional constructorwhich takes a + * PalmOS::Database as an argument. + */ + Database(std::string p_Type) + : m_backup(false), m_readonly(false), + m_copy_prevention(false), m_Type(p_Type) + { } + + /** + * Constructor which fills the flat-file structure from a + * PalmOS database. + * + * @param pdb PalmOS database to read from. + */ + Database(std::string p_Type, const PalmLib::Database& pdb); + + /** + * The destructor is empty since we have no other objects + * to dispose of. It is virtual since we have subclasses + * for specific flat-file database products. + */ + virtual ~Database() { } + + /** + * After all processing to add fields and records is done, + * outputPDB is called to create the actual file format + * used by the flat-file database product. This method is + * abstract since only subclasses know the specific file + * formats. + * + * @param pdb An instance of PalmLib::Database. + */ + virtual void outputPDB(PalmLib::Database& pdb) const; + + /** + * Return the title of this flat-file database. + */ + virtual std::string title() const; + + /** + * Set the title of this database. + * + * @param title New title of the database. + */ + virtual void title(const std::string& title); + + /** + * Return the maximum number of fields allowed in the + * database. The object will not allow the number of + * fields to exceed the returned value. This method is + * abstract since only the subclasses know the limit on + * fields. 0 is returned if there is no limit. 
+ */ + virtual unsigned getMaxNumOfFields() const = 0; + + /** + * Return the number of fields in the database. + */ + virtual unsigned getNumOfFields() const; + + /** + * Accessor function for the name of a field. + */ + virtual std::string field_name(int i) const; + + /** + * Accessor function for type of a field. + */ + virtual Field::FieldType field_type(int i) const; + + /** + * Accessor function for the field informations + */ + virtual FType field(int i) const; + + /** + * write the format of the field's argument in format, + * and return a strings' vector with name of each argument part. + * the format use the same display as used by printf + */ + virtual std::vector<std::string> field_argumentf(int, std::string& format) + { format = std::string(""); return std::vector<std::string>(0, std::string(""));} + + /** + * Add a field to the flat-file database. An exception + * will be thrown if the maximum number of fields would be + * exceeded or the field type is unsupported. + * + * @param name Name of the new field. + * @param type The type of the new field. + */ + virtual void appendField(FType field); + virtual void appendField(const std::string& name, + Field::FieldType type, std::string data = std::string("")); + + /** + * Insert a field to the flat-file database. An exception + * will be thrown if the maximum number of fields would be + * exceeded or the field type is unsupported. + * + * @param name Name of the new field. + * @param type The type of the new field. + */ + virtual void insertField(int i, FType field); + virtual void insertField(int i, const std::string& name, + Field::FieldType type, std::string data = std::string("")); + + /** + * Remove a Field in the flat-file database. An Exception + * will thrown if the field doesn't exist. + */ + virtual void removeField(int i); + + /** + * Returns true if this database supports a specific field + * type. This method is abstract since only the subclasses + * know which field types are supported. 
+ * + * @param type The field type that should be checked for support. + */ + virtual bool supportsFieldType(const Field::FieldType& type) const = 0; + + /** + * Return the number of records in the database. + */ + virtual unsigned getNumRecords() const; + + /** + * Return the record with the given index. The caller gets + * a private copy of the data and _not_ a reference to the + * data. + * + * @param index Index of the record to retrieve. + */ + virtual Record getRecord(unsigned index) const; + + /** + * Append a record to the database. An exception will be + * thrown if their are not enough fields or if field types + * mismatch. + * + * @param rec The record to append. + */ + virtual void appendRecord(Record rec); + + /** + * Remove all records from the database + */ + virtual void clearRecords(); + + /** + * Remove a record from the database + */ + virtual void deleteRecord(unsigned index); + + /** + * Return the maximum number of views supported by this + * type of flat-file database. This method is abstract + * since only the subclasses know the exact value. + */ + virtual unsigned getMaxNumOfListViews() const = 0; + + /** + * Return the actual number of views present in this + * database. + */ + virtual unsigned getNumOfListViews() const; + + /** + * Return a copy of the list view at the given index. + * + * @param index Index of the list view to return. + */ + virtual ListView getListView(unsigned index) const; + + /** + * Set the list view at the given index to the new list + * view. An exception may be thrown if field numbers are + * invalid or the list view doesn't pass muster with the + * subclass. + * + * @param index Index of the list view to set. + * @param listview The new list view. + */ + virtual void setListView(unsigned index, const ListView& listview); + + /** + * Append a new list view. This will fail if the maximum + * number of list views would be exceeded. + * + * @param listview The new list view to append. 
+ */ + virtual void appendListView(const ListView& listview); + + /** + * Remove a list view. + * + * @param index Index of the list view to remove. + */ + virtual void removeListView(unsigned index); + + /** + * Process a special option. If the option is not + * supported, then it is silently ignored. Subclasses + * should call the base class first so that options common + * to all flat-file databases can be processed. + * + * @param name Name of the option. + * @param value String value assigned to the option. */ + virtual void setOption(const std::string& name, + const std::string& value); + + /** + * Return a list of of all extra options supported by this + * database. Subclasses should call the base class first + * and then merge any extra options. Get a list of extra + * options. + */ + virtual options_list_t getOptions(void) const; + + /** + * Hook function which should be invoked by a caller after + * all calls the meta-deta functions have completed. This + * allows the database type-specific code to do final + * checks on the meta-data. An exception will be throw if + * there is an error. Otherwise, nothing will happen. + */ + virtual void doneWithSchema(); + + /** + * Change and Return the about information + * of the database when it's supportted + */ + virtual void setAboutInformation(std::string _string) + { + about.information = _string; + } + + virtual std::string getAboutInformation() const + { + return about.information; + } + + std::string type() const + { + return m_Type; + } + + private: + // We provide a dummy copy constructor and assignment + // operator in order to prevent any copying of the object. 
+ Database(const Database&) { } + Database& operator = (const Database&) { return *this; } + +/* typedef std::vector< std::pair< std::string, Field::FieldType > >*/ + typedef std::vector< FType> + field_list_t; + typedef std::vector<Record> record_list_t; + typedef std::vector<ListView> listview_list_t; + + typedef std::vector< std::pair< std::string, std::vector< std::string > > > + listitems_list_t; + + field_list_t m_fields; // database schema + record_list_t m_records; // the database itself + listitems_list_t m_list; // the items lists include in the database + listview_list_t m_listviews; // list views + bool m_backup; // backup flag for PDB + bool m_readonly; // readonly flag for PDB + bool m_copy_prevention; // copy prevention for PDB + std::string m_title; // name of database + class About + { + public: + std::string information; + } about; + std::string m_Type; + }; + + } // namespace FlatFile +} // namespace PalmLib + +#endif diff --git a/src/translators/pilotdb/libflatfile/FType.h b/src/translators/pilotdb/libflatfile/FType.h new file mode 100644 index 0000000..86396b3 --- /dev/null +++ b/src/translators/pilotdb/libflatfile/FType.h @@ -0,0 +1,48 @@ +/* + * palm-db-tools: Field Type definitions for flat-file database objects. 
+ * Copyright (C) 2000 by Tom Dyas (tdyas@users.sourceforge.net) + */ + +#ifndef __PALMLIB_FLATFILE_FTYPE_H__ +#define __PALMLIB_FLATFILE_FTYPE_H__ + +#include <string> +#include <utility> + +#include "../libpalm/palmtypes.h" +#include "Field.h" + +namespace PalmLib { + namespace FlatFile { + + class FType { + public: + friend class PalmLib::FlatFile::Field; + FType(std::string title, PalmLib::FlatFile::Field::FieldType type) : + m_title(title), m_type(type), m_data("") { } + + FType(std::string title, PalmLib::FlatFile::Field::FieldType type, std::string data) : + m_title(title), m_type(type), m_data(data) { } + + virtual ~FType() { } + + std::string title() const {return m_title;} + virtual PalmLib::FlatFile::Field::FieldType type() const + { return m_type;} + + virtual std::string argument() const { return m_data;} + + void set_argument( const std::string data) { m_data = data;} + + void setTitle( const std::string value) { m_title = value;} + void setType( const PalmLib::FlatFile::Field::FieldType value) { m_type = value;} + private: + std::string m_title; + PalmLib::FlatFile::Field::FieldType m_type; + + std::string m_data; + }; + } +} + +#endif diff --git a/src/translators/pilotdb/libflatfile/Field.h b/src/translators/pilotdb/libflatfile/Field.h new file mode 100644 index 0000000..583bc21 --- /dev/null +++ b/src/translators/pilotdb/libflatfile/Field.h @@ -0,0 +1,119 @@ +/* + * palm-db-tools: Field definitions for flat-file database objects. + * Copyright (C) 2000 by Tom Dyas (tdyas@users.sourceforge.net) + */ + +#ifndef __PALMLIB_FLATFILE_FIELD_H__ +#define __PALMLIB_FLATFILE_FIELD_H__ + +#include <string> + +#include "../libpalm/palmtypes.h" + +namespace PalmLib { + namespace FlatFile { + + class Field { + public: + Field() : no_value(false), type(STRING), v_boolean(false), + v_integer(0), v_float(0) { } + ~Field() { } + + // True if this field has no value. 
(NULL in SQL terms) + bool no_value; + + enum FieldType { + STRING, + BOOLEAN, + INTEGER, + FLOAT, + DATE, + TIME, + DATETIME, + LIST, + LINK, + NOTE, + CALCULATED, + LINKED, + LAST + }; + + enum FieldType type; + + std::string v_string; + std::string v_note; + bool v_boolean; + PalmLib::pi_int32_t v_integer; + long double v_float; + struct { + int month; + int day; + int year; + } v_date; // valid for DATE and DATETIME + struct { + int hour; + int minute; + } v_time; // valid for TIME and DATETIME + + /* + friend Field operator = (const Field& y) + { + this.v_string = y.v_string; + this.v_boolean = y.v_boolean; + this.v_integer = y.v_integer; + this.v_float = y.v_float; + this.v_date.month = y.v_date.month; + this.v_date.day = y.v_date.day; + this.v_date.year = y.v_date.year; + this.v_time.hour = y.v_time.hour; + this.v_time.minute = y.v_time.minute; + this.v_note = y.v_note; + } +*/ + friend bool operator==(const Field& x, const Field& y) + { + if (x.type != y.type) + return false; + switch (x.type) { + case STRING: + return (x.v_string == y.v_string); + case BOOLEAN: + return (x.v_boolean == y.v_boolean); + case INTEGER: + return (x.v_integer == y.v_integer); + case FLOAT: + return (x.v_float == y.v_float); + case DATE: + return (x.v_date.month == y.v_date.month + && x.v_date.day == y.v_date.day + && x.v_date.year == y.v_date.year); + case TIME: + return (x.v_time.hour == y.v_time.hour + && x.v_time.minute == y.v_time.minute); + case DATETIME: + return (x.v_date.month == y.v_date.month + && x.v_date.day == y.v_date.day + && x.v_date.year == y.v_date.year + && x.v_time.hour == y.v_time.hour + && x.v_time.minute == y.v_time.minute); + case LIST: + return (x.v_string == y.v_string); + case LINK: + return (x.v_string == y.v_string); + case NOTE: + return (x.v_string == y.v_string + && x.v_note == y.v_note); + case CALCULATED: + return true; //a calculated field could be recalculate at each time + case LINKED: + return (x.v_string == y.v_string); + default: + return 
(x.v_string == y.v_string); + } + } + }; + + } +} + +#endif diff --git a/src/translators/pilotdb/libflatfile/ListView.h b/src/translators/pilotdb/libflatfile/ListView.h new file mode 100644 index 0000000..4229548 --- /dev/null +++ b/src/translators/pilotdb/libflatfile/ListView.h @@ -0,0 +1,77 @@ +#ifndef __PALMOS__FLATFILE__VIEW_H__ +#define __PALMOS__FLATFILE__VIEW_H__ + +#include <string> +#include <vector> + +#include "ListViewColumn.h" + +namespace PalmLib { + namespace FlatFile { + + // The ListView class represents the a "list view" as + // implemented by the major PalmOS flat-file programs. The + // main idea is a series of columns that display a field of + // the database. + // + // For fun, this class exports the STL interface of the STL + // class it uses to store the ListViewColumn classes. + + class ListView { + private: + typedef std::vector<ListViewColumn> rep_type; + rep_type rep; + + public: + typedef rep_type::value_type value_type; + typedef rep_type::iterator iterator; + typedef rep_type::const_iterator const_iterator; + typedef rep_type::reference reference; + typedef rep_type::const_reference const_reference; + typedef rep_type::size_type size_type; + typedef rep_type::difference_type difference_type; + typedef rep_type::reverse_iterator reverse_iterator; + typedef rep_type::const_reverse_iterator const_reverse_iterator; + + // global fields + std::string name; + bool editoruse; + + // STL pull-up interface (probably not complete) + iterator begin() { return rep.begin(); } + const_iterator begin() const { return rep.begin(); } + iterator end() { return rep.end(); } + const_iterator end() const { return rep.end(); } + reverse_iterator rbegin() { return rep.rbegin(); } + const_reverse_iterator rbegin() const { return rep.rbegin(); } + reverse_iterator rend() { return rep.rend(); } + const_reverse_iterator rend() const { return rep.rend(); } + size_type size() const { return rep.size(); } + size_type max_size() const { return rep.max_size(); } + 
bool empty() const { return rep.empty(); } + reference front() { return rep.front(); } + const_reference front() const { return rep.front(); } + reference back() { return rep.back(); } + const_reference back() const { return rep.back(); } + void push_back(const ListViewColumn& x) { rep.push_back(x); } + void pop_back() { rep.pop_back(); } + void clear() { rep.clear(); } + void resize(size_type new_size, const ListViewColumn& x) + { rep.resize(new_size, x); } + void resize(size_type new_size) + { rep.resize(new_size, ListViewColumn()); } + + ListView() : rep(), name(""), editoruse(false) { } + ListView(const ListView& rhs) : rep(rhs.rep), name(rhs.name), editoruse(false) { } + ListView& operator = (const ListView& rhs) { + name = rhs.name; + rep = rhs.rep; + return *this; + } + + }; + + } +} + +#endif diff --git a/src/translators/pilotdb/libflatfile/ListViewColumn.h b/src/translators/pilotdb/libflatfile/ListViewColumn.h new file mode 100644 index 0000000..7b5330e --- /dev/null +++ b/src/translators/pilotdb/libflatfile/ListViewColumn.h @@ -0,0 +1,19 @@ +#ifndef __PALMLIB_FLATFILE_LISTVIEWCOLUMN_H__ +#define __PALMLIB_FLATFILE_LISTVIEWCOLUMN_H__ + +namespace PalmLib { + namespace FlatFile { + + // The ListViewColumn class stores the field number and column + // width for a column in a list view. + + struct ListViewColumn { + ListViewColumn() : field(0), width(80) { } + ListViewColumn(unsigned a1, unsigned a2) : field(a1), width(a2) { } + unsigned field; + unsigned width; + }; + } +} + +#endif diff --git a/src/translators/pilotdb/libflatfile/Makefile.am b/src/translators/pilotdb/libflatfile/Makefile.am new file mode 100644 index 0000000..d3ab012 --- /dev/null +++ b/src/translators/pilotdb/libflatfile/Makefile.am @@ -0,0 +1,20 @@ +####### kdevelop will overwrite this part!!! 
(begin)########## +noinst_LIBRARIES = liblibflatfile.a + +AM_CPPFLAGS = $(all_includes) + +liblibflatfile_a_METASOURCES = AUTO + +liblibflatfile_a_SOURCES = DB.cpp Database.cpp + + +EXTRA_DIST = Database.cpp Database.h DB.cpp DB.h Field.h FType.h ListView.h ListViewColumn.h Record.h + +####### kdevelop will overwrite this part!!! (end)############ + +# is this the right way to do this? I need to include the strop.o object file since its +# in the parent directory +liblibflatfile_a_LIBADD = ../strop.o +CLEANFILES = strop.Po + +KDE_OPTIONS = noautodist diff --git a/src/translators/pilotdb/libflatfile/Record.h b/src/translators/pilotdb/libflatfile/Record.h new file mode 100644 index 0000000..537953e --- /dev/null +++ b/src/translators/pilotdb/libflatfile/Record.h @@ -0,0 +1,45 @@ +/* + * palm-db-tools: Field definitions for flat-file database objects. + * Copyright (C) 2000 by Tom Dyas (tdyas@users.sourceforge.net) + */ + +#ifndef __PALMLIB_FLATFILE_RECORD_H__ +#define __PALMLIB_FLATFILE_RECORD_H__ + +#include <vector> + +#include "Field.h" + +namespace PalmLib { + namespace FlatFile { +// typedef std::vector<Field> Record; + + class Record{ + public: + + const std::vector<Field> fields() const { return m_Fields; } + + void appendField(Field newfield) { m_Fields.push_back(newfield); } + bool created() const { return m_New;} + void created(bool on){ m_New = on;} + bool secret() const { return m_Secret;} + void secret(bool on) { m_Secret = on;} + + bool dirty() const { return m_Dirty; } + void dirty( bool on) { m_Dirty = on; } + + pi_uint32_t unique_id() const { return m_UID; } + void unique_id(pi_uint32_t id) { m_UID = id; } + private: + + std::vector<Field> m_Fields; + bool m_Secret; + bool m_New; + + bool m_Dirty; + pi_uint32_t m_UID; + }; + } +} + +#endif diff --git a/src/translators/pilotdb/libpalm/Block.cpp b/src/translators/pilotdb/libpalm/Block.cpp new file mode 100644 index 0000000..c58f6f1 --- /dev/null +++ b/src/translators/pilotdb/libpalm/Block.cpp @@ -0,0 
+1,85 @@ +/* + * palm-db-tools: Encapsulate "blocks" of data. + * Copyright (C) 2000 by Tom Dyas (tdyas@users.sourceforge.net) + * + * The PalmLib::Block class represents a generic block of data. It is + * used to simplify passing arrays of pi_char_t around. + */ + +#include <cstring> + +#include "Block.h" + +void PalmLib::Block::reserve(PalmLib::Block::size_type new_size) +{ + if (new_size > capacity()) { + // Allocate a new buffer containing a copy of the old with the + // remainder zero'ed out. + pointer new_data = new pi_char_t[new_size]; + memcpy(new_data, m_data, m_size); + memset(new_data + m_size, 0, new_size - m_size); + + // Replace the existing buffer. + delete [] m_data; + m_data = new_data; + m_size = new_size; + } +} + +void PalmLib::Block::resize(size_type new_size) +{ + if (new_size < m_size) { + // Copy the data that will remain to a new buffer and switch to it. + pointer new_data = new pi_char_t[new_size]; + memcpy(new_data, m_data, new_size); + + // Replace the existing buffer. + delete [] m_data; + m_data = new_data; + m_size = new_size; + } else if (new_size > m_size) { + // Copy the data that will remain to a new buffer and switch to it. + pointer new_data = new pi_char_t[new_size]; + memcpy(new_data, m_data, m_size); + memset(new_data + m_size, 0, new_size - m_size); + + // Replace the existing buffer. 
+ delete [] m_data; + m_data = new_data; + m_size = new_size; + } +} + +void PalmLib::Block::assign(PalmLib::Block::const_pointer data, + const PalmLib::Block::size_type size) +{ + clear(); + if (data && size > 0) { + m_size = size; + m_data = new pi_char_t[m_size]; + memcpy(m_data, data, m_size); + } +} + +void PalmLib::Block::assign(const PalmLib::Block::size_type size, + const PalmLib::Block::value_type value) +{ + clear(); + if (size > 0) { + m_size = size; + m_data = new pi_char_t[m_size]; + memset(m_data, value, m_size); + } +} + +bool operator == (const PalmLib::Block& lhs, const PalmLib::Block& rhs) +{ + if (lhs.size() == rhs.size()) { + if (lhs.data()) { + if (memcmp(lhs.data(), rhs.data(), lhs.size()) != 0) + return false; + } + return true; + } + return false; +} diff --git a/src/translators/pilotdb/libpalm/Block.h b/src/translators/pilotdb/libpalm/Block.h new file mode 100644 index 0000000..6ed6069 --- /dev/null +++ b/src/translators/pilotdb/libpalm/Block.h @@ -0,0 +1,186 @@ +/* + * palm-db-tools: Encapsulate "blocks" of data. + * Copyright (C) 2000 by Tom Dyas (tdyas@users.sourceforge.net) + * + * The PalmLib::Block class represents a generic block of data. It is + * used to make passing pi_char_t buffers around very easy. The Record + * and Resource classes both inherit from this class. A STL interface + * is also attempted though it is probably not complete. 
+ */ + +#ifndef __PALMLIB_BLOCK_H__ +#define __PALMLIB_BLOCK_H__ + +#include <algorithm> +#include <iterator> + +#include "palmtypes.h" + +namespace PalmLib { + + class Block { + public: + // STL: container type definitions + typedef PalmLib::pi_char_t value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type* iterator; + typedef const value_type* const_iterator; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + + // STL: reverisible container type definitions +#ifdef __GNUG__ + typedef std::reverse_iterator<const_iterator> const_reverse_iterator; + typedef std::reverse_iterator<iterator> reverse_iterator; +#endif + + /** + * Default constructor. + */ + Block() : m_data(0), m_size(0) { } + + /** + * Constructor which fills the block from buffer "raw" with + * length "len". + */ + Block(const_pointer raw, const size_type len) : m_data(0), m_size(0) { + assign(raw, len); + } + + /** + * Constructor which takes a size and allocates a zero'ed out + * buffer of that size. (STL: Sequence: default fill + * constructor) + */ + Block(const size_type size, const value_type value = 0) + : m_data(0), m_size(0) { + assign(size, value); + } + + /** + * Constructor which takes two iterators and builds the block + * from the region between the iterators. (STL: Sequence: + * range constructor) + */ + Block(const_iterator a, const_iterator b) : m_data(0), m_size(0) { + assign(a, b - a); + } + + /** + * Copy constructor. Just copies the data from the other block + * into this block. + */ + Block(const Block& rhs) : m_data(0), m_size(0) { + assign(rhs.data(), rhs.size()); + } + + /** + * Destructor. Just frees the buffer if it exists. + */ + virtual ~Block() { clear(); } + + /** + * Assignment operator. + * + * @param rhs The block whose contents should be copied. 
+ */ + Block& operator = (const Block& rhs) { + assign(rhs.data(), rhs.size()); + return *this; + } + + // STL: Container + iterator begin() { return m_data; } + const_iterator begin() const { return m_data; } + iterator end() { return (m_data != 0) ? (m_data + m_size) : (0); } + const_iterator end() const + { return (m_data != 0) ? (m_data + m_size) : (0); } + size_type size() const { return m_size; } + size_type max_size() const { + return size_type(-1) / sizeof(value_type); + } + bool empty() const { return m_size == 0; } + + // STL: Reversible Container +#ifdef __GNUG__ + reverse_iterator rbegin() { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + reverse_iterator rend() { return reverse_iterator(begin()); } + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } +#endif + + // STL: Random Access Container + reference operator [] (size_type index) { return m_data[index]; } + const_reference operator [] (size_type index) const + { return m_data[index]; } + + // STL: Sequence (not complete) + void clear() { + if (m_data) { + delete [] m_data; + m_data = 0; + m_size = 0; + } + } + void resize(size_type n); + reference front() { return m_data[0]; } + const_reference front() const { return m_data[0]; } + + // STL: (present in vector but not part of a interface spec) + size_type capacity() const { return m_size; } + void reserve(size_type size); + + /** + * Return a pointer to the data area. If there are no + * contents, then the return value will be NULL. This is not + * an STL method but goes with this class as a singular data + * block and not a container (even though it is). + */ + iterator data() { return m_data; } + const_iterator data() const { return m_data; } + + /** + * Replace the existing contents of the Block with the buffer + * that starts at raw of size len. + * + * @param raw Pointer to the new contents. + * @param len Size of the new contents. 
+ */ + void assign(const_pointer data, const size_type size); + + /** + * Replace the existing contents of the Block with a buffer + * consisting of size elements equal to fill. + * + * @param size The size of the new contents. + * @param value Value to fill the contents with. + */ + void assign(const size_type size, const value_type value = 0); + + // compatiblity functions (remove before final 0.3.0 release) + const_pointer raw_data() const { return data(); } + pointer raw_data() { return data(); } + size_type raw_size() const { return size(); } + void set_raw(const_pointer raw, const size_type len) + { assign(raw, len); } + + private: + pointer m_data; + size_type m_size; + }; + +} + +bool operator == (const PalmLib::Block& lhs, const PalmLib::Block& rhs); + +inline bool operator != (const PalmLib::Block& lhs, const PalmLib::Block& rhs) +{ return ! (lhs == rhs); } + +#endif diff --git a/src/translators/pilotdb/libpalm/Database.cpp b/src/translators/pilotdb/libpalm/Database.cpp new file mode 100644 index 0000000..38d896f --- /dev/null +++ b/src/translators/pilotdb/libpalm/Database.cpp @@ -0,0 +1,43 @@ +/* + * palm-db-tools: General interface to a PalmOS database. + * Copyright (C) 2000 by Tom Dyas (tdyas@users.sourceforge.net) + * + * This file implens an abstract interface to PalmOS + * databases. Subclasses would include the class that reads/writes PDB + * files and possibly databases that can be accessed over the HotSync + * protocols. + */ + +#include "palmtypes.h" +#include "Record.h" +#include "Database.h" + +#ifndef __GNUG__ + +// MSVC: Visual C++ doesn't like initializers in the header ... 
+const PalmLib::pi_uint16_t PalmLib::Database::FLAG_HDR_RESOURCE = 0x0001; +const PalmLib::pi_uint16_t PalmLib::Database::FLAG_HDR_READ_ONLY = 0x0002; +const PalmLib::pi_uint16_t PalmLib::Database::FLAG_HDR_APPINFO_DIRTY = 0x0004; +const PalmLib::pi_uint16_t PalmLib::Database::FLAG_HDR_BACKUP = 0x0008; +const PalmLib::pi_uint16_t PalmLib::Database::FLAG_HDR_OK_TO_INSTALL_NEWER = 0x0010; +const PalmLib::pi_uint16_t PalmLib::Database::FLAG_HDR_RESET_AFTER_INSTALL = 0x0020; +const PalmLib::pi_uint16_t PalmLib::Database::FLAG_HDR_COPY_PREVENTION = 0x0040; +const PalmLib::pi_uint16_t PalmLib::Database::FLAG_HDR_STREAM = 0x0080; +const PalmLib::pi_uint16_t PalmLib::Database::FLAG_HDR_HIDDEN = 0x0100; +const PalmLib::pi_uint16_t PalmLib::Database::FLAG_HDR_LAUNCHABLE_DATA = 0x0200; +const PalmLib::pi_uint16_t PalmLib::Database::FLAG_HDR_OPEN = 0x8000; +const PalmLib::pi_char_t PalmLib::Record::FLAG_ATTR_DELETED = 0x80; +const PalmLib::pi_char_t PalmLib::Record::FLAG_ATTR_DIRTY = 0x40; +const PalmLib::pi_char_t PalmLib::Record::FLAG_ATTR_BUSY = 0x20; +const PalmLib::pi_char_t PalmLib::Record::FLAG_ATTR_SECRET = 0x10; + +#endif + +PalmLib::Database::Database(bool resourceDB) + : m_name(""), m_version(0), m_time_created(0), m_time_modified(0), + m_time_backup(0), m_modification(0), m_unique_id_seed(0) +{ + m_flags = resourceDB ? FLAG_HDR_RESOURCE : 0; + m_type = PalmLib::mktag(' ', ' ', ' ', ' '); + m_creator = PalmLib::mktag(' ', ' ', ' ', ' '); +} diff --git a/src/translators/pilotdb/libpalm/Database.h b/src/translators/pilotdb/libpalm/Database.h new file mode 100644 index 0000000..bcde8c0 --- /dev/null +++ b/src/translators/pilotdb/libpalm/Database.h @@ -0,0 +1,181 @@ +/* + * palm-db-tools: General interface to a PalmOS database. + * Copyright (C) 2000 by Tom Dyas (tdyas@users.sourceforge.net) + * + * This header defines an abstract interface to PalmOS + * databases. 
Subclasses would include the class that reads/writes PDB + * files and possibly databases that can be accessed over the HotSync + * protocols. + */ + +#ifndef __PALMLIB_DATABASE_H__ +#define __PALMLIB_DATABASE_H__ + +#include <string> + +#include "palmtypes.h" +#include "Block.h" +#include "Record.h" +#include "Resource.h" + +namespace PalmLib { + + class Database { + public: + // Constants for bits in the flags field of a PalmOS database. +#ifdef __GNUG__ + static const pi_uint16_t FLAG_HDR_RESOURCE = 0x0001; + static const pi_uint16_t FLAG_HDR_READ_ONLY = 0x0002; + static const pi_uint16_t FLAG_HDR_APPINFO_DIRTY = 0x0004; + static const pi_uint16_t FLAG_HDR_BACKUP = 0x0008; + static const pi_uint16_t FLAG_HDR_OK_TO_INSTALL_NEWER = 0x0010; + static const pi_uint16_t FLAG_HDR_RESET_AFTER_INSTALL = 0x0020; + static const pi_uint16_t FLAG_HDR_COPY_PREVENTION = 0x0040; + static const pi_uint16_t FLAG_HDR_STREAM = 0x0080; + static const pi_uint16_t FLAG_HDR_HIDDEN = 0x0100; + static const pi_uint16_t FLAG_HDR_LAUNCHABLE_DATA = 0x0200; + static const pi_uint16_t FLAG_HDR_OPEN = 0x8000; +#else + static const pi_uint16_t FLAG_HDR_RESOURCE; + static const pi_uint16_t FLAG_HDR_READ_ONLY; + static const pi_uint16_t FLAG_HDR_APPINFO_DIRTY; + static const pi_uint16_t FLAG_HDR_BACKUP; + static const pi_uint16_t FLAG_HDR_OK_TO_INSTALL_NEWER; + static const pi_uint16_t FLAG_HDR_RESET_AFTER_INSTALL; + static const pi_uint16_t FLAG_HDR_COPY_PREVENTION; + static const pi_uint16_t FLAG_HDR_STREAM; + static const pi_uint16_t FLAG_HDR_HIDDEN; + static const pi_uint16_t FLAG_HDR_LAUNCHABLE_DATA; + static const pi_uint16_t FLAG_HDR_OPEN; +#endif + + Database(bool resourceDB = false); + virtual ~Database() { } + + bool isResourceDB() const {return (m_flags & FLAG_HDR_RESOURCE) != 0;} + + virtual pi_uint32_t type() const { return m_type; } + virtual void type(pi_uint32_t new_type) { m_type = new_type; } + + virtual pi_uint32_t creator() const { return m_creator; } + virtual void 
creator(pi_uint32_t new_creator) + { m_creator = new_creator; } + + virtual pi_uint16_t version() const { return m_version; } + virtual void version(pi_uint16_t v) { m_version = v; } + + virtual pi_int32_t creation_time() const { return m_time_created; } + virtual void creation_time(pi_int32_t ct) { m_time_created = ct; } + + virtual pi_uint32_t modification_time() const + { return m_time_modified; } + virtual void modification_time(pi_uint32_t mt) + { m_time_modified = mt; } + + virtual pi_uint32_t backup_time() const { return m_time_backup; } + virtual void backup_time(pi_uint32_t bt) { m_time_backup = bt; } + + virtual pi_uint32_t modnum() const { return m_modification; } + virtual void modnum(pi_uint32_t new_modnum) + { m_modification = new_modnum; } + + virtual pi_uint32_t unique_id_seed() const + { return m_unique_id_seed; } + virtual void unique_id_seed(pi_uint32_t uid_seed) + { m_unique_id_seed = uid_seed; } + + virtual pi_uint16_t flags() const { return m_flags; } + virtual void flags(pi_uint16_t flags) + { m_flags = flags & ~(FLAG_HDR_RESOURCE | FLAG_HDR_OPEN); } + + virtual std::string name() const { return m_name; } + virtual void name(const std::string& new_name) { m_name = new_name; } + + virtual bool backup() const + { return (m_flags & FLAG_HDR_BACKUP) != 0; } + virtual void backup(bool state) { + if (state) + m_flags |= FLAG_HDR_BACKUP; + else + m_flags &= ~(FLAG_HDR_BACKUP); + } + + virtual bool readonly() const + { return (m_flags & FLAG_HDR_READ_ONLY) != 0; } + virtual void readonly(bool state) { + if (state) + m_flags |= FLAG_HDR_READ_ONLY; + else + m_flags &= ~(FLAG_HDR_READ_ONLY); + } + + virtual bool copy_prevention() const + { return (m_flags & FLAG_HDR_COPY_PREVENTION) != 0; } + virtual void copy_prevention(bool state) { + if (state) + m_flags |= FLAG_HDR_COPY_PREVENTION; + else + m_flags &= ~(FLAG_HDR_COPY_PREVENTION); + } + + // Return the total number of records/resources in this + // database. 
+ virtual unsigned getNumRecords() const = 0; + + // Return the database's application info block as a Block + // object. + virtual Block getAppInfoBlock() const { return Block(); } + + // Set the database's app info block to the contents of the + // passed Block object. + virtual void setAppInfoBlock(const Block &) { } + + // Return the database's sort info block as a Block object. + virtual Block getSortInfoBlock() const { return Block(); } + + // Set the database's sort info block to the contents of the + // passed Block object. + virtual void setSortInfoBlock(const Block &) { } + + // Return the record identified by the given index. The caller + // owns the returned RawRecord object. + virtual Record getRecord(unsigned index) const = 0; + + // Set the record identified by the given index to the given + // record. + virtual void setRecord(unsigned index, const Record& rec) = 0; + + // Append the given record to the database. + virtual void appendRecord(const Record& rec) = 0; + + // returned if the specified (type, ID) combination is not + // present in the database. The caller owns the returned + // RawRecord object. + virtual Resource getResourceByType(pi_uint32_t type, + pi_uint32_t id) const = 0; + + // Return the resource present at the given index. NULL is + // returned if the index is invalid. The caller owns the + // returned RawRecord object. + virtual Resource getResourceByIndex(unsigned index) const = 0; + + // Set the resouce at given index to passed Resource object. 
+ virtual void setResource(unsigned index, const Resource& rsrc) = 0; + + private: + std::string m_name; + pi_uint16_t m_flags; + pi_uint16_t m_version; + pi_uint32_t m_time_created; + pi_uint32_t m_time_modified; + pi_uint32_t m_time_backup; + pi_uint32_t m_modification; + pi_uint32_t m_type; + pi_uint32_t m_creator; + pi_uint32_t m_unique_id_seed; + + }; + +} // namespace PalmLib + +#endif diff --git a/src/translators/pilotdb/libpalm/Makefile.am b/src/translators/pilotdb/libpalm/Makefile.am new file mode 100644 index 0000000..ea92331 --- /dev/null +++ b/src/translators/pilotdb/libpalm/Makefile.am @@ -0,0 +1,15 @@ +####### kdevelop will overwrite this part!!! (begin)########## +noinst_LIBRARIES = liblibpalm.a + +AM_CPPFLAGS = $(all_includes) + +liblibpalm_a_METASOURCES = AUTO + +liblibpalm_a_SOURCES = Database.cpp Block.cpp + + +EXTRA_DIST = Block.cpp Block.h palmtypes.h Record.h Resource.h Database.h Database.cpp + +####### kdevelop will overwrite this part!!! (end)############ + +KDE_OPTIONS = noautodist diff --git a/src/translators/pilotdb/libpalm/Record.h b/src/translators/pilotdb/libpalm/Record.h new file mode 100644 index 0000000..ecf19e3 --- /dev/null +++ b/src/translators/pilotdb/libpalm/Record.h @@ -0,0 +1,168 @@ +/* + * palm-db-tools: Raw PalmOS Records + * Copyright (C) 2000 by Tom Dyas (tdyas@users.sourceforge.net) + */ + +#ifndef __PALMLIB_RECORD_H__ +#define __PALMLIB_RECORD_H__ + +#include "Block.h" + +namespace PalmLib { + + class Record : public Block { + public: +#ifdef __GNUG__ + static const pi_char_t FLAG_ATTR_DELETED = 0x80; + static const pi_char_t FLAG_ATTR_DIRTY = 0x40; + static const pi_char_t FLAG_ATTR_BUSY = 0x20; + static const pi_char_t FLAG_ATTR_SECRET = 0x10; +#else + static const pi_char_t FLAG_ATTR_DELETED; + static const pi_char_t FLAG_ATTR_DIRTY; + static const pi_char_t FLAG_ATTR_BUSY; + static const pi_char_t FLAG_ATTR_SECRET; +#endif + + /** + * Default constructor. 
+ */ + Record() : Block(), m_attrs(0), m_unique_id(0) { } + + /** + * Copy constructor. + */ + Record(const Record& rhs) : Block(rhs.data(), rhs.size()) { + m_attrs = rhs.attrs(); + m_unique_id = rhs.unique_id(); + } + + /** + * Destructor. + */ + virtual ~Record() { } + + /** + * Constructor which lets the caller specify all the + * parameters. + * + * @param attrs Attribute byte (flags + category). + * @param unique_id Unique ID for this record. + * @param data Start of buffer to copy (or 0 for empty). + * @param size Size of the buffer to copy. + */ + Record(pi_char_t attrs, pi_uint32_t unique_id, + Block::const_pointer data, const Block::size_type size) + : Block(data, size), m_attrs(attrs), m_unique_id(unique_id) { } + + /** + * Constructor which lets the caller use the default fill + * constructor. + * @param attrs Attribute byte (flags + category). + * @param unique_id Unique ID for this record. + * @param size Size of buffer to generate. + * @param value Value to fill buffer with. + */ + Record(pi_char_t attrs, pi_uint32_t unique_id, + const size_type size, const value_type value = 0) + : Block(size, value), m_attrs(attrs), m_unique_id(unique_id) { } + + /** + * Assignment operator. + * + * @param rhs The PalmLib::Record we should become. */ + Record& operator = (const Record& rhs) { + Block::operator = (rhs); + m_attrs = rhs.attrs(); + m_unique_id = rhs.unique_id(); + return *this; + } + + /** + * Return the attributes byte which contains the category and + * flags. + */ + pi_char_t attrs() const { return m_attrs; } + + /** + * Return the state of the record's "deleted" flag. + */ + bool deleted() const { return (m_attrs & FLAG_ATTR_DELETED) != 0; } + + /** + * Set the state of the record's "deleted" flag. + * + * @param state New state of the "deleted" flag. + */ + void deleted(bool state) { + if (state) + m_attrs |= FLAG_ATTR_DELETED; + else + m_attrs &= ~(FLAG_ATTR_DELETED); + } + + /** + * Return the state of the record's "dirty" flag. 
+ */ + bool dirty() const { return (m_attrs & FLAG_ATTR_DIRTY) != 0; } + + /** + * Set the state of the record's "dirty" flag. + * + * @param state New state of the "dirty" flag. + */ + void dirty(bool state) { + if (state) + m_attrs |= FLAG_ATTR_DIRTY; + else + m_attrs &= ~(FLAG_ATTR_DIRTY); + } + + /** + * Return the state of the record's "secret" flag. + */ + bool secret() const { return (m_attrs & FLAG_ATTR_SECRET) != 0; } + + /** + * Set the state of the record's "secret" flag. + * + * @param state New state of the "secret" flag. + */ + void secret(bool state) { + if (state) + m_attrs |= FLAG_ATTR_SECRET; + else + m_attrs &= ~(FLAG_ATTR_SECRET); + } + + /** + * Return the category of this record. + */ + pi_char_t category() const { return (m_attrs & 0x0F); } + + /** + * Set the category of this record. + */ + void category(pi_char_t cat) + { m_attrs &= ~(0x0F); m_attrs |= (cat & 0x0F); } + + /** + * Return the unique ID of this record. + */ + pi_uint32_t unique_id() const { return m_unique_id; } + + /** + * Set the unique ID of this record to uid. + * + * @param uid New unique ID for this record. + */ + void unique_id(pi_uint32_t uid) { m_unique_id = uid; } + + private: + pi_char_t m_attrs; + pi_uint32_t m_unique_id; + }; + +} + +#endif diff --git a/src/translators/pilotdb/libpalm/Resource.h b/src/translators/pilotdb/libpalm/Resource.h new file mode 100644 index 0000000..b98f718 --- /dev/null +++ b/src/translators/pilotdb/libpalm/Resource.h @@ -0,0 +1,85 @@ +/* + * palm-db-tools: PalmOS Resources + * Copyright (C) 2000 by Tom Dyas (tdyas@users.sourceforge.net) + */ + +#ifndef __PALMLIB_RESOURCE_H__ +#define __PALMLIB_RESOURCE_H__ + +#include "Block.h" +#include "palmtypes.h" + +namespace PalmLib { + + class Resource : public Block { + public: + /** + * Default constructor. + */ + Resource() : Block(), m_type(0), m_id(0) { } + + /** + * Copy constructor. 
+ */ + Resource(const Resource& rhs) : Block(rhs.data(), rhs.size()) { + m_type = rhs.type(); + m_id = rhs.id(); + } + + /** + * Destructor. + */ + virtual ~Resource() { } + + /** + * Constructor which lets the caller specify all the + * parameters. + * + * @param type Resource type + * @param id Resource ID + * @param data Start of buffer to copy. + * @param size Size of the buffer to copy. + */ + Resource(pi_uint32_t type, pi_uint32_t id, + const_pointer data, const size_type size) + : Block(data, size), m_type(type), m_id(id) { } + + /** + * Constructor which lets the caller use the default fill + * constructor. + * + * @param type Resource type + * @param id Resource ID + * @param size Size of buffer to generate. + * @param value Value to fill buffer with. + */ + Resource(pi_uint32_t type, pi_uint32_t id, + const size_type size, const value_type value = 0) + : Block(size, value), m_type(type), m_id(id) { } + + /** + * Assignment operator. + */ + Resource& operator = (const Resource& rhs) { + Block::operator = (rhs); + m_type = rhs.type(); + m_id = rhs.id(); + return *this; + } + + // Accessor functions for the resource type. + pi_uint32_t type() const { return m_type; } + void type(const pi_uint32_t _type) { m_type = _type; } + + // Accessor functions for the resource ID. + pi_uint32_t id() const { return m_id; } + void id(const pi_uint32_t _id) { m_id = _id; } + + private: + pi_uint32_t m_type; + pi_uint32_t m_id; + }; + +} + +#endif diff --git a/src/translators/pilotdb/libpalm/palmtypes.h b/src/translators/pilotdb/libpalm/palmtypes.h new file mode 100644 index 0000000..5c12262 --- /dev/null +++ b/src/translators/pilotdb/libpalm/palmtypes.h @@ -0,0 +1,117 @@ +/* + * This file contains type definitions and helper functions to make + * access to data in Palm Pilot order easier. 
#include <cstdint>  // exact-width fallbacks used when SIZEOF_* is not configured

namespace PalmLib {

    // sizeof(unsigned char) is 1 by definition, so no configure check is
    // needed for the byte type.
    typedef unsigned char pi_char_t;

    // The configure-provided SIZEOF_* macros are honoured first, in the
    // original preference order (long, int, short), so a configured build
    // selects exactly the same types as before. When none matches, the
    // exact-width <cstdint> type is used instead of aborting with #error.
#if SIZEOF_UNSIGNED_LONG == 2
    typedef unsigned long pi_uint16_t;
#elif SIZEOF_UNSIGNED_INT == 2
    typedef unsigned int pi_uint16_t;
#elif SIZEOF_UNSIGNED_SHORT == 2
    typedef unsigned short pi_uint16_t;
#else
    typedef std::uint16_t pi_uint16_t;
#endif

#if SIZEOF_LONG == 2
    typedef long pi_int16_t;
#elif SIZEOF_INT == 2
    typedef int pi_int16_t;
#elif SIZEOF_SHORT == 2
    typedef short pi_int16_t;
#else
    typedef std::int16_t pi_int16_t;
#endif

#if SIZEOF_UNSIGNED_LONG == 4
    typedef unsigned long pi_uint32_t;
#elif SIZEOF_UNSIGNED_INT == 4
    typedef unsigned int pi_uint32_t;
#elif SIZEOF_UNSIGNED_SHORT == 4
    typedef unsigned short pi_uint32_t;
#else
    typedef std::uint32_t pi_uint32_t;
#endif

#if SIZEOF_LONG == 4
    typedef long pi_int32_t;
#elif SIZEOF_INT == 4
    typedef int pi_int32_t;
#elif SIZEOF_SHORT == 4
    typedef short pi_int32_t;
#else
    typedef std::int32_t pi_int32_t;
#endif

// A double overlaid with its two 32-bit halves; the member order follows
// the host byte order selected by WORDS_BIGENDIAN.
typedef union {
    double number;
#ifdef WORDS_BIGENDIAN
    struct {
        PalmLib::pi_uint32_t hi;
        PalmLib::pi_uint32_t lo;
    } words;
#else
    struct {
        PalmLib::pi_uint32_t lo;
        PalmLib::pi_uint32_t hi;
    } words;
#endif
} pi_double_t;

    // Read a 4-byte big-endian value starting at p. The bytes are combined
    // as unsigned so a top byte >= 0x80 is never shifted into the sign bit
    // of a promoted int.
    inline pi_int32_t get_long(const pi_char_t* p) {
        const pi_uint32_t v = (static_cast<pi_uint32_t>(p[0]) << 24)
                            | (static_cast<pi_uint32_t>(p[1]) << 16)
                            | (static_cast<pi_uint32_t>(p[2]) << 8)
                            |  static_cast<pi_uint32_t>(p[3]);
        return static_cast<pi_int32_t>(v);
    }

    // Read a 3-byte big-endian value starting at p. The previous version
    // joined the bytes with logical "||" and read p[0] twice, so it could
    // only ever return 0 or 1.
    inline pi_int32_t get_treble(const pi_char_t* p) {
        return ( (p[0] << 16) | (p[1] << 8) | p[2] );
    }

    // Read a 2-byte big-endian value starting at p.
    inline pi_int16_t get_short(const pi_char_t* p) {
        return ( (p[0] << 8) | p[1] );
    }

    // Write v to p[0..3], most significant byte first.
    inline void set_long(pi_char_t *p, pi_int32_t v) {
        const pi_uint32_t u = static_cast<pi_uint32_t>(v);
        p[0] = (u >> 24) & 0xFF;
        p[1] = (u >> 16) & 0xFF;
        p[2] = (u >> 8 ) & 0xFF;
        p[3] = (u      ) & 0xFF;
    }

    // Write the low 24 bits of v to p[0..2], most significant byte first.
    inline void set_treble(pi_char_t *p, pi_int32_t v) {
        const pi_uint32_t u = static_cast<pi_uint32_t>(v);
        p[0] = (u >> 16) & 0xFF;
        p[1] = (u >> 8 ) & 0xFF;
        p[2] = (u      ) & 0xFF;
    }

    // Write v to p[0..1], most significant byte first.
    inline void set_short(pi_char_t *p, pi_int16_t v) {
        p[0] = (v >> 8) & 0xFF;
        p[1] = (v     ) & 0xFF;
    }

    // Pack four characters into a 32-bit tag, c1 in the most significant
    // byte. Computed in unsigned arithmetic for the same reason as
    // get_long.
    inline pi_uint32_t mktag(pi_char_t c1, pi_char_t c2,
                             pi_char_t c3, pi_char_t c4)
    {
        return (static_cast<pi_uint32_t>(c1) << 24)
             | (static_cast<pi_uint32_t>(c2) << 16)
             | (static_cast<pi_uint32_t>(c3) << 8)
             |  static_cast<pi_uint32_t>(c4);
    }

    // Exception type for PalmLib failures.
    class error : public std::runtime_error {
    public:
        error(const std::string & what_arg) : std::runtime_error(what_arg) { }
    };

} // namespace PalmLib
Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "pilotdb.h"
+#include "strop.h"
+#include "libflatfile/Record.h"
+
+#include <kdebug.h>
+
+#include <qbuffer.h>
+
+using namespace PalmLib;
+using Tellico::Export::PilotDB;
+
+namespace {
+  // Sizes, in bytes, of the fixed structures written by PilotDB::data().
+  static const int PI_HDR_SIZE = 78;          // database header
+  static const int PI_RESOURCE_ENT_SIZE = 10; // type(4) + id(2) + offset(4)
+  static const int PI_RECORD_ENT_SIZE = 8;    // offset(4) + attrs(1) + unique id(3)
+}
+
+// Create an empty database and stamp the creation, modification and backup
+// times with the current time.
+PilotDB::PilotDB() : Database(false), m_app_info(), m_sort_info(),
+                     m_next_record_list_id(0) {
+  pi_int32_t now = StrOps::get_current_time();
+  creation_time(now);
+  modification_time(now);
+  backup_time(now);
+}
+
+// The database owns every block stored in m_records.
+PilotDB::~PilotDB() {
+  for(record_list_t::iterator i = m_records.begin(); i != m_records.end(); ++i) {
+    delete (*i);
+  }
+}
+
+// Serialize the whole database into a byte array, in this order:
+//   1. the PI_HDR_SIZE-byte header,
+//   2. one index entry per record/resource,
+//   3. two zero bytes,
+//   4. the app info block and the sort info block (each only if non-empty),
+//   5. the raw data of every record/resource.
+QByteArray PilotDB::data() {
+  QBuffer b;
+  b.open(IO_WriteOnly);
+
+  pi_char_t buf[PI_HDR_SIZE];
+  pi_int16_t ent_hdr_size = isResourceDB() ? PI_RESOURCE_ENT_SIZE : PI_RECORD_ENT_SIZE;
+  // File offset of the first byte after the index; the "+ 2" accounts for
+  // the two zero bytes written after the index entries.
+  std::streampos offset = PI_HDR_SIZE + m_records.size() * ent_hdr_size + 2;
+
+  // Bytes 0-31: database name, zero padded, at most 31 characters so that a
+  // terminating zero always remains.
+  for(int i=0; i<32; ++i) {
+    buf[i] = 0;
+  }
+  memcpy(buf, name().c_str(), QMIN(31, name().length()));
+  set_short(buf + 32, flags());
+  set_short(buf + 34, version());
+  set_long(buf + 36, creation_time());
+  set_long(buf + 40, modification_time());
+  set_long(buf + 44, backup_time());
+  set_long(buf + 48, modnum());
+  // Bytes 52-55 / 56-59: offsets of the app info and sort info blocks,
+  // or 0 when the block is empty.
+  if(m_app_info.raw_size() > 0) {
+    set_long(buf + 52, offset);
+    offset += m_app_info.raw_size();
+  } else {
+    set_long(buf + 52, 0);
+  }
+  if(m_sort_info.raw_size() > 0) {
+    set_long(buf + 56, offset);
+    offset += m_sort_info.raw_size();
+  } else {
+    set_long(buf + 56, 0);
+  }
+  set_long(buf + 60, type());
+  set_long(buf + 64, creator());
+  set_long(buf + 68, unique_id_seed());
+  set_long(buf + 72, m_next_record_list_id);
+  // NOTE(review): the record count is stored in 16 bits; larger counts are
+  // silently truncated here.
+  set_short(buf + 76, m_records.size());
+
+  // Write the PDB/PRC header to the string.
+  b.writeBlock(reinterpret_cast<char *>(buf), sizeof(buf));
+
+  // Write the index; buf is reused as scratch space for each entry.
+  for(record_list_t::iterator i = m_records.begin(); i != m_records.end(); ++i) {
+    Block* entry = *i;
+
+    if(isResourceDB()) {
+      Resource * resource = reinterpret_cast<Resource *> (entry);
+      set_long(buf, resource->type());
+      set_short(buf + 4, resource->id());
+      set_long(buf + 6, offset);
+    } else {
+      Record * record = reinterpret_cast<Record *> (entry);
+      set_long(buf, offset);
+      buf[4] = record->attrs();
+      set_treble(buf + 5, record->unique_id());
+    }
+    b.writeBlock(reinterpret_cast<char *>(buf), ent_hdr_size);
+    offset += entry->raw_size();
+  }
+
+  // Two zero bytes separate the index from the data.
+  b.writeBlock("\0", 1);
+  b.writeBlock("\0", 1);
+
+  if(m_app_info.raw_size() > 0) {
+    b.writeBlock((char *) m_app_info.raw_data(), m_app_info.raw_size());
+  }
+
+  if(m_sort_info.raw_size() > 0) {
+    b.writeBlock((char *) m_sort_info.raw_data(), m_sort_info.raw_size());
+  }
+
+  for(record_list_t::iterator q = m_records.begin(); q != m_records.end(); ++q) {
+    Block* entry = *q;
+    b.writeBlock((char *) entry->raw_data(), entry->raw_size());
+  }
+
+  b.close();
+  return b.buffer();
+}
+
+// Return a copy of the record at the given index.  An out-of-range index is
+// logged and yields a default-constructed Record instead of reading past the
+// end of the record list.
+// NOTE(review): relies on PalmLib::Record being default-constructible --
+// confirm against libpalm/Record.h.
+Record PilotDB::getRecord(unsigned index) const
+{
+  if (index >= m_records.size()) {
+    kdDebug() << "invalid index" << endl;
+    return Record();
+  }
+  return *(reinterpret_cast<Record *> (m_records[index]));
+}
+
+// Overwrite the record at the given index with a copy of rec.  An
+// out-of-range index is logged and ignored.
+void PilotDB::setRecord(unsigned index, const Record& rec)
+{
+  if (index >= m_records.size()) {
+    kdDebug() << "invalid index" << endl;
+    return;
+  }
+  *(reinterpret_cast<Record *> (m_records[index])) = rec;
+}
+
+// Append a copy of the given record to the database.
+void PilotDB::appendRecord(const Record& rec)
+{
+  Record* record = new Record(rec);
+
+  // If this new record has a unique ID that duplicates any other
+  // record, then reset the unique ID to an unused value.
+  if (m_uid_map.find(record->unique_id()) != m_uid_map.end()) {
+    // std::map keeps its keys sorted, so the last element holds the largest
+    // ID in use.  The map cannot be empty here because find() succeeded.
+    pi_uint32_t maxuid = m_uid_map.rbegin()->first;
+
+    // The new unique ID becomes the max plus one.
+    record->unique_id(maxuid + 1);
+  }
+
+  m_uid_map[record->unique_id()] = record;
+  m_records.push_back(record);
+}
+
+
+// Delete all records.  The blocks are owned by the database (see the
+// destructor), so they are freed here; the unique-ID map is emptied as well
+// because it points at the same objects.
+void PilotDB::clearRecords()
+{
+  for(record_list_t::iterator i = m_records.begin(); i != m_records.end(); ++i) {
+    delete (*i);
+  }
+  m_records.clear();
+  m_uid_map.clear();
+}
+
+// Return a copy of the resource with the given type and ID.  If the
+// (type, ID) combination is not present in the database, a warning is
+// logged and a default-constructed Resource is returned.
+Resource PilotDB::getResourceByType(pi_uint32_t type, pi_uint32_t id) const
+{
+  for (record_list_t::const_iterator i = m_records.begin();
+       i != m_records.end(); ++i) {
+    Resource* resource = reinterpret_cast<Resource *> (*i);
+    if (resource->type() == type && resource->id() == id)
+      return *resource;
+  }
+
+  kdWarning() << "PilotDB::getResourceByType() - not found!" << endl;
+  return Resource();
+}
+
+// Return a copy of the resource at the given index.  An out-of-range index
+// is logged and yields a default-constructed Resource.
+Resource PilotDB::getResourceByIndex(unsigned index) const
+{
+  if (index >= m_records.size()) {
+    kdDebug() << "invalid index" << endl;
+    return Resource();
+  }
+  return *(reinterpret_cast<Resource *> (m_records[index]));
+}
+
+// Set the resource at the given index to the passed Resource object.
+void PilotDB::setResource(unsigned index, const Resource& resource)
+{
+  // An out-of-range index is logged and ignored rather than written through.
+  if (index >= m_records.size()) {
+    kdDebug() << "invalid index" << endl;
+    return;
+  }
+  *(reinterpret_cast<Resource *> (m_records[index])) = resource;
+}
+
+// Build a FlatFile field of the requested type from its textual form.
+//
+// type    the field type to produce
+// fldstr  the value as text; DATE values are expected as "YYYY/MM/DD"
+//
+// TIME, DATETIME and any other type without a case below log a warning and
+// return the default-constructed field.
+FlatFile::Field PilotDB::string2field(FlatFile::Field::FieldType type, const std::string& fldstr) {
+  FlatFile::Field field;
+
+  switch (type) {
+    case FlatFile::Field::STRING:
+      field.type = FlatFile::Field::STRING;
+      field.v_string = fldstr;
+      break;
+
+    case FlatFile::Field::BOOLEAN:
+      field.type = FlatFile::Field::BOOLEAN;
+      field.v_boolean = StrOps::string2boolean(fldstr);
+      break;
+
+    case FlatFile::Field::INTEGER:
+      field.type = FlatFile::Field::INTEGER;
+      StrOps::convert_string(fldstr, field.v_integer);
+      break;
+
+    case FlatFile::Field::FLOAT:
+      field.type = FlatFile::Field::FLOAT;
+      StrOps::convert_string(fldstr, field.v_float);
+      break;
+
+    case FlatFile::Field::NOTE:
+      field.type = FlatFile::Field::NOTE;
+      // The string value holds the (truncated) title, the note the full text.
+      field.v_string = fldstr.substr(0,NOTETITLE_LENGTH - 1);
+      field.v_note = fldstr;
+      break;
+
+    case FlatFile::Field::LIST:
+      field.type = FlatFile::Field::LIST;
+      field.v_string = fldstr;
+      break;
+
+    case FlatFile::Field::LINK:
+      field.type = FlatFile::Field::LINK;
+      field.v_integer = 0;
+      field.v_string = fldstr;
+      break;
+
+    case FlatFile::Field::LINKED:
+      field.type = FlatFile::Field::LINKED;
+      field.v_string = fldstr;
+      break;
+
+    case FlatFile::Field::CALCULATED:
+      field.type = FlatFile::Field::CALCULATED;
+      field.v_string = fldstr;
+      break;
+
+    case FlatFile::Field::DATE:
+    {
+      field.type = FlatFile::Field::DATE;
+      // Zero the struct first: the "%Y/%m/%d" format never sets the hour or
+      // minute, and a failed parse may set nothing at all.
+      struct tm time;
+      memset(&time, 0, sizeof(time));
+      bool parsed = false;
+      if (!fldstr.empty()) {
+#ifdef strptime
+        parsed = (strptime(fldstr.c_str(), "%Y/%m/%d", &time) != 0);
+#else
+        parsed = (StrOps::strptime(fldstr.c_str(), "%Y/%m/%d", &time) != 0);
+#endif
+        if (!parsed) {
+          kdDebug() << "invalid date in field" << endl;
+        }
+      }
+      if (parsed) {
+        field.v_date.month = time.tm_mon + 1;
+        field.v_date.day = time.tm_mday;
+        field.v_date.year = time.tm_year + 1900;
+        field.v_time.hour = time.tm_hour;
+        field.v_time.minute = time.tm_min;
+      } else {
+        // An empty or unparsable value is stored as "no date".
+        field.v_date.month = 0;
+        field.v_date.day = 0;
+        field.v_date.year = 0;
+        field.v_time.hour = 24;
+        field.v_time.minute = 0;
+      }
+      break;
+    }
+
+    default:
+      kdWarning() << "PilotDB::string2field() - unsupported field type" << endl;
+      break;
+  }
+
+  return field;
+}
diff --git a/src/translators/pilotdb/pilotdb.h b/src/translators/pilotdb/pilotdb.h
new file mode 100644
index 0000000..dd21c7b
--- /dev/null
+++ b/src/translators/pilotdb/pilotdb.h
@@ -0,0 +1,127 @@
+/***************************************************************************
+ pilotdb.h
+ -------------------
+ begin : Thu Nov 20 2003
+ copyright : (C) 2003 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#ifndef PILOTDB_H
+#define PILOTDB_H
+
+#include <map>
+#include <vector>
+
+#include "libpalm/Database.h"
+#include "libflatfile/Field.h"
+
+#include <qcstring.h>
+
+namespace Tellico {
+  namespace Export {
+
+/**
+ * An in-memory Palm database that can be serialized to a byte array.
+ *
+ * The object owns the blocks stored in m_records and deletes them in its
+ * destructor.
+ * NOTE(review): no copy constructor or assignment operator is declared, so
+ * copying a PilotDB would leave two objects deleting the same blocks.
+ *
+ * @author Robby Stephenson
+ */
+class PilotDB : public PalmLib::Database {
+public:
+  PilotDB();
+  ~PilotDB();
+
+  /**
+   * Return the serialized database (header, index and data) as a byte array.
+   */
+  QByteArray data();
+
+  /**
+   * Return the total number of records/resources in this database.
+   */
+  virtual unsigned getNumRecords() const { return m_records.size(); }
+
+  /**
+   * Return a copy of the database's application info block.
+   */
+  virtual PalmLib::Block getAppInfoBlock() const { return m_app_info; }
+
+  /**
+   * Set the database's app info block to the contents of the
+   * passed Block object.
+   */
+  virtual void setAppInfoBlock(const PalmLib::Block& new_app_info) { m_app_info = new_app_info; }
+
+  /**
+   * Return a copy of the database's sort info block.
+   */
+  virtual PalmLib::Block getSortInfoBlock() const { return m_sort_info; }
+
+  /**
+   * Set the database's sort info block to the contents of the
+   * passed Block object.
+   */
+  virtual void setSortInfoBlock(const PalmLib::Block& new_sort_info) { m_sort_info = new_sort_info; }
+
+  /**
+   * Return a copy of the record identified by the given index, which must
+   * be less than getNumRecords().
+   */
+  virtual PalmLib::Record getRecord(unsigned index) const;
+
+  /**
+   * Set the record identified by the given index to the given record.
+   * The index must be less than getNumRecords().
+   */
+  virtual void setRecord(unsigned index, const PalmLib::Record& rec);
+
+  /**
+   * Append a copy of the given record to the database.
+   */
+  virtual void appendRecord(const PalmLib::Record& rec);
+
+  /**
+   * Delete all records
+   */
+  virtual void clearRecords();
+
+  /**
+   * Return a copy of the resource with the given type and ID.  If the
+   * specified (type, ID) combination is not present in the database, a
+   * default-constructed Resource is returned.
+   */
+  virtual PalmLib::Resource getResourceByType(PalmLib::pi_uint32_t type, PalmLib::pi_uint32_t id) const;
+
+  /**
+   * Return a copy of the resource present at the given index, which must
+   * be less than getNumRecords().
+   */
+  virtual PalmLib::Resource getResourceByIndex(unsigned index) const;
+
+  /**
+   * Set the resource at given index to passed Resource object.  An
+   * out-of-range index is ignored.
+   */
+  virtual void setResource(unsigned index, const PalmLib::Resource& rsrc);
+
+  /**
+   * Convert the textual value fldstr into a field of the given type.
+   * DATE values are expected in the form "YYYY/MM/DD".
+   */
+  static PalmLib::FlatFile::Field string2field(PalmLib::FlatFile::Field::FieldType type,
+                                               const std::string& fldstr);
+
+protected:
+  typedef std::vector<PalmLib::Block *> record_list_t;
+  typedef std::map<PalmLib::pi_uint32_t, PalmLib::Record *> uid_map_t;
+
+  // Owned blocks, in database order.
+  record_list_t m_records;
+  // Unique ID -> record, filled by appendRecord(); points into m_records.
+  uid_map_t m_uid_map;
+
+private:
+  PalmLib::Block m_app_info;
+  PalmLib::Block m_sort_info;
+  PalmLib::pi_int32_t m_next_record_list_id;
+};
+
+  } //end namespace
+} // end namespace
+#endif
diff --git a/src/translators/pilotdb/portability.h b/src/translators/pilotdb/portability.h
new file mode 100644
index 0000000..cb41f79
--- /dev/null
+++ b/src/translators/pilotdb/portability.h
@@ -0,0 +1,72 @@
+/*
+ * palm-db-tools: Support Library: String Parsing Utility Functions
+ * Copyright (C) 1999-2000 by Tom Dyas (tdyas@users.sourceforge.net)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef __LIBSUPPORT_PORTABILITY_H__
+#define __LIBSUPPORT_PORTABILITY_H__
+
+/*
+ * Pull in the correct configuration header.
+ */
+
+#ifndef WIN32
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif /* HAVE_CONFIG_H */
+#else
+#include "win32/win32-config.h"
+#endif /* !WIN32 */
+
+#ifdef _MSC_VER
+/* Borrowed from GLib: Make MSVC more pedantic, this is a recommended
+ * pragma list from _Win32_Programming_ by Rector and Newcomer.
+ */
+#pragma warning(error:4002)
+#pragma warning(error:4003)
+#pragma warning(1:4010)
+#pragma warning(error:4013)
+#pragma warning(1:4016)
+#pragma warning(error:4020)
+#pragma warning(error:4021)
+#pragma warning(error:4027)
+#pragma warning(error:4029)
+#pragma warning(error:4033)
+#pragma warning(error:4035)
+#pragma warning(error:4045)
+#pragma warning(error:4047)
+#pragma warning(error:4049)
+#pragma warning(error:4053)
+#pragma warning(error:4071)
+#pragma warning(disable:4101)
+#pragma warning(error:4150)
+
+#pragma warning(disable:4244) /* No possible loss of data warnings */
+#pragma warning(disable:4305) /* No truncation from int to char warnings */
+#endif /* _MSC_VER */
+
+/* MSVC is screwed up when it comes to calling base class virtual
+ * functions from a subclass. Thus, the following macro which makes
+ * calling the superclass nice and simple.
+ *
+ * SUPERCLASS(NS, Base, func, (args)) expands to
+ *   NS::Base::func (args)        on every compiler except MSVC, and to
+ *   this-> Base::func (args)     on MSVC.
+ * The `args` parameter must include its own parentheses.
+ */
+#ifndef _MSC_VER
+#define SUPERCLASS(namespace, class, function, args) namespace::class::function args
+#else
+#define SUPERCLASS(namespace, class, function, args) this-> class::function args
+#endif /* !_MSC_VER */
+
+#endif /* __LIBSUPPORT_PORTABILITY_H__ */
diff --git a/src/translators/pilotdb/strop.cpp b/src/translators/pilotdb/strop.cpp
new file mode 100644
index 0000000..b8c7f55
--- /dev/null
+++ b/src/translators/pilotdb/strop.cpp
@@ -0,0 +1,589 @@
+/*
+ * palm-db-tools: Support Library: String Parsing Utility Functions
+ * Copyright (C) 1999-2000 by Tom Dyas (tdyas@users.sourceforge.net)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <cctype>
+#include <sstream>
+
+#include "strop.h"
+#include <kdebug.h>
+
+// Error stream defined elsewhere; used by quote_string().
+extern std::ostream* err;
+
+// Convert all uppercase characters of str to lowercase, in place.
+void StrOps::lower(std::string& str)
+{
+    for (std::string::iterator p = str.begin(); p != str.end(); ++p) {
+        // The <cctype> functions need a value representable as unsigned
+        // char; a negative plain char would be undefined behaviour.
+        const unsigned char ch = static_cast<unsigned char> (*p);
+        if (isupper(ch))
+            *p = static_cast<char> (tolower(ch));
+    }
+}
+
+// Interpret str as a boolean.  "on", "true" and "t" are true; "off",
+// "false" and "f" are false; all are matched case-insensitively.  Anything
+// else is read as an integer, with non-zero meaning true.
+bool StrOps::string2boolean(const std::string& str)
+{
+    std::string value(str);
+
+    StrOps::lower(value);
+
+    // Compare the lowercased copy throughout (previously only "on" was
+    // matched case-insensitively).
+    if (value == "on") return true;
+    else if (value == "off") return false;
+    else if (value == "true") return true;
+    else if (value == "t") return true;
+    else if (value == "false") return false;
+    else if (value == "f") return false;
+    else {
+        int num = 0;
+
+        std::istringstream(str.c_str()) >> num;
+        return num != 0;
+    }
+}
+
+// Return the textual name of a field type.  Types without a name of their
+// own are reported as "string".
+std::string StrOps::type2string(PalmLib::FlatFile::Field::FieldType t)
+{
+    switch (t) {
+    case PalmLib::FlatFile::Field::STRING:
+        return "string";
+
+    case PalmLib::FlatFile::Field::BOOLEAN:
+        return "boolean";
+
+    case PalmLib::FlatFile::Field::INTEGER:
+        return "integer";
+
+    case PalmLib::FlatFile::Field::FLOAT:
+        return "float";
+
+    case PalmLib::FlatFile::Field::DATE:
+        return "date";
+
+    case PalmLib::FlatFile::Field::TIME:
+        return "time";
+
+    case PalmLib::FlatFile::Field::DATETIME:
+        return "datetime";
+
+    case PalmLib::FlatFile::Field::NOTE:
+        return "note";
+
+    case PalmLib::FlatFile::Field::LIST:
+        return "list";
+
+    case PalmLib::FlatFile::Field::LINK:
+        return "link";
+
+    case PalmLib::FlatFile::Field::CALCULATED:
+        return "calculated";
+
+    case PalmLib::FlatFile::Field::LINKED:
+        return "linked";
+
+    default:
+        // If we don't support the field type, then fake it as a string.
+        return "string";
+    }
+}
+
+// Return the field type named by typestr (case-insensitive).  Unknown names
+// are logged and treated as STRING.
+PalmLib::FlatFile::Field::FieldType StrOps::string2type(std::string typestr)
+{
+    StrOps::lower(typestr);
+    if (typestr == "string")
+        return PalmLib::FlatFile::Field::STRING;
+    else if (typestr == "str")
+        return PalmLib::FlatFile::Field::STRING;
+    else if (typestr == "note")
+        return PalmLib::FlatFile::Field::NOTE;
+    else if (typestr == "bool")
+        return PalmLib::FlatFile::Field::BOOLEAN;
+    else if (typestr == "boolean")
+        return PalmLib::FlatFile::Field::BOOLEAN;
+    else if (typestr == "integer")
+        return PalmLib::FlatFile::Field::INTEGER;
+    else if (typestr == "int")
+        return PalmLib::FlatFile::Field::INTEGER;
+    else if (typestr == "float")
+        return PalmLib::FlatFile::Field::FLOAT;
+    else if (typestr == "date")
+        return PalmLib::FlatFile::Field::DATE;
+    else if (typestr == "time")
+        return PalmLib::FlatFile::Field::TIME;
+    else if (typestr == "datetime")
+        return PalmLib::FlatFile::Field::DATETIME;
+    else if (typestr == "list")
+        return PalmLib::FlatFile::Field::LIST;
+    else if (typestr == "link")
+        return PalmLib::FlatFile::Field::LINK;
+    else if (typestr == "linked")
+        return PalmLib::FlatFile::Field::LINKED;
+    else if (typestr == "calculated")
+        return PalmLib::FlatFile::Field::CALCULATED;
+    else {
+        kdDebug() << "unknown field type" << endl;
+    }
+    return PalmLib::FlatFile::Field::STRING;
+}
+
+// Return a copy of str with every trailing character contained in `what`
+// removed.
+std::string StrOps::strip_back(const std::string& str, const std::string& what)
+{
+    std::string result(str);
+    std::string::reverse_iterator p = result.rbegin();
+
+    while (p != result.rend()
+           && (std::find(what.begin(), what.end(), *p) != what.end())) ++p;
+
+    result.erase(p.base(), result.end());
+
+    return result;
+}
+
+// Return a copy of str with every leading character contained in `what`
+// removed.
+std::string StrOps::strip_front(const std::string& str,const std::string& what)
+{
+    std::string result(str);
+    std::string::iterator p = result.begin();
+
+    while (p != result.end()
+           && (std::find(what.begin(), what.end(), *p) != what.end())) ++p;
+
+    result.erase(result.begin(), p);
+
+    return result;
+}
+
+// Split str at every `delim`.  When quoted_string is set, text between
+// double quotes is taken literally and "" inside quotes stands for one
+// quote character.  An unterminated quote is logged and the field being
+// built at that point is dropped.
+StrOps::string_list_t StrOps::csv_to_array(const std::string& str, char delim, bool quoted_string)
+{
+    enum { STATE_NORMAL, STATE_QUOTES } state;
+    StrOps::string_list_t result;
+    std::string elem;
+
+    state = STATE_NORMAL;
+    for (std::string::const_iterator p = str.begin(); p != str.end(); ++p) {
+        switch (state) {
+        case STATE_NORMAL:
+            if (quoted_string && *p == '"') {
+                state = STATE_QUOTES;
+            } else if (*p == delim) {
+                result.push_back(elem);
+                elem = "";
+            } else {
+                elem += *p;
+            }
+            break;
+
+        case STATE_QUOTES:
+            if (quoted_string && *p == '"') {
+                if ((p + 1) != str.end() && *(p+1) == '"') {
+                    ++p;
+                    elem += '"';
+                } else {
+                    state = STATE_NORMAL;
+                }
+            } else {
+                elem += *p;
+            }
+            break;
+        }
+    }
+
+    switch (state) {
+    case STATE_NORMAL:
+        result.push_back(elem);
+        break;
+    case STATE_QUOTES:
+        kdDebug() << "unterminated quotes" << endl;
+        break;
+    }
+
+    return result;
+}
+
+// Split str into argv-style words.
+//
+// delim            set of delimiter characters
+// multiple_delim   treat a run of delimiters as a single one
+// handle_comments  stop parsing at an unquoted '#'
+//
+// Single quotes take text literally; double quotes additionally understand
+// the escapes \\ \n \r \t \v \" and \xH / \xHH.  Outside quotes a backslash
+// makes the next character literal.  Unterminated quotes and trailing
+// backslashes are logged.
+StrOps::string_list_t
+StrOps::str_to_array(const std::string& str, const std::string& delim,
+                     bool multiple_delim, bool handle_comments)
+{
+    enum { STATE_NORMAL, STATE_COMMENT, STATE_QUOTE_DOUBLE, STATE_QUOTE_SINGLE,
+           STATE_BACKSLASH, STATE_BACKSLASH_DOUBLEQUOTE } state;
+    StrOps::string_list_t result;
+    std::string elem;
+
+    state = STATE_NORMAL;
+    for (std::string::const_iterator p = str.begin(); p != str.end(); ++p) {
+        switch (state) {
+        case STATE_NORMAL:
+            if (*p == '"') {
+                state = STATE_QUOTE_DOUBLE;
+            } else if (*p == '\'') {
+                state = STATE_QUOTE_SINGLE;
+            } else if (std::find(delim.begin(), delim.end(), *p) != delim.end()) {
+                if (multiple_delim) {
+                    // Skip the rest of the delimiter run, then step back one
+                    // so the loop increment lands on the next word.
+                    ++p;
+                    while (p != str.end()
+                           && std::find(delim.begin(), delim.end(), *p) != delim.end()) {
+                        ++p;
+                    }
+                    --p;
+                }
+                result.push_back(elem);
+                elem = "";
+            } else if (*p == '\\') {
+                state = STATE_BACKSLASH;
+            } else if (handle_comments && *p == '#') {
+                state = STATE_COMMENT;
+            } else {
+                elem += *p;
+            }
+            break;
+
+        case STATE_COMMENT:
+            break;
+
+        case STATE_QUOTE_DOUBLE:
+            if (*p == '"')
+                state = STATE_NORMAL;
+            else if (*p == '\\')
+                state = STATE_BACKSLASH_DOUBLEQUOTE;
+            else
+                elem += *p;
+            break;
+
+        case STATE_QUOTE_SINGLE:
+            if (*p == '\'')
+                state = STATE_NORMAL;
+            else
+                elem += *p;
+            break;
+
+        case STATE_BACKSLASH:
+            elem += *p;
+            state = STATE_NORMAL;
+            break;
+
+        case STATE_BACKSLASH_DOUBLEQUOTE:
+            switch (*p) {
+            case '\\':
+                elem += '\\';
+                break;
+
+            case 'n':
+                elem += '\n';
+                break;
+
+            case 'r':
+                elem += '\r';
+                break;
+
+            case 't':
+                elem += '\t';
+                break;
+
+            case 'v':
+                elem += '\v';
+                break;
+
+            case '"':
+                elem += '"';
+                break;
+
+            case 'x':
+                {
+                    char buf[3];
+
+                    // Extract and check the first hexadecimal character.
+                    // A malformed escape is logged and skipped without
+                    // looking at (or past) the end of the string.
+                    if ((p + 1) == str.end()) {
+                        kdDebug() << "truncated escape" << endl;
+                        break;
+                    }
+                    if (! isxdigit(static_cast<unsigned char> (*(p + 1)))) {
+                        kdDebug() << "invalid hex character" << endl;
+                        break;
+                    }
+                    buf[0] = *++p;
+
+                    // Extract and check the second (if any) hex character.
+                    if ((p + 1) != str.end()
+                        && isxdigit(static_cast<unsigned char> (*(p + 1)))) {
+                        buf[1] = *++p;
+                        buf[2] = '\0';
+                    } else {
+                        buf[1] = buf[2] = '\0';
+                    }
+
+                    std::istringstream stream(buf);
+                    stream.setf(std::ios::hex, std::ios::basefield);
+                    unsigned value = 0;
+                    stream >> value;
+
+                    elem += static_cast<char> (value & 0xFFu);
+                }
+                break;
+            }
+
+            // Escape is done. Go back to the normal double quote state.
+            state = STATE_QUOTE_DOUBLE;
+            break;
+        }
+    }
+
+    switch (state) {
+    case STATE_NORMAL:
+        result.push_back(elem);
+        break;
+
+    case STATE_QUOTE_DOUBLE:
+        kdDebug() << "unterminated double quotes" << endl;
+        break;
+
+    case STATE_QUOTE_SINGLE:
+        kdDebug() << "unterminated single quotes" << endl;
+        break;
+
+    case STATE_BACKSLASH:
+    case STATE_BACKSLASH_DOUBLEQUOTE:
+        kdDebug() << "an escape character must follow a backslash" << endl;
+        break;
+
+    default:
+        break;
+    }
+
+    return result;
+}
+
+// Return the current time as Unix time plus 2082844800 seconds, the number
+// of seconds between 1904-01-01 and 1970-01-01.
+PalmLib::pi_uint32_t
+StrOps::get_current_time(void)
+{
+    time_t now;
+
+    time(&now);
+    return static_cast<PalmLib::pi_uint32_t> (now) + PalmLib::pi_uint32_t(2082844800);
+}
+
+// Minimal strptime replacement: parse s according to format into *tm.
+//
+// Supported conversions: %d %m %y %Y %H %M.  Every other format character
+// must match the input literally.
+//
+// Returns NULL when the input ends before the format does or a parsed
+// value is out of range; otherwise a pointer to the position in s where
+// parsing stopped (a literal mismatch stops parsing at that position).
+// Only the members named by the format are written.
+char *
+StrOps::strptime(const char *s, const char *format, struct tm *tm)
+{
+    char *data = (char *)s;
+    bool option = false;
+
+    while (*format != 0) {
+        if (*data == 0)
+            return NULL;
+        switch (*format) {
+        case '%':
+            option = true;
+            format++;
+            break;
+        case 'd':
+            if (option) {
+                tm->tm_mday = strtol(data, &data, 10);
+                if (tm->tm_mday < 1 || tm->tm_mday > 31)
+                    return NULL;
+            } else if (*data != 'd') {
+                return data;
+            } else {
+                // A literal in the format consumes the matching input
+                // character, as in the default branch below.
+                data++;
+            }
+            option = false;
+            format++;
+            break;
+        case 'm':
+            if (option) {
+                /* tm_mon between 0 and 11 */
+                tm->tm_mon = strtol(data, &data, 10) - 1;
+                if (tm->tm_mon < 0 || tm->tm_mon > 11)
+                    return NULL;
+            } else if (*data != 'm') {
+                return data;
+            } else {
+                data++;
+            }
+            option = false;
+            format++;
+            break;
+        case 'y':
+            if (option) {
+                // Two-digit year: values below 60 are taken as 20xx.
+                tm->tm_year = strtol(data, &data, 10);
+                if (tm->tm_year < 60) tm->tm_year += 100;
+            } else if (*data != 'y') {
+                return data;
+            } else {
+                data++;
+            }
+            option = false;
+            format++;
+            break;
+        case 'Y':
+            if (option) {
+                tm->tm_year = strtol(data, &data, 10) - 1900;
+            } else if (*data != 'Y') {
+                return data;
+            } else {
+                data++;
+            }
+            option = false;
+            format++;
+            break;
+        case 'H':
+            if (option) {
+                tm->tm_hour = strtol(data, &data, 10);
+                if (tm->tm_hour < 0 || tm->tm_hour > 23)
+                    return NULL;
+            } else if (*data != 'H') {
+                return data;
+            } else {
+                data++;
+            }
+            option = false;
+            format++;
+            break;
+        case 'M':
+            if (option) {
+                tm->tm_min = strtol(data, &data, 10);
+                if (tm->tm_min < 0 || tm->tm_min > 59)
+                    return NULL;
+            } else if (*data != 'M') {
+                return data;
+            } else {
+                data++;
+            }
+            option = false;
+            format++;
+            break;
+        default:
+            // Unsupported conversion, or a literal that does not match.
+            if (option)
+                return data;
+            if (*data != *format)
+                return data;
+            format++;
+            data++;
+        }
+    }
+    return data;
+}
+
+// Read a line from an istream w/o concern for buffer limitations.
+std::string
+StrOps::readline(std::istream& stream)
+{
+    std::string line;
+    char buf[1024];
+
+    while (1) {
+        // Read the next line (or part thereof) from the stream.
+        stream.getline(buf, sizeof(buf));
+        // Bail out of the loop if the stream has reached end-of-file.
+        if ((stream.eof() && !buf[0]) || stream.bad())
+            break;
+
+        // Append the data read to the result string.
+        line.append(buf);
+
+        // If the stream is good, then stop. Otherwise, clear the
+        // error indicator so that getline will work again.
+        if ((stream.eof() && buf[0]) || stream.good())
+            break;
+        stream.clear();
+    }
+
+    return line;
+}
+
+// Return str wrapped in double quotes.
+//
+// extended_mode true:  backslash, CR, LF, TAB, VT and the double quote are
+//                      written as backslash escapes; other non-printable
+//                      characters become \xHH with two hex digits, which
+//                      str_to_array() reads back.
+// extended_mode false: an embedded double quote is doubled; CR and LF
+//                      cannot be represented, so they are reported and
+//                      dropped.
+std::string
+StrOps::quote_string(std::string str, bool extended_mode)
+{
+    std::string result;
+
+    if (extended_mode) {
+        result += '"';
+        for (std::string::iterator c = str.begin(); c != str.end(); ++c) {
+            switch (*c) {
+            case '\\':
+                result += '\\';
+                result += '\\';
+                break;
+
+            case '\r':
+                result += '\\';
+                result += 'r';
+                break;
+
+            case '\n':
+                result += '\\';
+                result += 'n';
+                break;
+
+            case '\t':
+                result += '\\';
+                result += 't';
+                break;
+
+            case '\v':
+                result += '\\';
+                result += 'v';
+                break;
+
+            case '"':
+                result += '\\';
+                result += '"';
+                break;
+
+            default:
+                if (isprint(static_cast<unsigned char> (*c))) {
+                    result += *c;
+                } else {
+                    // Two zero-padded hex digits.  (Previously the value was
+                    // left-aligned and padded with a space, and std::ends
+                    // added a NUL character to the output.)
+                    std::ostringstream buf;
+                    buf.setf(std::ios::hex, std::ios::basefield);
+                    buf.fill('0');
+                    buf.width(2);
+                    buf << ((static_cast<unsigned> (*c)) & 0xFF);
+
+                    result += "\\x";
+                    result += buf.str();
+                }
+                break;
+            }
+        }
+        result += '"';
+    } else {
+        // Report each offending character once; the message no longer
+        // accumulates across repeated newlines.
+        const char* const newline_msg = "use extended csv mode for newlines\n";
+
+        result += '"';
+        for (std::string::iterator c = str.begin(); c != str.end(); ++c) {
+            if (*c == '"') {
+                result += "\"\"";
+            } else if (*c == '\n' || *c == '\r') {
+                *err << newline_msg;
+                kdDebug() << newline_msg << endl;
+            } else {
+                result += *c;
+            }
+        }
+        result += '"';
+    }
+
+    return result;
+}
+
+// Build a file path from a directory, a file name and an extension.
+//
+// An absolute p_FileName is returned unchanged.  Otherwise p_Path (if not
+// empty) is prepended using the platform separator, and p_Ext is appended
+// unless p_FileName already contains it.
+std::string
+StrOps::concatenatepath(std::string p_Path,
+                        std::string p_FileName, std::string p_Ext)
+{
+    std::string l_FilePath;
+    // The size checks keep the index operators inside the string when the
+    // file name is empty or a single character.
+#ifdef WIN32
+    if ((p_FileName.size() > 1 && p_FileName[1] == ':')
+        || (!p_FileName.empty() && p_FileName[0] == '\\'))
+        return p_FileName;
+    else if (p_Path.empty())
+        l_FilePath = p_FileName;
+    else
+        l_FilePath = p_Path + std::string("\\") + p_FileName;
+#else
+    if (!p_FileName.empty() && p_FileName[0] == '/')
+        return p_FileName;
+    else if (p_Path.empty())
+        l_FilePath = p_FileName;
+    else
+        l_FilePath = p_Path + std::string("/") + p_FileName;
+
+#endif
+    if (!p_Ext.empty() && (p_FileName.rfind(p_Ext) == std::string::npos))
+        l_FilePath += p_Ext;
+
+    return l_FilePath;
+}
diff --git a/src/translators/pilotdb/strop.h b/src/translators/pilotdb/strop.h
new file mode 100644
index 0000000..3d9dc8d
--- /dev/null
+++ b/src/translators/pilotdb/strop.h
@@ -0,0 +1,153 @@
+/*
+ * palm-db-tools: Support Library: String Parsing Utility Functions
+ * Copyright (C) 1999-2000 by Tom Dyas (tdyas@users.sourceforge.net)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef __LIBSUPPORT_STROP_H__
+#define __LIBSUPPORT_STROP_H__
+
+#include <stdexcept>
+#include <vector>
+#include <string>
+#include <sstream>
+#include <time.h>
+#include "libflatfile/Database.h"
+
+namespace StrOps {
+
+    // Exception types for parse errors in csv_to_array and str_to_array.
+    // The implementations in strop.cpp report such errors through
+    // kdDebug() and do not throw these.
+    class csv_parse_error : public std::runtime_error {
+    public:
+        csv_parse_error(const std::string& msg) : std::runtime_error(msg) { }
+    };
+
+    class csv_unterminated_quote : public std::runtime_error {
+    public:
+        csv_unterminated_quote(const std::string& msg) : std::runtime_error(msg) { }
+    };
+
+    // The results of any parse are returned as this type.
+    typedef std::vector<std::string> string_list_t;
+
+
+    /**
+     * Convert all uppercase characters in a string to lowercase.
+     */
+    void lower(std::string& str);
+
+    /**
+     * Convert a string to a boolean value.
+     *
+     * Recognizes on/off, true/false and t/f; any other text is read as
+     * an integer, with non-zero meaning true.
+     *
+     * @param str The string containing a boolean value to convert.
+     */
+    bool string2boolean(const std::string& str);
+
+    /**
+     * Convert a string to a FieldType value.
+     *
+     * @param typestr The string containing a FieldType value to convert.
+     */
+    PalmLib::FlatFile::Field::FieldType string2type(std::string typestr);
+
+    /**
+     * Convert a FieldType value to a string.
+     *
+     * @param t The FieldType value to convert to its name.
+     */
+    std::string type2string(PalmLib::FlatFile::Field::FieldType t);
+
+    /**
+     * Strip trailing characters from a string.
+     *
+     * @param str The string to strip characters from.
+     * @param what The string containing characters to strip.
+     */
+    std::string strip_back(const std::string& str, const std::string& what);
+
+    /**
+     * Strip leading characters from a string.
+     *
+     * @param str The string to strip characters from.
+     * @param what The string containing characters to strip.
+     */
+    std::string strip_front(const std::string& str, const std::string& what);
+
+    /**
+     * Convert a string to a target type using an istringstream.
+     *
+     * @param str The text to convert.
+     * @param result Receives the value extracted with operator>>.
+     */
+    template<class T>
+    inline void convert_string(const std::string& str, T& result) {
+        std::istringstream(str.c_str()) >> result;
+    }
+
+    /**
+     * Parse a string in CSV (comma-separated values) format and
+     * return it as a list.
+     *
+     * @param str The string containing the CSV fields.
+     * @param delim The field delimiter. Defaults to a comma.
+     * @param quoted_string Whether double quotes delimit quoted fields.
+     *                      Defaults to true.
+     */
+    string_list_t csv_to_array(const std::string& str, char delim = ',', bool quoted_string = true);
+
+
+    /**
+     * Parse an argv-style array and return it as a list.
+     *
+     * @param str The string to parse.
+     * @param delim String containing the delimiter characters.
+     * @param multiple_delim Should multiple delimiters count as one?
+     * @param handle_comments Handle # as a comment character.
+     */
+    string_list_t str_to_array(const std::string& str,
+                               const std::string& delim,
+                               bool multiple_delim,
+                               bool handle_comments);
+
+    /**
+     * Return the current time as Unix time plus 2082844800 seconds (the
+     * number of seconds between 1904-01-01 and 1970-01-01).
+     */
+    PalmLib::pi_uint32_t get_current_time(void);
+
+    /**
+     * Parse a date/time string into a tm structure according to a format.
+     * The conversions %d, %m, %y, %Y, %H and %M are supported; other
+     * format characters must match the input literally.
+     * @param s the string to parse.
+     * @param format the format describing the layout of s.
+     * @param tm the time structure that receives the parsed values.
+     * @return NULL if the input ends before the format or a value is out
+     *         of range, otherwise the position in s where parsing stopped.
+     */
+    char *strptime(const char *s, const char *format, struct tm *tm);
+
+    /**
+     * Read one line from the input stream, whatever its length.
+     */
+    std::string readline(std::istream& stream);
+
+    /**
+     * Return the string wrapped in double quotes.
+     * @param str the string to quote.
+     * @param extended_mode when true, special characters are written as
+     *        backslash escapes; when false, an embedded double quote is
+     *        doubled.
+     */
+    std::string quote_string(std::string str, bool extended_mode);
+
+    /**
+     * Concatenate the path directory, the file name and the extension
+     * to give the file path to a file.  An absolute file name is returned
+     * unchanged; the extension is appended only if the file name does not
+     * already contain it.
+     */
+    std::string concatenatepath(std::string p_Path, std::string p_FileName,
+                                std::string p_Ext = std::string(""));
+
+} // namespace StrOps
+
+#endif
diff --git a/src/translators/pilotdbexporter.cpp b/src/translators/pilotdbexporter.cpp
new file mode 100644
index 0000000..b9e7367
--- /dev/null
+++ b/src/translators/pilotdbexporter.cpp
@@ -0,0 +1,232 @@
+/***************************************************************************
+ copyright : (C) 2003-2006 by Robby Stephenson
+ email : robby@periapsis.org
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of version 2 of the GNU General Public License as *
+ * published by the Free Software Foundation; *
+ * *
+ ***************************************************************************/
+
+#include "pilotdbexporter.h"
+#include "pilotdb/pilotdb.h"
+#include "pilotdb/libflatfile/DB.h"
+
+#include "../collection.h"
+#include "../filehandler.h"
+
+#include
<klocale.h> +#include <kdebug.h> +#include <kconfig.h> +#include <kglobal.h> +#include <kcharsets.h> + +#include <qlayout.h> +#include <qgroupbox.h> +#include <qcheckbox.h> +#include <qwhatsthis.h> +#include <qtextcodec.h> +#include <qdatetime.h> + +using Tellico::Export::PilotDBExporter; + +PilotDBExporter::PilotDBExporter() : Tellico::Export::Exporter(), + m_backup(true), + m_widget(0), + m_checkBackup(0) { +} + +QString PilotDBExporter::formatString() const { + return i18n("PilotDB"); +} + +QString PilotDBExporter::fileFilter() const { + return i18n("*.pdb|Pilot Database Files (*.pdb)") + QChar('\n') + i18n("*|All Files"); +} + +bool PilotDBExporter::exec() { + Data::CollPtr coll = collection(); + if(!coll) { + return false; + } + + // This is something of a hidden preference cause I don't want to put it in the GUI right now + // Latin1 by default + QTextCodec* codec = 0; + { + // Latin1 is default + KConfigGroup group(KGlobal::config(), QString::fromLatin1("ExportOptions - %1").arg(formatString())); + codec = KGlobal::charsets()->codecForName(group.readEntry("Charset")); + } + if(!codec) { + kdWarning() << "PilotDBExporter::exec() - no QTextCodec!" << endl; + return false; +#ifndef NDEBUG + } else { + kdDebug() << "PilotDBExporter::exec() - encoding with " << codec->name() << endl; +#endif + } + + // DB 0.3.x format + PalmLib::FlatFile::DB db; + + // set database title + db.title(codec->fromUnicode(coll->title()).data()); + + // set backup option +// db.setOption("backup", (m_checkBackup && m_checkBackup->isChecked()) ? 
"true" : "false"); + + // all fields are added + // except that only one field of type NOTE + bool hasNote = false; + Data::FieldVec outputFields; // not all fields will be output + Data::FieldVec fields = coll->fields(); + for(Data::FieldVec::Iterator fIt = fields.begin(); fIt != fields.end(); ++fIt) { + switch(fIt->type()) { + case Data::Field::Choice: + // the charSeparator is actually defined in DB.h + db.appendField(codec->fromUnicode(fIt->title()).data(), PalmLib::FlatFile::Field::LIST, + codec->fromUnicode(fIt->allowed().join(QChar('/'))).data()); + outputFields.append(fIt); + break; + + case Data::Field::Number: + // the DB only supports single values of integers + if(fIt->flags() & Data::Field::AllowMultiple) { + db.appendField(codec->fromUnicode(fIt->title()).data(), PalmLib::FlatFile::Field::STRING); + } else { + db.appendField(codec->fromUnicode(fIt->title()).data(), PalmLib::FlatFile::Field::INTEGER); + } + outputFields.append(fIt); + break; + + case Data::Field::Bool: + db.appendField(codec->fromUnicode(fIt->title()).data(), PalmLib::FlatFile::Field::BOOLEAN); + outputFields.append(fIt); + break; + + case Data::Field::Para: + if(hasNote) { // only one is allowed, according to palm-db-tools documentation + kdDebug() << "PilotDBExporter::data() - adding note as string" << endl; + db.appendField(codec->fromUnicode(fIt->title()).data(), PalmLib::FlatFile::Field::STRING); + } else { + kdDebug() << "PilotDBExporter::data() - adding note" << endl; + db.appendField(codec->fromUnicode(fIt->title()).data(), PalmLib::FlatFile::Field::NOTE); + hasNote = true; + } + outputFields.append(fIt); + break; + + case Data::Field::Date: + db.appendField(codec->fromUnicode(fIt->title()).data(), PalmLib::FlatFile::Field::DATE); + outputFields.append(fIt); + break; + + case Data::Field::Image: + // don't include images + kdDebug() << "PilotDBExporter::data() - skipping " << fIt->title() << " image field" << endl; + break; + + default: + 
db.appendField(codec->fromUnicode(fIt->title()).data(), PalmLib::FlatFile::Field::STRING); + outputFields.append(fIt); + break; + } + } + + // add view with visible fields + if(m_columns.count() > 0) { + PalmLib::FlatFile::ListView lv; + lv.name = codec->fromUnicode(i18n("View Columns")).data(); + for(QStringList::ConstIterator it = m_columns.begin(); it != m_columns.end(); ++it) { + PalmLib::FlatFile::ListViewColumn col; + col.field = coll->fieldTitles().findIndex(*it); + lv.push_back(col); + } + db.appendListView(lv); + } + db.doneWithSchema(); + + Data::FieldVec::ConstIterator fIt, end = outputFields.constEnd(); + bool format = options() & Export::ExportFormatted; + + QRegExp br(QString::fromLatin1("<br/?>"), false /*case-sensitive*/); + QRegExp tags(QString::fromLatin1("<.*>")); + tags.setMinimal(true); + + QString value; + for(Data::EntryVec::ConstIterator entryIt = entries().begin(); entryIt != entries().end(); ++entryIt) { + PalmLib::FlatFile::Record record; + unsigned i = 0; + for(fIt = outputFields.constBegin(); fIt != end; ++fIt, ++i) { + value = entryIt->field(fIt->name(), format); + + if(fIt->type() == Data::Field::Date) { + QStringList s = QStringList::split('-', value, true); + bool ok = true; + int y = s.count() > 0 ? s[0].toInt(&ok) : QDate::currentDate().year(); + if(!ok) { + y = QDate::currentDate().year(); + } + int m = s.count() > 1 ? s[1].toInt(&ok) : 1; + if(!ok) { + m = 1; + } + int d = s.count() > 2 ? s[2].toInt(&ok) : 1; + if(!ok) { + d = 1; + } + QDate date(y, m, d); + value = date.toString(QString::fromLatin1("yyyy/MM/dd")); + } else if(fIt->type() == Data::Field::Para) { + value.replace(br, QChar('\n')); + value.replace(tags, QString::null); + } + // the number of fields in the record must match the number of fields in the database + record.appendField(PilotDB::string2field(db.field_type(i), + value.isEmpty() ? std::string() : codec->fromUnicode(value).data())); + } + // Add the record to the database. 
+ db.appendRecord(record); + } + + PilotDB pdb; + db.outputPDB(pdb); + + return FileHandler::writeDataURL(url(), pdb.data(), options() & Export::ExportForce); +} + +QWidget* PilotDBExporter::widget(QWidget* parent_, const char* name_/*=0*/) { + if(m_widget && m_widget->parent() == parent_) { + return m_widget; + } + + m_widget = new QWidget(parent_, name_); + QVBoxLayout* l = new QVBoxLayout(m_widget); + + QGroupBox* box = new QGroupBox(1, Qt::Horizontal, i18n("PilotDB Options"), m_widget); + l->addWidget(box); + + m_checkBackup = new QCheckBox(i18n("Set PDA backup flag for database"), box); + m_checkBackup->setChecked(m_backup); + QWhatsThis::add(m_checkBackup, i18n("Set PDA backup flag for database")); + + l->addStretch(1); + return m_widget; +} + +void PilotDBExporter::readOptions(KConfig* config_) { + KConfigGroup group(config_, QString::fromLatin1("ExportOptions - %1").arg(formatString())); + m_backup = group.readBoolEntry("Backup", m_backup); +} + +void PilotDBExporter::saveOptions(KConfig* config_) { + KConfigGroup group(config_, QString::fromLatin1("ExportOptions - %1").arg(formatString())); + m_backup = m_checkBackup->isChecked(); + group.writeEntry("Backup", m_backup); +} + +#include "pilotdbexporter.moc" diff --git a/src/translators/pilotdbexporter.h b/src/translators/pilotdbexporter.h new file mode 100644 index 0000000..13d603b --- /dev/null +++ b/src/translators/pilotdbexporter.h @@ -0,0 +1,55 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + 
***************************************************************************/ + +#ifndef PILOTDBEXPORTER_H +#define PILOTDBEXPORTER_H + +class QCheckBox; + +#include "exporter.h" + +#include <qstringlist.h> + +namespace Tellico { + namespace Export { + +/** + * @author Robby Stephenson + */ +class PilotDBExporter : public Exporter { +Q_OBJECT + +public: + PilotDBExporter(); + + virtual bool exec(); + virtual QString formatString() const; + virtual QString fileFilter() const; + + virtual QWidget* widget(QWidget* parent, const char* name=0); + virtual void readOptions(KConfig* cfg); + virtual void saveOptions(KConfig* cfg); + + void setColumns(const QStringList& columns) { m_columns = columns; } + +private: + bool m_backup; + + QWidget* m_widget; + QCheckBox* m_checkBackup; + QStringList m_columns; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/referencerimporter.cpp b/src/translators/referencerimporter.cpp new file mode 100644 index 0000000..32ba251 --- /dev/null +++ b/src/translators/referencerimporter.cpp @@ -0,0 +1,71 @@ +/*************************************************************************** + copyright : (C) 2007 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "referencerimporter.h" +#include "../collection.h" +#include "../core/netaccess.h" +#include "../imagefactory.h" + +#include <kstandarddirs.h> + +using Tellico::Import::ReferencerImporter; + +ReferencerImporter::ReferencerImporter(const KURL& url_) : XSLTImporter(url_) { + QString xsltFile = locate("appdata", 
QString::fromLatin1("referencer2tellico.xsl")); + if(!xsltFile.isEmpty()) { + KURL u; + u.setPath(xsltFile); + XSLTImporter::setXSLTURL(u); + } else { + kdWarning() << "ReferencerImporter() - unable to find referencer2tellico.xml!" << endl; + } +} + +bool ReferencerImporter::canImport(int type) const { + return type == Data::Collection::Bibtex; +} + +Tellico::Data::CollPtr ReferencerImporter::collection() { + Data::CollPtr coll = XSLTImporter::collection(); + if(!coll) { + return 0; + } + + Data::FieldPtr field = coll->fieldByName(QString::fromLatin1("cover")); + if(!field && !coll->imageFields().isEmpty()) { + field = coll->imageFields().front(); + } else if(!field) { + field = new Data::Field(QString::fromLatin1("cover"), i18n("Front Cover"), Data::Field::Image); + coll->addField(field); + } + + Data::EntryVec entries = coll->entries(); + for(Data::EntryVecIt entry = entries.begin(); entry != entries.end(); ++entry) { + QString url = entry->field(QString::fromLatin1("url")); + if(url.isEmpty()) { + continue; + } + QPixmap pix = NetAccess::filePreview(url); + if(pix.isNull()) { + continue; + } + QString id = ImageFactory::addImage(pix, QString::fromLatin1("PNG")); + if(id.isEmpty()) { + continue; + } + entry->setField(field, id); + } + return coll; +} + +#include "referencerimporter.moc" diff --git a/src/translators/referencerimporter.h b/src/translators/referencerimporter.h new file mode 100644 index 0000000..65cc3a0 --- /dev/null +++ b/src/translators/referencerimporter.h @@ -0,0 +1,49 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * 
published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_IMPORT_REFERENCERIMPORTER_H +#define TELLICO_IMPORT_REFERENCERIMPORTER_H + +#include "xsltimporter.h" +#include "../datavectors.h" + +namespace Tellico { + namespace Import { + +/** + * @author Robby Stephenson +*/ +class ReferencerImporter : public XSLTImporter { +Q_OBJECT + +public: + /** + */ + ReferencerImporter(const KURL& url); + + /** + */ + virtual Data::CollPtr collection(); + /** + */ + virtual QWidget* widget(QWidget*, const char*) { return 0; } + virtual bool canImport(int type) const; + +private: + // private so it can't be changed accidently + void setXSLTURL(const KURL& url); +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/risimporter.cpp b/src/translators/risimporter.cpp new file mode 100644 index 0000000..0420f66 --- /dev/null +++ b/src/translators/risimporter.cpp @@ -0,0 +1,315 @@ +/*************************************************************************** + copyright : (C) 2004-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "risimporter.h" +#include "../collections/bibtexcollection.h" +#include "../document.h" +#include "../entry.h" +#include "../field.h" +#include "../latin1literal.h" +#include "../progressmanager.h" +#include "../filehandler.h" +#include "../isbnvalidator.h" +#include "../tellico_debug.h" + +#include <kapplication.h> + +#include <qdict.h> +#include <qregexp.h> +#include <qmap.h> + 
+using Tellico::Import::RISImporter; +QMap<QString, QString>* RISImporter::s_tagMap = 0; +QMap<QString, QString>* RISImporter::s_typeMap = 0; + +// static +void RISImporter::initTagMap() { + if(!s_tagMap) { + s_tagMap = new QMap<QString, QString>(); + // BT is special and is handled separately + s_tagMap->insert(QString::fromLatin1("TY"), QString::fromLatin1("entry-type")); + s_tagMap->insert(QString::fromLatin1("ID"), QString::fromLatin1("bibtex-key")); + s_tagMap->insert(QString::fromLatin1("T1"), QString::fromLatin1("title")); + s_tagMap->insert(QString::fromLatin1("TI"), QString::fromLatin1("title")); + s_tagMap->insert(QString::fromLatin1("T2"), QString::fromLatin1("booktitle")); + s_tagMap->insert(QString::fromLatin1("A1"), QString::fromLatin1("author")); + s_tagMap->insert(QString::fromLatin1("AU"), QString::fromLatin1("author")); + s_tagMap->insert(QString::fromLatin1("ED"), QString::fromLatin1("editor")); + s_tagMap->insert(QString::fromLatin1("YR"), QString::fromLatin1("year")); + s_tagMap->insert(QString::fromLatin1("PY"), QString::fromLatin1("year")); + s_tagMap->insert(QString::fromLatin1("N1"), QString::fromLatin1("note")); + s_tagMap->insert(QString::fromLatin1("AB"), QString::fromLatin1("abstract")); // should be note? 
+ s_tagMap->insert(QString::fromLatin1("N2"), QString::fromLatin1("abstract")); + s_tagMap->insert(QString::fromLatin1("KW"), QString::fromLatin1("keyword")); + s_tagMap->insert(QString::fromLatin1("JF"), QString::fromLatin1("journal")); + s_tagMap->insert(QString::fromLatin1("JO"), QString::fromLatin1("journal")); + s_tagMap->insert(QString::fromLatin1("JA"), QString::fromLatin1("journal")); + s_tagMap->insert(QString::fromLatin1("VL"), QString::fromLatin1("volume")); + s_tagMap->insert(QString::fromLatin1("IS"), QString::fromLatin1("number")); + s_tagMap->insert(QString::fromLatin1("PB"), QString::fromLatin1("publisher")); + s_tagMap->insert(QString::fromLatin1("SN"), QString::fromLatin1("isbn")); + s_tagMap->insert(QString::fromLatin1("AD"), QString::fromLatin1("address")); + s_tagMap->insert(QString::fromLatin1("CY"), QString::fromLatin1("address")); + s_tagMap->insert(QString::fromLatin1("UR"), QString::fromLatin1("url")); + s_tagMap->insert(QString::fromLatin1("L1"), QString::fromLatin1("pdf")); + s_tagMap->insert(QString::fromLatin1("T3"), QString::fromLatin1("series")); + s_tagMap->insert(QString::fromLatin1("EP"), QString::fromLatin1("pages")); + } +} + +// static +void RISImporter::initTypeMap() { + if(!s_typeMap) { + s_typeMap = new QMap<QString, QString>(); + // leave capitalized, except for bibtex types + s_typeMap->insert(QString::fromLatin1("ABST"), QString::fromLatin1("Abstract")); + s_typeMap->insert(QString::fromLatin1("ADVS"), QString::fromLatin1("Audiovisual material")); + s_typeMap->insert(QString::fromLatin1("ART"), QString::fromLatin1("Art Work")); + s_typeMap->insert(QString::fromLatin1("BILL"), QString::fromLatin1("Bill/Resolution")); + s_typeMap->insert(QString::fromLatin1("BOOK"), QString::fromLatin1("book")); // bibtex + s_typeMap->insert(QString::fromLatin1("CASE"), QString::fromLatin1("Case")); + s_typeMap->insert(QString::fromLatin1("CHAP"), QString::fromLatin1("inbook")); // == "inbook" ? 
+ s_typeMap->insert(QString::fromLatin1("COMP"), QString::fromLatin1("Computer program")); + s_typeMap->insert(QString::fromLatin1("CONF"), QString::fromLatin1("inproceedings")); // == "conference" ? + s_typeMap->insert(QString::fromLatin1("CTLG"), QString::fromLatin1("Catalog")); + s_typeMap->insert(QString::fromLatin1("DATA"), QString::fromLatin1("Data file")); + s_typeMap->insert(QString::fromLatin1("ELEC"), QString::fromLatin1("Electronic Citation")); + s_typeMap->insert(QString::fromLatin1("GEN"), QString::fromLatin1("Generic")); + s_typeMap->insert(QString::fromLatin1("HEAR"), QString::fromLatin1("Hearing")); + s_typeMap->insert(QString::fromLatin1("ICOMM"), QString::fromLatin1("Internet Communication")); + s_typeMap->insert(QString::fromLatin1("INPR"), QString::fromLatin1("In Press")); + s_typeMap->insert(QString::fromLatin1("JFULL"), QString::fromLatin1("Journal (full)")); // = "periodical" ? + s_typeMap->insert(QString::fromLatin1("JOUR"), QString::fromLatin1("article")); // "Journal" + s_typeMap->insert(QString::fromLatin1("MAP"), QString::fromLatin1("Map")); + s_typeMap->insert(QString::fromLatin1("MGZN"), QString::fromLatin1("article")); // bibtex + s_typeMap->insert(QString::fromLatin1("MPCT"), QString::fromLatin1("Motion picture")); + s_typeMap->insert(QString::fromLatin1("MUSIC"), QString::fromLatin1("Music score")); + s_typeMap->insert(QString::fromLatin1("NEWS"), QString::fromLatin1("Newspaper")); + s_typeMap->insert(QString::fromLatin1("PAMP"), QString::fromLatin1("Pamphlet")); // = "booklet" ? + s_typeMap->insert(QString::fromLatin1("PAT"), QString::fromLatin1("Patent")); + s_typeMap->insert(QString::fromLatin1("PCOMM"), QString::fromLatin1("Personal communication")); + s_typeMap->insert(QString::fromLatin1("RPRT"), QString::fromLatin1("Report")); // = "techreport" ? 
+ s_typeMap->insert(QString::fromLatin1("SER"), QString::fromLatin1("Serial (BookMonograph)")); + s_typeMap->insert(QString::fromLatin1("SLIDE"), QString::fromLatin1("Slide")); + s_typeMap->insert(QString::fromLatin1("SOUND"), QString::fromLatin1("Sound recording")); + s_typeMap->insert(QString::fromLatin1("STAT"), QString::fromLatin1("Statute")); + s_typeMap->insert(QString::fromLatin1("THES"), QString::fromLatin1("phdthesis")); // "mastersthesis" ? + s_typeMap->insert(QString::fromLatin1("UNBILL"), QString::fromLatin1("Unenacted bill/resolution")); + s_typeMap->insert(QString::fromLatin1("UNPB"), QString::fromLatin1("unpublished")); // bibtex + s_typeMap->insert(QString::fromLatin1("VIDEO"), QString::fromLatin1("Video recording")); + } +} + +RISImporter::RISImporter(const KURL::List& urls_) : Tellico::Import::Importer(urls_), m_coll(0), m_cancelled(false) { + initTagMap(); + initTypeMap(); +} + +bool RISImporter::canImport(int type) const { + return type == Data::Collection::Bibtex; +} + +Tellico::Data::CollPtr RISImporter::collection() { + if(m_coll) { + return m_coll; + } + + m_coll = new Data::BibtexCollection(true); + + QDict<Data::Field> risFields; + + // need to know if any extended properties in current collection point to RIS + // if so, add to collection + Data::CollPtr currColl = Data::Document::self()->collection(); + Data::FieldVec vec = currColl->fields(); + for(Data::FieldVec::Iterator it = vec.begin(); it != vec.end(); ++it) { + // continue if property is empty + QString ris = it->property(QString::fromLatin1("ris")); + if(ris.isEmpty()) { + continue; + } + // if current collection has one with the same name, set the property + Data::FieldPtr f = m_coll->fieldByName(it->name()); + if(!f) { + f = new Data::Field(*it); + m_coll->addField(f); + } + f->setProperty(QString::fromLatin1("ris"), ris); + risFields.insert(ris, f); + } + + ProgressItem& item = ProgressManager::self()->newProgressItem(this, progressLabel(), true); + 
item.setTotalSteps(urls().count() * 100); + connect(&item, SIGNAL(signalCancelled(ProgressItem*)), SLOT(slotCancel())); + ProgressItem::Done done(this); + + int count = 0; + KURL::List urls = this->urls(); + for(KURL::List::ConstIterator it = urls.begin(); it != urls.end() && !m_cancelled; ++it, ++count) { + readURL(*it, count, risFields); + } + + if(m_cancelled) { + m_coll = 0; + } + return m_coll; +} + +void RISImporter::readURL(const KURL& url_, int n, const QDict<Data::Field>& risFields_) { + QString str = FileHandler::readTextFile(url_); + if(str.isEmpty()) { + return; + } + + ISBNValidator isbnval(this); + + QTextIStream t(&str); + + const uint length = str.length(); + const uint stepSize = QMAX(s_stepSize, length/100); + const bool showProgress = options() & ImportProgress; + + bool needToAddFinal = false; + + QString sp, ep; + + uint j = 0; + Data::EntryPtr entry = new Data::Entry(m_coll); + // technically, the spec requires a space immediately after the hyphen + // however, at least one website (Springer) outputs RIS with no space after the final "ER -" + // so just strip the white space later + // also be gracious and allow only any amount of space before hyphen + QRegExp rx(QString::fromLatin1("^(\\w\\w)\\s+-(.*)$")); + QString currLine, nextLine; + for(currLine = t.readLine(); !m_cancelled && !currLine.isNull(); currLine = nextLine, j += currLine.length()) { + nextLine = t.readLine(); + rx.search(currLine); + QString tag = rx.cap(1); + QString value = rx.cap(2).stripWhiteSpace(); + if(tag.isEmpty()) { + continue; + } +// myDebug() << tag << ": " << value << endl; + // if the next line is not empty and does not match start regexp, append to value + while(!nextLine.isEmpty() && nextLine.find(rx) == -1) { + value += nextLine.stripWhiteSpace(); + nextLine = t.readLine(); + } + + // every entry ends with "ER" + if(tag == Latin1Literal("ER")) { + m_coll->addEntries(entry); + entry = new Data::Entry(m_coll); + needToAddFinal = false; + continue; + } else 
if(tag == Latin1Literal("TY") && s_typeMap->contains(value)) { + // for entry-type, switch it to normalized type name + value = (*s_typeMap)[value]; + } else if(tag == Latin1Literal("SN")) { + // test for valid isbn, sometimes the issn gets stuck here + int pos = 0; + if(isbnval.validate(value, pos) != ISBNValidator::Acceptable) { + continue; + } + } else if(tag == Latin1Literal("SP")) { + sp = value; + if(!ep.isEmpty()) { + value = sp + '-' + ep; + tag = QString::fromLatin1("EP"); + sp = QString(); + ep = QString(); + } else { + // nothing else to do + continue; + } + } else if(tag == Latin1Literal("EP")) { + ep = value; + if(!sp.isEmpty()) { + value = sp + '-' + ep; + sp = QString(); + ep = QString(); + } else { + continue; + } + } else if(tag == Latin1Literal("YR") || tag == Latin1Literal("PY")) { // for now, just grab the year + value = value.section('/', 0, 0); + } + + // the lookup scheme is: + // 1. any field has an RIS property that matches the tag name + // 2. default field mapping tag -> field name + Data::FieldPtr f = risFields_.find(tag); + if(!f) { + // special case for BT + // primary title for books, secondary for everything else + if(tag == Latin1Literal("BT")) { + if(entry->field(QString::fromLatin1("entry-type")) == Latin1Literal("book")) { + f = m_coll->fieldByName(QString::fromLatin1("title")); + } else { + f = m_coll->fieldByName(QString::fromLatin1("booktitle")); + } + } else { + f = fieldByTag(tag); + } + } + if(!f) { + continue; + } + needToAddFinal = true; + + // harmless for non-choice fields + // for entry-type, want it in lower case + f->addAllowed(value); + // if the field can have multiple values, append current values to new value + if((f->flags() & Data::Field::AllowMultiple) && !entry->field(f->name()).isEmpty()) { + value.prepend(entry->field(f->name()) + QString::fromLatin1("; ")); + } + entry->setField(f, value); + + if(showProgress && j%stepSize == 0) { + ProgressManager::self()->setProgress(this, n*100 + 100*j/length); + 
kapp->processEvents(); + } + } + + if(needToAddFinal) { + m_coll->addEntries(entry); + } +} + +Tellico::Data::FieldPtr RISImporter::fieldByTag(const QString& tag_) { + Data::FieldPtr f = 0; + const QString& fieldTag = (*s_tagMap)[tag_]; + if(!fieldTag.isEmpty()) { + f = m_coll->fieldByName(fieldTag); + if(f) { + f->setProperty(QString::fromLatin1("ris"), tag_); + return f; + } + } + + // add non-default fields if not already there + if(tag_== Latin1Literal("L1")) { + f = new Data::Field(QString::fromLatin1("pdf"), i18n("PDF"), Data::Field::URL); + f->setProperty(QString::fromLatin1("ris"), QString::fromLatin1("L1")); + f->setCategory(i18n("Miscellaneous")); + } + m_coll->addField(f); + return f; +} + +void RISImporter::slotCancel() { + m_cancelled = true; +} + +#include "risimporter.moc" diff --git a/src/translators/risimporter.h b/src/translators/risimporter.h new file mode 100644 index 0000000..c7d08d2 --- /dev/null +++ b/src/translators/risimporter.h @@ -0,0 +1,71 @@ +/*************************************************************************** + copyright : (C) 2004-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef RISIMPORTER_H +#define RISIMPORTER_H + +#include "importer.h" +#include "../datavectors.h" + +#include <qstring.h> +#include <qmap.h> + +template<class type> +class QDict; + +namespace Tellico { + namespace Data { + class Field; + } + namespace Import { + +/** + * @author Robby Stephenson + */ +class RISImporter : public Importer { +Q_OBJECT + +public: + /** + */ + RISImporter(const KURL::List& 
urls); + + /** + * @return A pointer to a @ref Data::Collection, or 0 if none can be created. + */ + virtual Data::CollPtr collection(); + /** + */ + virtual QWidget* widget(QWidget*, const char*) { return 0; } + virtual bool canImport(int type) const; + +public slots: + void slotCancel(); + +private: + static void initTagMap(); + static void initTypeMap(); + + Data::FieldPtr fieldByTag(const QString& tag); + void readURL(const KURL& url, int n, const QDict<Data::Field>& risFields); + + Data::CollPtr m_coll; + bool m_cancelled; + + static QMap<QString, QString>* s_tagMap; + static QMap<QString, QString>* s_typeMap; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/tellico_xml.cpp b/src/translators/tellico_xml.cpp new file mode 100644 index 0000000..8e7ac61 --- /dev/null +++ b/src/translators/tellico_xml.cpp @@ -0,0 +1,98 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "tellico_xml.h" + +#include <libxml/parserInternals.h> // needed for IS_LETTER +#include <libxml/parser.h> // has to be before valid.h +#include <libxml/valid.h> + +#include <qregexp.h> + +const QString Tellico::XML::nsXSL = QString::fromLatin1("http://www.w3.org/1999/XSL/Transform"); +const QString Tellico::XML::nsBibtexml = QString::fromLatin1("http://bibtexml.sf.net/"); +const QString Tellico::XML::dtdBibtexml = QString::fromLatin1("bibtexml.dtd"); + +/* + * VERSION 2 added namespaces, changed to 
multiple elements, + * and changed the "keywords" field to "keyword" + * + * VERSION 3 broke out the formatFlag, and changed NoComplete to AllowCompletion + * + * VERSION 4 added a bibtex-field name for Bibtex collections, element name was + * changed to 'entry', field elements changed to 'field', and boolean fields are now "true" + * + * VERSION 5 moved the bibtex-field and any other extended field property to property elements + * inside the field element, and added the image element. + * + * VERSION 6 added id, i18n attributes, and year, month, day elements in date fields with a calendar name + * attribute. + * + * VERSION 7 changed the application name to Tellico, renamed unitTitle to entryTitle, and made the id permanent. + * + * VERSION 8 added loans and saved filters. + * + * VERSION 9 changed music collections to always have three columns by default, with title/artist/length and + * added file catalog collection. + * + * VERSION 10 added the game board collection. + */ +const uint Tellico::XML::syntaxVersion = 10; +const QString Tellico::XML::nsTellico = QString::fromLatin1("http://periapsis.org/tellico/"); + +const QString Tellico::XML::nsBookcase = QString::fromLatin1("http://periapsis.org/bookcase/"); +const QString Tellico::XML::nsDublinCore = QString::fromLatin1("http://purl.org/dc/elements/1.1/"); +const QString Tellico::XML::nsZing = QString::fromLatin1("http://www.loc.gov/zing/srw/"); +const QString Tellico::XML::nsZingDiag = QString::fromLatin1("http://www.loc.gov/zing/srw/diagnostic/"); + +QString Tellico::XML::pubTellico(int version) { + return QString::fromLatin1("-//Robby Stephenson/DTD Tellico V%1.0//EN").arg(version); +} + +QString Tellico::XML::dtdTellico(int version) { + return QString::fromLatin1("http://periapsis.org/tellico/dtd/v%1/tellico.dtd").arg(version); +} + +bool Tellico::XML::validXMLElementName(const QString& name_) { + return xmlValidateNameValue((xmlChar *)name_.utf8().data()); +} + +QString Tellico::XML::elementName(const 
QString& name_) { + QString name = name_; + // change white space to dashes + name.replace(QRegExp(QString::fromLatin1("\\s+")), QString::fromLatin1("-")); + // first cut, if it passes, we're done + if(XML::validXMLElementName(name)) { + return name; + } + + // next check first characters IS_DIGIT is defined in libxml/vali.d + for(uint i = 0; i < name.length() && (!IS_LETTER(name[i].unicode()) || name[i] == '_'); ++i) { + name = name.mid(1); + } + if(name.isEmpty() || XML::validXMLElementName(name)) { + return name; // empty names are handled later + } + + // now brute-force it, one character at a time + uint i = 0; + while(i < name.length()) { + if(!XML::validXMLElementName(name.left(i+1))) { + name.remove(i, 1); // remember it's zero-indexed + } else { + // character is ok, increment i + ++i; + } + } + return name; +} diff --git a/src/translators/tellico_xml.h b/src/translators/tellico_xml.h new file mode 100644 index 0000000..7c1a3e2 --- /dev/null +++ b/src/translators/tellico_xml.h @@ -0,0 +1,41 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_XML_H +#define TELLICO_XML_H + +#include <qstring.h> + +namespace Tellico { + namespace XML { + extern const QString nsXSL; + extern const QString nsBibtexml; + extern const QString dtdBibtexml; + + extern const uint syntaxVersion; + extern const QString nsTellico; + + QString pubTellico(int version = syntaxVersion); + QString dtdTellico(int version = 
syntaxVersion); + + extern const QString nsBookcase; + extern const QString nsDublinCore; + extern const QString nsZing; + extern const QString nsZingDiag; + + bool validXMLElementName(const QString& name); + QString elementName(const QString& name); + } +} + +#endif diff --git a/src/translators/tellicoimporter.cpp b/src/translators/tellicoimporter.cpp new file mode 100644 index 0000000..cb3c7a3 --- /dev/null +++ b/src/translators/tellicoimporter.cpp @@ -0,0 +1,1021 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "tellicoimporter.h" +#include "tellico_xml.h" +#include "../collectionfactory.h" +#include "../collections/bibtexcollection.h" +#include "../entry.h" +#include "../field.h" +#include "../imagefactory.h" +#include "../image.h" +#include "../isbnvalidator.h" +#include "../latin1literal.h" +#include "../tellico_strings.h" +#include "../tellico_kernel.h" +#include "../tellico_utils.h" +#include "../tellico_debug.h" +#include "../progressmanager.h" + +#include <klocale.h> +#include <kmdcodec.h> +#include <kzip.h> +#include <kapplication.h> + +#include <qdom.h> +#include <qbuffer.h> +#include <qfile.h> +#include <qtimer.h> + +using Tellico::Import::TellicoImporter; + +bool TellicoImporter::versionConversion(uint from, uint to) { + // version 10 only added board games to version 9 + return from < to && (from != 9 || to != 10); +} + +TellicoImporter::TellicoImporter(const KURL& url_, bool 
loadAllImages_) : DataImporter(url_), + m_coll(0), m_loadAllImages(loadAllImages_), m_format(Unknown), m_modified(false), + m_cancelled(false), m_hasImages(false), m_buffer(0), m_zip(0), m_imgDir(0) { +} + +TellicoImporter::TellicoImporter(const QString& text_) : DataImporter(text_), + m_coll(0), m_loadAllImages(true), m_format(Unknown), m_modified(false), + m_cancelled(false), m_hasImages(false), m_buffer(0), m_zip(0), m_imgDir(0) { +} + +TellicoImporter::~TellicoImporter() { + if(m_zip) { + m_zip->close(); + } + delete m_zip; + m_zip = 0; + delete m_buffer; + m_buffer = 0; +} + +Tellico::Data::CollPtr TellicoImporter::collection() { + if(m_coll) { + return m_coll; + } + + QCString s; // read first 5 characters + if(source() == URL) { + if(!fileRef().open()) { + return 0; + } + QIODevice* f = fileRef().file(); + for(uint i = 0; i < 5; ++i) { + s += static_cast<char>(f->getch()); + } + f->reset(); + } else { + if(data().size() < 5) { + m_format = Error; + return 0; + } + s = QCString(data(), 6); + } + + // need to decide if the data is xml text, or a zip file + // if the first 5 characters are <?xml then treat it like text + if(s[0] == '<' && s[1] == '?' && s[2] == 'x' && s[3] == 'm' && s[4] == 'l') { + m_format = XML; + loadXMLData(source() == URL ? 
fileRef().file()->readAll() : data(), true); + } else { + m_format = Zip; + loadZipData(); + } + return m_coll; +} + +void TellicoImporter::loadXMLData(const QByteArray& data_, bool loadImages_) { + ProgressItem& item = ProgressManager::self()->newProgressItem(this, progressLabel(), true); + item.setTotalSteps(100); + connect(&item, SIGNAL(signalCancelled(ProgressItem*)), SLOT(slotCancel())); + ProgressItem::Done done(this); + + QDomDocument dom; + QString errorMsg; + int errorLine, errorColumn; + if(!dom.setContent(data_, true, &errorMsg, &errorLine, &errorColumn)) { + QString str = i18n(errorLoad).arg(url().fileName()) + QChar('\n'); + str += i18n("There is an XML parsing error in line %1, column %2.").arg(errorLine).arg(errorColumn); + str += QString::fromLatin1("\n"); + str += i18n("The error message from Qt is:"); + str += QString::fromLatin1("\n\t") + errorMsg; + myDebug() << str << endl; + setStatusMessage(str); + m_format = Error; + return; + } + + QDomElement root = dom.documentElement(); + + // the syntax version field name changed from "version" to "syntaxVersion" in version 3 + uint syntaxVersion; + if(root.hasAttribute(QString::fromLatin1("syntaxVersion"))) { + syntaxVersion = root.attribute(QString::fromLatin1("syntaxVersion")).toInt(); + } else if (root.hasAttribute(QString::fromLatin1("version"))) { + syntaxVersion = root.attribute(QString::fromLatin1("version")).toInt(); + } else { + if(!url().isEmpty()) { + setStatusMessage(i18n(errorLoad).arg(url().fileName())); + } + m_format = Error; + return; + } +// myDebug() << "TellicoImporter::loadXMLData() - syntaxVersion = " << syntaxVersion << endl; + + if((syntaxVersion > 6 && root.tagName() != Latin1Literal("tellico")) + || (syntaxVersion < 7 && root.tagName() != Latin1Literal("bookcase"))) { + if(!url().isEmpty()) { + setStatusMessage(i18n(errorLoad).arg(url().fileName())); + } + m_format = Error; + return; + } + + if(syntaxVersion > XML::syntaxVersion) { + if(!url().isEmpty()) { + QString str = 
i18n(errorLoad).arg(url().fileName()) + QChar('\n'); + str += i18n("It is from a future version of Tellico."); + myDebug() << str << endl; + setStatusMessage(str); + } else { + myDebug() << "Unable to load collection, from a future version (" << syntaxVersion << ")" << endl; + } + m_format = Error; + return; + } else if(versionConversion(syntaxVersion, XML::syntaxVersion)) { + // going form version 9 to 10, there's no conversion needed + QString str = i18n("Tellico is converting the file to a more recent document format. " + "Information loss may occur if an older version of Tellico is used " + "to read this file in the future."); + myDebug() << str << endl; +// setStatusMessage(str); + m_modified = true; // mark as modified + } + + m_namespace = syntaxVersion > 6 ? XML::nsTellico : XML::nsBookcase; + + // the collection item should be the first dom element child of the root + QDomElement collelem; + for(QDomNode n = root.firstChild(); !n.isNull(); n = n.nextSibling()) { + if(n.namespaceURI() != m_namespace) { + continue; + } + if(n.isElement() && n.localName() == Latin1Literal("collection")) { + collelem = n.toElement(); + break; + } + } + if(collelem.isNull()) { + kdWarning() << "TellicoImporter::loadDomDocument() - No collection item found." << endl; + return; + } + + QString title = collelem.attribute(QString::fromLatin1("title")); + + // be careful not to have element name collision + // for fields, each true field element is a child of a fields element + QDomNodeList fieldelems; + for(QDomNode n = collelem.firstChild(); !n.isNull(); n = n.nextSibling()) { + if(n.namespaceURI() != m_namespace) { + continue; + } + // Latin1Literal is a macro, so can't say Latin1Literal(syntaxVersion > 3 ? "fields" : "attributes") + if((syntaxVersion > 3 && n.localName() == Latin1Literal("fields")) + || (syntaxVersion < 4 && n.localName() == Latin1Literal("attributes"))) { + QDomElement e = n.toElement(); + fieldelems = e.elementsByTagNameNS(m_namespace, (syntaxVersion > 3) ? 
QString::fromLatin1("field") + : QString::fromLatin1("attribute")); + break; + } + } +// myDebug() << "TellicoImporter::loadXMLData() - " << fieldelems.count() << " field(s)" << endl; + + // the dilemma is when to force the new collection to have all the default attributes + // if there are no attributes or if the first one has the special name of _default + bool addFields = (fieldelems.count() == 0); + if(!addFields) { + QString name = fieldelems.item(0).toElement().attribute(QString::fromLatin1("name")); + addFields = (name == Latin1Literal("_default")); + // removeChild only works for immediate children + // remove _default field + if(addFields) { + fieldelems.item(0).parentNode().removeChild(fieldelems.item(0)); + } + } + + QString entryName; + // in syntax 4, the element name was changed to "entry", always, rather than depending on + // on the entryName of the collection. A type field was added to the collection element + // to specify what type of collection it is. + if(syntaxVersion > 3) { + entryName = QString::fromLatin1("entry"); + QString typeStr = collelem.attribute(QString::fromLatin1("type")); + Data::Collection::Type type = static_cast<Data::Collection::Type>(typeStr.toInt()); + m_coll = CollectionFactory::collection(type, addFields); + } else { + entryName = collelem.attribute(QString::fromLatin1("unit")); + m_coll = CollectionFactory::collection(entryName, addFields); + } + + if(!title.isEmpty()) { + m_coll->setTitle(title); + } + + for(uint j = 0; j < fieldelems.count(); ++j) { + readField(syntaxVersion, fieldelems.item(j).toElement()); + } + + if(m_coll->type() == Data::Collection::Bibtex) { + Data::BibtexCollection* c = static_cast<Data::BibtexCollection*>(m_coll.data()); + QDomNodeList macroelems; + for(QDomNode n = collelem.firstChild(); !n.isNull(); n = n.nextSibling()) { + if(n.namespaceURI() != m_namespace) { + continue; + } + if(n.localName() == Latin1Literal("macros")) { + macroelems = n.toElement().elementsByTagNameNS(m_namespace, 
QString::fromLatin1("macro")); + break; + } + } +// myDebug() << "TellicoImporter::loadXMLData() - found " << macroelems.count() << " macros" << endl; + for(uint j = 0; c && j < macroelems.count(); ++j) { + QDomElement elem = macroelems.item(j).toElement(); + c->addMacro(elem.attribute(QString::fromLatin1("name")), elem.text()); + } + + for(QDomNode n = collelem.firstChild(); !n.isNull(); n = n.nextSibling()) { + if(n.namespaceURI() != m_namespace) { + continue; + } + if(n.localName() == Latin1Literal("bibtex-preamble")) { + c->setPreamble(n.toElement().text()); + break; + } + } + } + + if(m_cancelled) { + m_coll = 0; + return; + } + +// as a special case, for old book collections with a bibtex-id field, convert to Bibtex + if(syntaxVersion < 4 && m_coll->type() == Data::Collection::Book + && m_coll->hasField(QString::fromLatin1("bibtex-id"))) { + m_coll = Data::BibtexCollection::convertBookCollection(m_coll); + } + + const uint count = collelem.childNodes().count(); + const uint stepSize = QMAX(s_stepSize, count/100); + const bool showProgress = options() & ImportProgress; + + item.setTotalSteps(count); + + // have to read images before entries so we can figure out if + // linkOnly() is true + // m_loadAllImages only pertains to zip files + QDomNodeList imgelems; + for(QDomNode n = collelem.firstChild(); !n.isNull(); n = n.nextSibling()) { + if(n.namespaceURI() != m_namespace) { + continue; + } + if(n.localName() == Latin1Literal("images")) { + imgelems = n.toElement().elementsByTagNameNS(m_namespace, QString::fromLatin1("image")); + break; + } + } + for(uint j = 0; j < imgelems.count(); ++j) { + readImage(imgelems.item(j).toElement(), loadImages_); + } + + if(m_cancelled) { + m_coll = 0; + return; + } + + uint j = 0; + for(QDomNode n = collelem.firstChild(); !n.isNull() && !m_cancelled; n = n.nextSibling(), ++j) { + if(n.namespaceURI() != m_namespace) { + continue; + } + if(n.localName() == entryName) { + readEntry(syntaxVersion, n.toElement()); + + // not 
exactly right, but close enough + if(showProgress && j%stepSize == 0) { + ProgressManager::self()->setProgress(this, j); + kapp->processEvents(); + } + } else { +// myDebug() << "...skipping " << n.localName() << " (" << n.namespaceURI() << ")" << endl; + } + } // end entry loop + + if(m_cancelled) { + m_coll = 0; + return; + } + + // filters and borrowers are at document root level, not collection + for(QDomNode n = root.firstChild(); !n.isNull() && !m_cancelled; n = n.nextSibling()) { + if(n.namespaceURI() != m_namespace) { + continue; + } + if(n.localName() == Latin1Literal("borrowers")) { + QDomNodeList borrowerElems = n.toElement().elementsByTagNameNS(m_namespace, QString::fromLatin1("borrower")); + for(uint j = 0; j < borrowerElems.count(); ++j) { + readBorrower(borrowerElems.item(j).toElement()); + } + } else if(n.localName() == Latin1Literal("filters")) { + QDomNodeList filterElems = n.toElement().elementsByTagNameNS(m_namespace, QString::fromLatin1("filter")); + for(uint j = 0; j < filterElems.count(); ++j) { + readFilter(filterElems.item(j).toElement()); + } + } + } + + // special for user, if using an older document format, add some nice new filters + if(syntaxVersion < 8) { + addDefaultFilters(); + } + + if(m_cancelled) { + m_coll = 0; + } +} + +void TellicoImporter::readField(uint syntaxVersion_, const QDomElement& elem_) { + // special case: if the i18n attribute equals true, then translate the title, description, and category + bool isI18n = elem_.attribute(QString::fromLatin1("i18n")) == Latin1Literal("true"); + + QString name = elem_.attribute(QString::fromLatin1("name"), QString::fromLatin1("unknown")); + QString title = elem_.attribute(QString::fromLatin1("title"), i18n("Unknown")); + if(isI18n) { + title = i18n(title.utf8()); + } + + QString typeStr = elem_.attribute(QString::fromLatin1("type"), QString::number(Data::Field::Line)); + Data::Field::Type type = static_cast<Data::Field::Type>(typeStr.toInt()); + + Data::FieldPtr field; + if(type == 
Data::Field::Choice) { + QStringList allowed = QStringList::split(QString::fromLatin1(";"), + elem_.attribute(QString::fromLatin1("allowed"))); + if(isI18n) { + for(QStringList::Iterator it = allowed.begin(); it != allowed.end(); ++it) { + (*it) = i18n((*it).utf8()); + } + } + field = new Data::Field(name, title, allowed); + } else { + field = new Data::Field(name, title, type); + } + + if(elem_.hasAttribute(QString::fromLatin1("category"))) { + // at one point, the categories had keyboard accels + QString cat = elem_.attribute(QString::fromLatin1("category")); + if(syntaxVersion_ < 9 && cat.find('&') > -1) { + cat.remove('&'); + } + if(isI18n) { + cat = i18n(cat.utf8()); + } + field->setCategory(cat); + } + + if(elem_.hasAttribute(QString::fromLatin1("flags"))) { + int flags = elem_.attribute(QString::fromLatin1("flags")).toInt(); + // I also changed the enum values for syntax 3, but the only custom field + // would have been bibtex-id + if(syntaxVersion_ < 3 && field->name() == Latin1Literal("bibtex-id")) { + flags = 0; + } + + // in syntax version 4, added a flag to disallow deleting attributes + // if it's a version before that and is the title, then add the flag + if(syntaxVersion_ < 4 && field->name() == Latin1Literal("title")) { + flags |= Data::Field::NoDelete; + } + field->setFlags(flags); + } + + QString formatStr = elem_.attribute(QString::fromLatin1("format"), QString::number(Data::Field::FormatNone)); + Data::Field::FormatFlag format = static_cast<Data::Field::FormatFlag>(formatStr.toInt()); + field->setFormatFlag(format); + + if(elem_.hasAttribute(QString::fromLatin1("description"))) { + QString desc = elem_.attribute(QString::fromLatin1("description")); + if(isI18n) { + desc = i18n(desc.utf8()); + } + field->setDescription(desc); + } + + if(syntaxVersion_ >= 5) { + QDomNodeList props = elem_.elementsByTagNameNS(m_namespace, QString::fromLatin1("prop")); + for(uint i = 0; i < props.count(); ++i) { + QDomElement e = props.item(i).toElement(); + 
field->setProperty(e.attribute(QString::fromLatin1("name")), e.text()); + } + // all track fields in music collections prior to version 9 get converted to three columns + if(syntaxVersion_ < 9) { + if(m_coll->type() == Data::Collection::Album && field->name() == Latin1Literal("track")) { + field->setProperty(QString::fromLatin1("columns"), QChar('3')); + field->setProperty(QString::fromLatin1("column1"), i18n("Title")); + field->setProperty(QString::fromLatin1("column2"), i18n("Artist")); + field->setProperty(QString::fromLatin1("column3"), i18n("Length")); + } else if(m_coll->type() == Data::Collection::Video && field->name() == Latin1Literal("cast")) { + field->setProperty(QString::fromLatin1("column1"), i18n("Actor/Actress")); + field->setProperty(QString::fromLatin1("column2"), i18n("Role")); + } + } + } else if(elem_.hasAttribute(QString::fromLatin1("bibtex-field"))) { + field->setProperty(QString::fromLatin1("bibtex"), elem_.attribute(QString::fromLatin1("bibtex-field"))); + } + + // Table2 is deprecated + if(field->type() == Data::Field::Table2) { + field->setType(Data::Field::Table); + field->setProperty(QString::fromLatin1("columns"), QChar('2')); + } + // for syntax 8, rating fields got their own type + if(syntaxVersion_ < 8) { + Data::Field::convertOldRating(field); // does all its own checking + } + m_coll->addField(field); +// myDebug() << QString(" Added field: %1, %2").arg(field->name()).arg(field->title()) << endl; +} + +void TellicoImporter::readEntry(uint syntaxVersion_, const QDomElement& entryElem_) { + const int id = entryElem_.attribute(QString::fromLatin1("id")).toInt(); + Data::EntryPtr entry; + if(id > 0) { + entry = new Data::Entry(m_coll, id); + } else { + entry = new Data::Entry(m_coll); + } + + bool oldMusic = (syntaxVersion_ < 9 && m_coll->type() == Data::Collection::Album); + + // iterate over all field value children + for(QDomNode node = entryElem_.firstChild(); !node.isNull(); node = node.nextSibling()) { + QDomElement elem = 
node.toElement(); + if(elem.isNull()) { + continue; + } + + bool isI18n = elem.attribute(QString::fromLatin1("i18n")) == Latin1Literal("true"); + + // Entry::setField checks to see if an field of 'name' is allowed + // in version 3 and prior, checkbox attributes had no text(), set it to "true" now + if(syntaxVersion_ < 4 && elem.text().isEmpty()) { + // "true" means checked + entry->setField(elem.localName(), QString::fromLatin1("true")); + continue; + } + + QString name = elem.localName(); + Data::FieldPtr f = m_coll->fieldByName(name); + + // if the first child of the node is a text node, just set the attribute text + // otherwise, recurse over the node's children + // this is the case for <authors><author>..</author></authors> + // but if there's nothing but white space, then it's a BaseNode for some reason +// if(node.firstChild().nodeType() == QDomNode::TextNode) { + if(f) { + // if it's a derived value, no field value is added + if(f->type() == Data::Field::Dependent) { + continue; + } + + // special case for Date fields + if(f->type() == Data::Field::Date) { + if(elem.hasChildNodes()) { + QString value; + QDomNode yNode = elem.elementsByTagNameNS(m_namespace, QString::fromLatin1("year")).item(0); + if(!yNode.isNull()) { + value += yNode.toElement().text(); + } + value += '-'; + QDomNode mNode = elem.elementsByTagNameNS(m_namespace, QString::fromLatin1("month")).item(0); + if(!mNode.isNull()) { + value += mNode.toElement().text(); + } + value += '-'; + QDomNode dNode = elem.elementsByTagNameNS(m_namespace, QString::fromLatin1("day")).item(0); + if(!dNode.isNull()) { + value += dNode.toElement().text(); + } + entry->setField(name, value); + } else { + // if no child nodes, the code will later assume the value to be the year + entry->setField(name, elem.text()); + } + // go to next value in loop + continue; + } + + // this may be a performance hit to be stripping white space all the time + // unfortunately, text() will include a carriage-return in cases like + 
// <value> + // text + // </value + // so we arbitrarily decide that only paragraphs get to have CRs? + QString value = elem.text(); + if(f->type() != Data::Field::Para) { + value = value.stripWhiteSpace(); + } + + if(value.isEmpty()) { + continue; + } + + if(f->type() == Data::Field::Image) { + // image info should have already been loaded + const Data::ImageInfo& info = ImageFactory::imageInfo(value); + // possible that value needs to be cleaned first in which case info is null + if(info.isNull() || !info.linkOnly) { + // for local files only, allow paths here + KURL u = KURL::fromPathOrURL(value); + if(u.isValid() && u.isLocalFile()) { + QString result = ImageFactory::addImage(u, false /* quiet */); + if(!result.isEmpty()) { + value = result; + } + } + value = Data::Image::idClean(value); + } + } + + // in version 8, old rating fields get changed + if(syntaxVersion_ < 8 && f->type() == Data::Field::Rating) { + bool ok; + uint i = Tellico::toUInt(value, &ok); + if(ok) { + value = QString::number(i); + } + } else if(syntaxVersion_ < 2 && name == Latin1Literal("keywords")) { + // in version 2, "keywords" changed to "keyword" + name = QString::fromLatin1("keyword"); + } + // special case: if the i18n attribute equals true, then translate the title, description, and category + if(isI18n) { + entry->setField(name, i18n(value.utf8())); + } else { + // special case for isbn fields, go ahead and validate + if(name == Latin1Literal("isbn")) { + const ISBNValidator val(0); + if(elem.attribute(QString::fromLatin1("validate")) != Latin1Literal("no")) { + val.fixup(value); + } + } + entry->setField(name, value); + } + } else { // if no field by the tag name, then it has children, iterate through them + // the field name has the final 's', so remove it + name.truncate(name.length() - 1); + f = m_coll->fieldByName(name); + + // if it's a derived value, no field value is added + if(!f || f->type() == Data::Field::Dependent) { + continue; + } + + const bool oldTracks = (oldMusic 
&& name == Latin1Literal("track")); + + QStringList values; + // concatenate values + for(QDomNode childNode = node.firstChild(); !childNode.isNull(); childNode = childNode.nextSibling()) { + QString value; + // don't worry about i18n here, Tables are never translated + QDomNodeList cols = childNode.toElement().elementsByTagNameNS(m_namespace, QString::fromLatin1("column")); + if(cols.count() > 0) { + for(uint i = 0; i < cols.count(); ++i) { + // special case for old tracks + if(oldTracks && i == 1) { + // if the second column holds the track length, bump it to next column + QRegExp rx(QString::fromLatin1("\\d+:\\d\\d")); + if(rx.exactMatch(cols.item(i).toElement().text())) { + value += entry->field(QString::fromLatin1("artist")); + value += QString::fromLatin1("::"); + } + } + value += cols.item(i).toElement().text().stripWhiteSpace(); + if(i < cols.count()-1) { + value += QString::fromLatin1("::"); + } else if(oldTracks && cols.count() == 1) { + value += QString::fromLatin1("::"); + value += entry->field(QString::fromLatin1("artist")); + } + } + values += value; + } else { + // really loose here, we don't even check that the element name + // is what we think it is + QString s = childNode.toElement().text().stripWhiteSpace(); + if(isI18n && !s.isEmpty()) { + value += i18n(s.utf8()); + } else { + value += s; + } + if(oldTracks) { + value += QString::fromLatin1("::"); + value += entry->field(QString::fromLatin1("artist")); + } + if(values.findIndex(value) == -1) { + values += value; + } + } + } + entry->setField(name, values.join(QString::fromLatin1("; "))); + } + } // end field value loop + + m_coll->addEntries(entry); +} + +void TellicoImporter::readImage(const QDomElement& elem_, bool loadImage_) { + QString format = elem_.attribute(QString::fromLatin1("format")); + const bool link = elem_.attribute(QString::fromLatin1("link")) == Latin1Literal("true"); + QString id = shareString(link ? 
elem_.attribute(QString::fromLatin1("id")) + : Data::Image::idClean(elem_.attribute(QString::fromLatin1("id")))); + + bool readInfo = true; + if(loadImage_) { + QByteArray ba; + KCodecs::base64Decode(QCString(elem_.text().latin1()), ba); + if(!ba.isEmpty()) { + QString result = ImageFactory::addImage(ba, format, id); + if(result.isEmpty()) { + myDebug() << "TellicoImporter::readImage(XML) - null image for " << id << endl; + } + m_hasImages = true; + readInfo = false; + } + } + if(readInfo) { + // a width or height of 0 is ok here + int width = elem_.attribute(QString::fromLatin1("width")).toInt(); + int height = elem_.attribute(QString::fromLatin1("height")).toInt(); + Data::ImageInfo info(id, format.latin1(), width, height, link); + ImageFactory::cacheImageInfo(info); + } +} + +void TellicoImporter::readFilter(const QDomElement& elem_) { + FilterPtr f = new Filter(Filter::MatchAny); + f->setName(elem_.attribute(QString::fromLatin1("name"))); + + QString match = elem_.attribute(QString::fromLatin1("match")); + if(match == Latin1Literal("all")) { + f->setMatch(Filter::MatchAll); + } + + QDomNodeList rules = elem_.elementsByTagNameNS(m_namespace, QString::fromLatin1("rule")); + for(uint i = 0; i < rules.count(); ++i) { + QDomElement e = rules.item(i).toElement(); + if(e.isNull()) { + continue; + } + + QString field = e.attribute(QString::fromLatin1("field")); + // empty field means match any of them + QString pattern = e.attribute(QString::fromLatin1("pattern")); + // empty pattern is bad + if(pattern.isEmpty()) { + kdWarning() << "TellicoImporter::readFilter() - empty rule!" 
<< endl; + continue; + } + QString function = e.attribute(QString::fromLatin1("function")).lower(); + FilterRule::Function func; + if(function == Latin1Literal("contains")) { + func = FilterRule::FuncContains; + } else if(function == Latin1Literal("notcontains")) { + func = FilterRule::FuncNotContains; + } else if(function == Latin1Literal("equals")) { + func = FilterRule::FuncEquals; + } else if(function == Latin1Literal("notequals")) { + func = FilterRule::FuncNotEquals; + } else if(function == Latin1Literal("regexp")) { + func = FilterRule::FuncRegExp; + } else if(function == Latin1Literal("notregexp")) { + func = FilterRule::FuncNotRegExp; + } else { + kdWarning() << "TellicoImporter::readFilter() - invalid rule function: " << function << endl; + continue; + } + f->append(new FilterRule(field, pattern, func)); + } + + if(!f->isEmpty()) { + m_coll->addFilter(f); + } +} + +void TellicoImporter::readBorrower(const QDomElement& elem_) { + QString name = elem_.attribute(QString::fromLatin1("name")); + QString uid = elem_.attribute(QString::fromLatin1("uid")); + Data::BorrowerPtr b = new Data::Borrower(name, uid); + + QDomNodeList loans = elem_.elementsByTagNameNS(m_namespace, QString::fromLatin1("loan")); + for(uint i = 0; i < loans.count(); ++i) { + QDomElement e = loans.item(i).toElement(); + if(e.isNull()) { + continue; + } + long id = e.attribute(QString::fromLatin1("entryRef")).toLong(); + Data::EntryPtr entry = m_coll->entryById(id); + if(!entry) { + myDebug() << "TellicoImporter::readBorrower() - no entry with id = " << id << endl; + continue; + } + QString uid = e.attribute(QString::fromLatin1("uid")); + QDate loanDate, dueDate; + QString s = e.attribute(QString::fromLatin1("loanDate")); + if(!s.isEmpty()) { + loanDate = QDate::fromString(s, Qt::ISODate); + } + s = e.attribute(QString::fromLatin1("dueDate")); + if(!s.isEmpty()) { + dueDate = QDate::fromString(s, Qt::ISODate); + } + Data::LoanPtr loan = new Data::Loan(entry, loanDate, dueDate, e.text()); + 
loan->setUID(uid); + b->addLoan(loan); + s = e.attribute(QString::fromLatin1("calendar")); + loan->setInCalendar(s == Latin1Literal("true")); + } + if(!b->isEmpty()) { + m_coll->addBorrower(b); + } +} + +void TellicoImporter::loadZipData() { + delete m_buffer; + delete m_zip; + if(source() == URL) { + m_buffer = 0; + m_zip = new KZip(fileRef().fileName()); + } else { + m_buffer = new QBuffer(data()); + m_zip = new KZip(m_buffer); + } + if(!m_zip->open(IO_ReadOnly)) { + setStatusMessage(i18n(errorLoad).arg(url().fileName())); + m_format = Error; + delete m_zip; + m_zip = 0; + delete m_buffer; + m_buffer = 0; + return; + } + + const KArchiveDirectory* dir = m_zip->directory(); + if(!dir) { + QString str = i18n(errorLoad).arg(url().fileName()) + QChar('\n'); + str += i18n("The file is empty."); + setStatusMessage(str); + m_format = Error; + m_zip->close(); + delete m_zip; + m_zip = 0; + delete m_buffer; + m_buffer = 0; + return; + } + + // main file was changed from bookcase.xml to tellico.xml as of version 0.13 + const KArchiveEntry* entry = dir->entry(QString::fromLatin1("tellico.xml")); + if(!entry) { + entry = dir->entry(QString::fromLatin1("bookcase.xml")); + } + if(!entry || !entry->isFile()) { + QString str = i18n(errorLoad).arg(url().fileName()) + QChar('\n'); + str += i18n("The file contains no collection data."); + setStatusMessage(str); + m_format = Error; + m_zip->close(); + delete m_zip; + m_zip = 0; + delete m_buffer; + m_buffer = 0; + return; + } + + const QByteArray xmlData = static_cast<const KArchiveFile*>(entry)->data(); + loadXMLData(xmlData, false); + if(!m_coll) { + m_format = Error; + m_zip->close(); + delete m_zip; + m_zip = 0; + delete m_buffer; + m_buffer = 0; + return; + } + + if(m_cancelled) { + m_zip->close(); + delete m_zip; + m_zip = 0; + delete m_buffer; + m_buffer = 0; + return; + } + + const KArchiveEntry* imgDirEntry = dir->entry(QString::fromLatin1("images")); + if(!imgDirEntry || !imgDirEntry->isDirectory()) { + m_zip->close(); + 
delete m_zip; + m_zip = 0; + delete m_buffer; + m_buffer = 0; + return; + } + m_imgDir = static_cast<const KArchiveDirectory*>(imgDirEntry); + m_images.clear(); + m_images.add(m_imgDir->entries()); + m_hasImages = !m_images.isEmpty(); + + // if all the images are not to be loaded, then we're done + if(!m_loadAllImages) { +// myLog() << "TellicoImporter::loadZipData() - delayed loading for " << m_images.count() << " images" << endl; + return; + } + + const QStringList images = static_cast<const KArchiveDirectory*>(imgDirEntry)->entries(); + const uint stepSize = QMAX(s_stepSize, images.count()/100); + + uint j = 0; + for(QStringList::ConstIterator it = images.begin(); !m_cancelled && it != images.end(); ++it, ++j) { + const KArchiveEntry* file = m_imgDir->entry(*it); + if(file && file->isFile()) { + ImageFactory::addImage(static_cast<const KArchiveFile*>(file)->data(), + (*it).section('.', -1).upper(), (*it)); + m_images.remove(*it); + } + if(j%stepSize == 0) { + kapp->processEvents(); + } + } + + if(m_images.isEmpty()) { + // give it some time + QTimer::singleShot(3000, this, SLOT(deleteLater())); + } +} + +bool TellicoImporter::loadImage(const QString& id_) { +// myLog() << "TellicoImporter::loadImage() - id = " << id_ << endl; + if(m_format != Zip || !m_imgDir) { + return false; + } + const KArchiveEntry* file = m_imgDir->entry(id_); + if(!file || !file->isFile()) { + return false; + } + QString newID = ImageFactory::addImage(static_cast<const KArchiveFile*>(file)->data(), + id_.section('.', -1).upper(), id_); + m_images.remove(id_); + if(m_images.isEmpty()) { + // give it some time + QTimer::singleShot(3000, this, SLOT(deleteLater())); + } + return !newID.isEmpty(); +} + +// static +bool TellicoImporter::loadAllImages(const KURL& url_) { + // only local files are allowed + if(url_.isEmpty() || !url_.isValid() || !url_.isLocalFile()) { +// myDebug() << "TellicoImporter::loadAllImages() - returning" << endl; + return false; + } + + // keep track of url for error 
reporting + static KURL u; + + KZip zip(url_.path()); + if(!zip.open(IO_ReadOnly)) { + if(u != url_) { + Kernel::self()->sorry(i18n(errorImageLoad).arg(url_.fileName())); + } + u = url_; + return false; + } + + const KArchiveDirectory* dir = zip.directory(); + if(!dir) { + if(u != url_) { + Kernel::self()->sorry(i18n(errorImageLoad).arg(url_.fileName())); + } + u = url_; + zip.close(); + return false; + } + + const KArchiveEntry* imgDirEntry = dir->entry(QString::fromLatin1("images")); + if(!imgDirEntry || !imgDirEntry->isDirectory()) { + zip.close(); + return false; + } + const QStringList images = static_cast<const KArchiveDirectory*>(imgDirEntry)->entries(); + for(QStringList::ConstIterator it = images.begin(); it != images.end(); ++it) { + const KArchiveEntry* file = static_cast<const KArchiveDirectory*>(imgDirEntry)->entry(*it); + if(file && file->isFile()) { + ImageFactory::addImage(static_cast<const KArchiveFile*>(file)->data(), + (*it).section('.', -1).upper(), (*it)); + } + } + zip.close(); + return true; +} + +void TellicoImporter::addDefaultFilters() { + switch(m_coll->type()) { + case Data::Collection::Book: + if(m_coll->hasField(QString::fromLatin1("read"))) { + FilterPtr f = new Filter(Filter::MatchAny); + f->setName(i18n("Unread Books")); + f->append(new FilterRule(QString::fromLatin1("read"), QString::fromLatin1("true"), FilterRule::FuncNotContains)); + m_coll->addFilter(f); + m_modified = true; + } + break; + + case Data::Collection::Video: + if(m_coll->hasField(QString::fromLatin1("year"))) { + FilterPtr f = new Filter(Filter::MatchAny); + f->setName(i18n("Old Movies")); + // old movies from before 1960 + f->append(new FilterRule(QString::fromLatin1("year"), QString::fromLatin1("19[012345]\\d"), FilterRule::FuncRegExp)); + m_coll->addFilter(f); + m_modified = true; + } + if(m_coll->hasField(QString::fromLatin1("widescreen"))) { + FilterPtr f = new Filter(Filter::MatchAny); + f->setName(i18n("Widescreen")); + f->append(new 
FilterRule(QString::fromLatin1("widescreen"), QString::fromLatin1("true"), FilterRule::FuncContains)); + m_coll->addFilter(f); + m_modified = true; + } + break; + + case Data::Collection::Album: + if(m_coll->hasField(QString::fromLatin1("year"))) { + FilterPtr f = new Filter(Filter::MatchAny); + f->setName(i18n("80's Music")); + f->append(new FilterRule(QString::fromLatin1("year"), QString::fromLatin1("198\\d"),FilterRule::FuncRegExp)); + m_coll->addFilter(f); + m_modified = true; + } + break; + + default: + break; + } + if(m_coll->hasField(QString::fromLatin1("rating"))) { + FilterPtr filter = new Filter(Filter::MatchAny); + filter->setName(i18n("Favorites")); + // check all the numbers, and use top 20% or so + Data::FieldPtr field = m_coll->fieldByName(QString::fromLatin1("rating")); + bool ok; + uint min = Tellico::toUInt(field->property(QString::fromLatin1("minimum")), &ok); + if(!ok) { + min = 1; + } + uint max = Tellico::toUInt(field->property(QString::fromLatin1("maximum")), &ok); + if(!ok) { + max = 5; // "maximum" did not parse: fall back on the upper bound (the lower bound falls back to 1 above) + } + for(uint i = QMAX(min, static_cast<uint>(0.8*(max-min+1))); i <= max; ++i) { + filter->append(new FilterRule(QString::fromLatin1("rating"), QString::number(i), FilterRule::FuncContains)); + } + if(!filter->isEmpty()) { + m_coll->addFilter(filter); + m_modified = true; + } + } +} + +void TellicoImporter::slotCancel() { + m_cancelled = true; + m_format = Cancel; +} + +#include "tellicoimporter.moc" diff --git a/src/translators/tellicoimporter.h b/src/translators/tellicoimporter.h new file mode 100644 index 0000000..d4c6e13 --- /dev/null +++ b/src/translators/tellicoimporter.h @@ -0,0 +1,100 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can 
redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICO_IMPORTER_H +#define TELLICO_IMPORTER_H + +class QBuffer; +class KZip; +class KArchiveDirectory; + +#include "dataimporter.h" +#include "../datavectors.h" +#include "../stringset.h" + +class QDomElement; + +namespace Tellico { + namespace Import { + +/** + * Reading the @ref Tellico data files is done by the TellicoImporter. + * + * @author Robby Stephenson + */ +class TellicoImporter : public DataImporter { +Q_OBJECT + +public: + enum Format { Unknown, Error, XML, Zip, Cancel }; + + /** + * @param url The tellico data file. + */ + TellicoImporter(const KURL& url, bool loadAllImages=true); + /** + * Constructor used to convert arbitrary text to a @ref Collection + * + * @param text The text + */ + TellicoImporter(const QString& text); + virtual ~TellicoImporter(); + + /** + * sometimes, a new document format might add data + */ + bool modifiedOriginal() const { return m_modified; } + + /** + */ + virtual Data::CollPtr collection(); + Format format() const { return m_format; } + + bool hasImages() const { return m_hasImages; } + bool loadImage(const QString& id_); + + static bool loadAllImages(const KURL& url); + +public slots: + void slotCancel(); + +private: + static bool versionConversion(uint from, uint to); + + void loadXMLData(const QByteArray& data, bool loadImages); + void loadZipData(); + + void readField(uint syntaxVersion, const QDomElement& elem); + void readEntry(uint syntaxVersion, const QDomElement& elem); + void readImage(const QDomElement& elem, bool loadImage); + void readFilter(const QDomElement& elem); + void readBorrower(const QDomElement& elem); + void addDefaultFilters(); + + Data::CollPtr m_coll; + bool m_loadAllImages; + QString m_namespace; + Format m_format; + bool m_modified : 1; + 
bool m_cancelled : 1; + bool m_hasImages : 1; + StringSet m_images; + + QBuffer* m_buffer; + KZip* m_zip; + const KArchiveDirectory* m_imgDir; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/tellicoxmlexporter.cpp b/src/translators/tellicoxmlexporter.cpp new file mode 100644 index 0000000..6335ed1 --- /dev/null +++ b/src/translators/tellicoxmlexporter.cpp @@ -0,0 +1,505 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "tellicoxmlexporter.h" +#include "../collections/bibtexcollection.h" +#include "../imagefactory.h" +#include "../image.h" +#include "../controller.h" // needed for getting groupView pointer +#include "../entryitem.h" +#include "../latin1literal.h" +#include "../filehandler.h" +#include "../groupiterator.h" +#include "../tellico_utils.h" +#include "../tellico_kernel.h" +#include "../tellico_debug.h" +#include "tellico_xml.h" +#include "../document.h" // needed for sorting groups +#include "../translators/bibtexhandler.h" // needed for cleaning text + +#include <klocale.h> +#include <kconfig.h> +#include <kmdcodec.h> +#include <kglobal.h> +#include <kcalendarsystem.h> + +#include <qlayout.h> +#include <qgroupbox.h> +#include <qcheckbox.h> +#include <qwhatsthis.h> +#include <qdom.h> +#include <qtextcodec.h> + +using Tellico::Export::TellicoXMLExporter; + +TellicoXMLExporter::TellicoXMLExporter() : Exporter(), + m_includeImages(false), m_includeGroups(false), 
m_widget(0) { + setOptions(options() | Export::ExportImages | Export::ExportImageSize); // not included by default +} + +TellicoXMLExporter::TellicoXMLExporter(Data::CollPtr coll) : Exporter(coll), + m_includeImages(false), m_includeGroups(false), m_widget(0) { + setOptions(options() | Export::ExportImages | Export::ExportImageSize); // not included by default +} + +QString TellicoXMLExporter::formatString() const { + return i18n("XML"); +} + +QString TellicoXMLExporter::fileFilter() const { + return i18n("*.xml|XML Files (*.xml)") + QChar('\n') + i18n("*|All Files"); +} + +bool TellicoXMLExporter::exec() { + QDomDocument doc = exportXML(); + if(doc.isNull()) { + return false; + } + return FileHandler::writeTextURL(url(), doc.toString(), + options() & ExportUTF8, + options() & Export::ExportForce); +} + +QDomDocument TellicoXMLExporter::exportXML() const { + // don't be hard on people with older versions. The only difference with DTD 10 was adding + // a board game collection, so use 9 still unless it's a board game + int exportVersion = (XML::syntaxVersion == 10 && collection()->type() != Data::Collection::BoardGame) + ? 
9 + : XML::syntaxVersion; + + QDomImplementation impl; + QDomDocumentType doctype = impl.createDocumentType(QString::fromLatin1("tellico"), + XML::pubTellico(exportVersion), + XML::dtdTellico(exportVersion)); + //default namespace + const QString& ns = XML::nsTellico; + + QDomDocument dom = impl.createDocument(ns, QString::fromLatin1("tellico"), doctype); + + // root tellico element + QDomElement root = dom.documentElement(); + + QString encodeStr = QString::fromLatin1("version=\"1.0\" encoding=\""); + if(options() & Export::ExportUTF8) { + encodeStr += QString::fromLatin1("UTF-8"); + } else { + encodeStr += QString::fromLatin1(QTextCodec::codecForLocale()->mimeName()); + } + encodeStr += QChar('"'); + + // createDocument creates a root node, insert the processing instruction before it + dom.insertBefore(dom.createProcessingInstruction(QString::fromLatin1("xml"), encodeStr), root); + + root.setAttribute(QString::fromLatin1("syntaxVersion"), exportVersion); + + exportCollectionXML(dom, root, options() & Export::ExportFormatted); + + // clear image list + m_images.clear(); + + return dom; +} + +QString TellicoXMLExporter::exportXMLString() const { + return exportXML().toString(); +} + +void TellicoXMLExporter::exportCollectionXML(QDomDocument& dom_, QDomElement& parent_, bool format_) const { + Data::CollPtr coll = collection(); + if(!coll) { + kdWarning() << "TellicoXMLExporter::exportCollectionXML() - no collection pointer!" 
<< endl; + return; + } + + QDomElement collElem = dom_.createElement(QString::fromLatin1("collection")); + collElem.setAttribute(QString::fromLatin1("type"), coll->type()); + collElem.setAttribute(QString::fromLatin1("title"), coll->title()); + + QDomElement fieldsElem = dom_.createElement(QString::fromLatin1("fields")); + collElem.appendChild(fieldsElem); + + Data::FieldVec fields = coll->fields(); + for(Data::FieldVec::Iterator fIt = fields.begin(); fIt != fields.end(); ++fIt) { + exportFieldXML(dom_, fieldsElem, fIt); + } + + if(coll->type() == Data::Collection::Bibtex) { + const Data::BibtexCollection* c = static_cast<const Data::BibtexCollection*>(coll.data()); + if(!c->preamble().isEmpty()) { + QDomElement preElem = dom_.createElement(QString::fromLatin1("bibtex-preamble")); + preElem.appendChild(dom_.createTextNode(c->preamble())); + collElem.appendChild(preElem); + } + + QDomElement macrosElem = dom_.createElement(QString::fromLatin1("macros")); + for(StringMap::ConstIterator macroIt = c->macroList().constBegin(); macroIt != c->macroList().constEnd(); ++macroIt) { + if(!macroIt.data().isEmpty()) { + QDomElement macroElem = dom_.createElement(QString::fromLatin1("macro")); + macroElem.setAttribute(QString::fromLatin1("name"), macroIt.key()); + macroElem.appendChild(dom_.createTextNode(macroIt.data())); + macrosElem.appendChild(macroElem); + } + } + if(macrosElem.childNodes().count() > 0) { + collElem.appendChild(macrosElem); + } + } + + Data::EntryVec evec = entries(); + for(Data::EntryVec::Iterator entry = evec.begin(); entry != evec.end(); ++entry) { + exportEntryXML(dom_, collElem, entry, format_); + } + + if(!m_images.isEmpty() && (options() & Export::ExportImages)) { + QDomElement imgsElem = dom_.createElement(QString::fromLatin1("images")); + collElem.appendChild(imgsElem); + const QStringList imageIds = m_images.toList(); + for(QStringList::ConstIterator it = imageIds.begin(); it != imageIds.end(); ++it) { + exportImageXML(dom_, imgsElem, *it); + } + 
} + + if(m_includeGroups) { + exportGroupXML(dom_, collElem); + } + + parent_.appendChild(collElem); + + // the borrowers and filters are in the tellico object, not the collection + if(options() & Export::ExportComplete) { + QDomElement bElem = dom_.createElement(QString::fromLatin1("borrowers")); + Data::BorrowerVec borrowers = coll->borrowers(); + for(Data::BorrowerVec::Iterator bIt = borrowers.begin(); bIt != borrowers.end(); ++bIt) { + exportBorrowerXML(dom_, bElem, bIt); + } + if(bElem.hasChildNodes()) { + parent_.appendChild(bElem); + } + + QDomElement fElem = dom_.createElement(QString::fromLatin1("filters")); + FilterVec filters = coll->filters(); + for(FilterVec::Iterator fIt = filters.begin(); fIt != filters.end(); ++fIt) { + exportFilterXML(dom_, fElem, fIt); + } + if(fElem.hasChildNodes()) { + parent_.appendChild(fElem); + } + } +} + +void TellicoXMLExporter::exportFieldXML(QDomDocument& dom_, QDomElement& parent_, Data::FieldPtr field_) const { + QDomElement elem = dom_.createElement(QString::fromLatin1("field")); + + elem.setAttribute(QString::fromLatin1("name"), field_->name()); + elem.setAttribute(QString::fromLatin1("title"), field_->title()); + elem.setAttribute(QString::fromLatin1("category"), field_->category()); + elem.setAttribute(QString::fromLatin1("type"), field_->type()); + elem.setAttribute(QString::fromLatin1("flags"), field_->flags()); + elem.setAttribute(QString::fromLatin1("format"), field_->formatFlag()); + + if(field_->type() == Data::Field::Choice) { + elem.setAttribute(QString::fromLatin1("allowed"), field_->allowed().join(QString::fromLatin1(";"))); + } + + // only save description if it's not equal to title, which is the default + // title is never empty, so this indirectly checks for empty descriptions + if(field_->description() != field_->title()) { + elem.setAttribute(QString::fromLatin1("description"), field_->description()); + } + + for(StringMap::ConstIterator it = field_->propertyList().begin(); it != 
field_->propertyList().end(); ++it) { + if(it.data().isEmpty()) { + continue; + } + QDomElement e = dom_.createElement(QString::fromLatin1("prop")); + e.setAttribute(QString::fromLatin1("name"), it.key()); + e.appendChild(dom_.createTextNode(it.data())); + elem.appendChild(e); + } + + parent_.appendChild(elem); +} + +void TellicoXMLExporter::exportEntryXML(QDomDocument& dom_, QDomElement& parent_, Data::EntryPtr entry_, bool format_) const { + QDomElement entryElem = dom_.createElement(QString::fromLatin1("entry")); + entryElem.setAttribute(QString::fromLatin1("id"), entry_->id()); + + // iterate through every field for the entry + Data::FieldVec fields = entry_->collection()->fields(); + for(Data::FieldVec::Iterator fIt = fields.begin(); fIt != fields.end(); ++fIt) { + QString fieldName = fIt->name(); + + // Date fields are special, don't format in export + QString fieldValue = (format_ && fIt->type() != Data::Field::Date) ? entry_->formattedField(fieldName) + : entry_->field(fieldName); + if(options() & ExportClean) { + BibtexHandler::cleanText(fieldValue); + } + + // if empty, then no field element is added and just continue + if(fieldValue.isEmpty()) { + continue; + } + + // optionally, verify images exist + if(fIt->type() == Data::Field::Image && (options() & Export::ExportVerifyImages)) { + if(!ImageFactory::validImage(fieldValue)) { + myDebug() << "TellicoXMLExporter::exportEntryXML() - entry: " << entry_->title() << endl; + myDebug() << "TellicoXMLExporter::exportEntryXML() - skipping image: " << fieldValue << endl; + continue; + } + } + + // if multiple versions are allowed, split them into separate elements + if(fIt->flags() & Data::Field::AllowMultiple) { + // parent element if field contains multiple values, child of entryElem + // who cares about grammar, just add an 's' to the name + QDomElement parElem = dom_.createElement(fieldName + 's'); + entryElem.appendChild(parElem); + + // the space after the semi-colon is enforced when the field is set for 
the entry + QStringList fields = QStringList::split(QString::fromLatin1("; "), fieldValue, true); + for(QStringList::ConstIterator it = fields.begin(); it != fields.end(); ++it) { + // element for field value, child of either entryElem or ParentElem + QDomElement fieldElem = dom_.createElement(fieldName); + // special case for multi-column tables + int ncols = 0; + if(fIt->type() == Data::Field::Table) { + bool ok; + ncols = Tellico::toUInt(fIt->property(QString::fromLatin1("columns")), &ok); + if(!ok) { + ncols = 1; + } + } + if(ncols > 1) { + for(int col = 0; col < ncols; ++col) { + QDomElement elem; + elem = dom_.createElement(QString::fromLatin1("column")); + elem.appendChild(dom_.createTextNode((*it).section(QString::fromLatin1("::"), col, col))); + fieldElem.appendChild(elem); + } + } else { + fieldElem.appendChild(dom_.createTextNode(*it)); + } + parElem.appendChild(fieldElem); + } + } else { + QDomElement fieldElem = dom_.createElement(fieldName); + entryElem.appendChild(fieldElem); + // Date fields get special treatment + if(fIt->type() == Data::Field::Date) { + fieldElem.setAttribute(QString::fromLatin1("calendar"), KGlobal::locale()->calendar()->calendarName()); + QStringList s = QStringList::split('-', fieldValue, true); + if(s.count() > 0 && !s[0].isEmpty()) { + QDomElement e = dom_.createElement(QString::fromLatin1("year")); + fieldElem.appendChild(e); + e.appendChild(dom_.createTextNode(s[0])); + } + if(s.count() > 1 && !s[1].isEmpty()) { + QDomElement e = dom_.createElement(QString::fromLatin1("month")); + fieldElem.appendChild(e); + e.appendChild(dom_.createTextNode(s[1])); + } + if(s.count() > 2 && !s[2].isEmpty()) { + QDomElement e = dom_.createElement(QString::fromLatin1("day")); + fieldElem.appendChild(e); + e.appendChild(dom_.createTextNode(s[2])); + } + } else if(fIt->type() == Data::Field::URL && + fIt->property(QString::fromLatin1("relative")) == Latin1Literal("true") && + !url().isEmpty()) { + // if a relative URL and url() is not empty, 
change the value! + KURL old_url(Kernel::self()->URL(), fieldValue); + fieldElem.appendChild(dom_.createTextNode(KURL::relativeURL(url(), old_url))); + } else { + fieldElem.appendChild(dom_.createTextNode(fieldValue)); + } + } + + if(fIt->type() == Data::Field::Image) { + // possible to have more than one entry with the same image + // only want to include it in the output xml once + m_images.add(fieldValue); + } + } // end field loop + + parent_.appendChild(entryElem); +} + +void TellicoXMLExporter::exportImageXML(QDomDocument& dom_, QDomElement& parent_, const QString& id_) const { + if(id_.isEmpty()) { + myDebug() << "TellicoXMLExporter::exportImageXML() - empty image!" << endl; + return; + } +// myLog() << "TellicoXMLExporter::exportImageXML() - id = " << id_ << endl; + + QDomElement imgElem = dom_.createElement(QString::fromLatin1("image")); + if(m_includeImages) { + const Data::Image& img = ImageFactory::imageById(id_); + if(img.isNull()) { + myDebug() << "TellicoXMLExporter::exportImageXML() - null image - " << id_ << endl; + return; + } + imgElem.setAttribute(QString::fromLatin1("format"), img.format()); + imgElem.setAttribute(QString::fromLatin1("id"), img.id()); + imgElem.setAttribute(QString::fromLatin1("width"), img.width()); + imgElem.setAttribute(QString::fromLatin1("height"), img.height()); + if(img.linkOnly()) { + imgElem.setAttribute(QString::fromLatin1("link"), QString::fromLatin1("true")); + } + QCString imgText = KCodecs::base64Encode(img.byteArray()); + imgElem.appendChild(dom_.createTextNode(QString::fromLatin1(imgText))); + } else { + const Data::ImageInfo& info = ImageFactory::imageInfo(id_); + if(info.isNull()) { + return; + } + imgElem.setAttribute(QString::fromLatin1("format"), info.format); + imgElem.setAttribute(QString::fromLatin1("id"), info.id); + // only load the images to read the size if necessary + const bool loadImageIfNecessary = options() & Export::ExportImageSize; + imgElem.setAttribute(QString::fromLatin1("width"), 
info.width(loadImageIfNecessary)); + imgElem.setAttribute(QString::fromLatin1("height"), info.height(loadImageIfNecessary)); + if(info.linkOnly) { + imgElem.setAttribute(QString::fromLatin1("link"), QString::fromLatin1("true")); + } + } + parent_.appendChild(imgElem); +} + +void TellicoXMLExporter::exportGroupXML(QDomDocument& dom_, QDomElement& parent_) const { + Data::EntryVec vec = entries(); // need a copy for ::contains(); + bool exportAll = collection()->entries().count() == vec.count(); + // iterate over each group, which are the first children + for(GroupIterator gIt = Controller::self()->groupIterator(); gIt.group(); ++gIt) { + if(gIt.group()->isEmpty()) { + continue; + } + QDomElement groupElem = dom_.createElement(QString::fromLatin1("group")); + groupElem.setAttribute(QString::fromLatin1("title"), gIt.group()->groupName()); + // now iterate over all entry items in the group + Data::EntryVec sorted = Data::Document::self()->sortEntries(*gIt.group()); + for(Data::EntryVec::Iterator eIt = sorted.begin(); eIt != sorted.end(); ++eIt) { + if(!exportAll && !vec.contains(eIt)) { + continue; + } + QDomElement entryRefElem = dom_.createElement(QString::fromLatin1("entryRef")); + entryRefElem.setAttribute(QString::fromLatin1("id"), eIt->id()); + groupElem.appendChild(entryRefElem); + } + if(groupElem.hasChildNodes()) { + parent_.appendChild(groupElem); + } + } +} + +void TellicoXMLExporter::exportFilterXML(QDomDocument& dom_, QDomElement& parent_, FilterPtr filter_) const { + QDomElement filterElem = dom_.createElement(QString::fromLatin1("filter")); + filterElem.setAttribute(QString::fromLatin1("name"), filter_->name()); + + QString match = (filter_->op() == Filter::MatchAll) ? 
QString::fromLatin1("all") : QString::fromLatin1("any"); + filterElem.setAttribute(QString::fromLatin1("match"), match); + + for(QPtrListIterator<FilterRule> it(*filter_); it.current(); ++it) { + QDomElement ruleElem = dom_.createElement(QString::fromLatin1("rule")); + ruleElem.setAttribute(QString::fromLatin1("field"), it.current()->fieldName()); + ruleElem.setAttribute(QString::fromLatin1("pattern"), it.current()->pattern()); + switch(it.current()->function()) { + case FilterRule::FuncContains: + ruleElem.setAttribute(QString::fromLatin1("function"), QString::fromLatin1("contains")); + break; + case FilterRule::FuncNotContains: + ruleElem.setAttribute(QString::fromLatin1("function"), QString::fromLatin1("notcontains")); + break; + case FilterRule::FuncEquals: + ruleElem.setAttribute(QString::fromLatin1("function"), QString::fromLatin1("equals")); + break; + case FilterRule::FuncNotEquals: + ruleElem.setAttribute(QString::fromLatin1("function"), QString::fromLatin1("notequals")); + break; + case FilterRule::FuncRegExp: + ruleElem.setAttribute(QString::fromLatin1("function"), QString::fromLatin1("regexp")); + break; + case FilterRule::FuncNotRegExp: + ruleElem.setAttribute(QString::fromLatin1("function"), QString::fromLatin1("notregexp")); + break; + default: + kdWarning() << "TellicoXMLExporter::exportFilterXML() - no matching rule function!" 
<< endl; + } + filterElem.appendChild(ruleElem); + } + + parent_.appendChild(filterElem); +} + +void TellicoXMLExporter::exportBorrowerXML(QDomDocument& dom_, QDomElement& parent_, + Data::BorrowerPtr borrower_) const { + if(borrower_->isEmpty()) { + return; + } + + QDomElement bElem = dom_.createElement(QString::fromLatin1("borrower")); + parent_.appendChild(bElem); + + bElem.setAttribute(QString::fromLatin1("name"), borrower_->name()); + bElem.setAttribute(QString::fromLatin1("uid"), borrower_->uid()); + + const Data::LoanVec& loans = borrower_->loans(); + for(Data::LoanVec::ConstIterator it = loans.constBegin(); it != loans.constEnd(); ++it) { + QDomElement lElem = dom_.createElement(QString::fromLatin1("loan")); + bElem.appendChild(lElem); + + lElem.setAttribute(QString::fromLatin1("uid"), it->uid()); + lElem.setAttribute(QString::fromLatin1("entryRef"), it->entry()->id()); + lElem.setAttribute(QString::fromLatin1("loanDate"), it->loanDate().toString(Qt::ISODate)); + lElem.setAttribute(QString::fromLatin1("dueDate"), it->dueDate().toString(Qt::ISODate)); + if(it->inCalendar()) { + lElem.setAttribute(QString::fromLatin1("calendar"), QString::fromLatin1("true")); + } + + lElem.appendChild(dom_.createTextNode(it->note())); + } +} + +QWidget* TellicoXMLExporter::widget(QWidget* parent_, const char* name_/*=0*/) { + if(m_widget && m_widget->parent() == parent_) { + return m_widget; + } + + m_widget = new QWidget(parent_, name_); + QVBoxLayout* l = new QVBoxLayout(m_widget); + + QGroupBox* box = new QGroupBox(1, Qt::Horizontal, i18n("Tellico XML Options"), m_widget); + l->addWidget(box); + + m_checkIncludeImages = new QCheckBox(i18n("Include images in XML document"), box); + m_checkIncludeImages->setChecked(m_includeImages); + QWhatsThis::add(m_checkIncludeImages, i18n("If checked, the images in the document will be included " + "in the XML stream as base64 encoded elements.")); + + return m_widget; +} + +void TellicoXMLExporter::readOptions(KConfig* config_) { + 
KConfigGroup group(config_, QString::fromLatin1("ExportOptions - %1").arg(formatString())); + m_includeImages = group.readBoolEntry("Include Images", m_includeImages); +} + +void TellicoXMLExporter::saveOptions(KConfig* config_) { + m_includeImages = m_checkIncludeImages->isChecked(); + + KConfigGroup group(config_, QString::fromLatin1("ExportOptions - %1").arg(formatString())); + group.writeEntry("Include Images", m_includeImages); +} + +#include "tellicoxmlexporter.moc" diff --git a/src/translators/tellicoxmlexporter.h b/src/translators/tellicoxmlexporter.h new file mode 100644 index 0000000..705c2dc --- /dev/null +++ b/src/translators/tellicoxmlexporter.h @@ -0,0 +1,80 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICOXMLEXPORTER_H +#define TELLICOXMLEXPORTER_H + +namespace Tellico { + class Filter; +} + +class QDomDocument; +class QDomElement; +class QCheckBox; + +#include "exporter.h" +#include "../stringset.h" + +namespace Tellico { + namespace Export { + +/** + * @author Robby Stephenson + */ +class TellicoXMLExporter : public Exporter { +Q_OBJECT + +public: + TellicoXMLExporter(); + TellicoXMLExporter(Data::CollPtr coll); + + virtual bool exec(); + virtual QString formatString() const; + virtual QString fileFilter() const; + + QDomDocument exportXML() const; + QString exportXMLString() const; + + void setIncludeImages(bool b) { m_includeImages = b; } + void setIncludeGroups(bool b) { 
m_includeGroups = b; } + + virtual QWidget* widget(QWidget*, const char*); + virtual void readOptions(KConfig* cfg); + virtual void saveOptions(KConfig* cfg); + + /** + * An integer indicating format version. + */ + static const unsigned syntaxVersion; + +private: + void exportCollectionXML(QDomDocument& doc, QDomElement& parent, bool format) const; + void exportFieldXML(QDomDocument& doc, QDomElement& parent, Data::FieldPtr field) const; + void exportEntryXML(QDomDocument& doc, QDomElement& parent, Data::EntryPtr entry, bool format) const; + void exportImageXML(QDomDocument& doc, QDomElement& parent, const QString& imageID) const; + void exportGroupXML(QDomDocument& doc, QDomElement& parent) const; + void exportFilterXML(QDomDocument& doc, QDomElement& parent, FilterPtr filter) const; + void exportBorrowerXML(QDomDocument& doc, QDomElement& parent, Data::BorrowerPtr borrower) const; + + // keep track of which images were written, since some entries could have same image + mutable StringSet m_images; + bool m_includeImages : 1; + bool m_includeGroups : 1; + + QWidget* m_widget; + QCheckBox* m_checkIncludeImages; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/tellicozipexporter.cpp b/src/translators/tellicozipexporter.cpp new file mode 100644 index 0000000..42e0e70 --- /dev/null +++ b/src/translators/tellicozipexporter.cpp @@ -0,0 +1,133 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + 
***************************************************************************/ + +#include "tellicozipexporter.h" +#include "tellicoxmlexporter.h" +#include "../collection.h" +#include "../imagefactory.h" +#include "../image.h" +#include "../filehandler.h" +#include "../stringset.h" +#include "../tellico_debug.h" +#include "../progressmanager.h" + +#include <klocale.h> +#include <kconfig.h> +#include <kzip.h> +#include <kapplication.h> + +#include <qdom.h> +#include <qbuffer.h> + +using Tellico::Export::TellicoZipExporter; + +QString TellicoZipExporter::formatString() const { + return i18n("Tellico Zip File"); +} + +QString TellicoZipExporter::fileFilter() const { + return i18n("*.tc *.bc|Tellico Files (*.tc)") + QChar('\n') + i18n("*|All Files"); +} + +bool TellicoZipExporter::exec() { + m_cancelled = false; + Data::CollPtr coll = collection(); + if(!coll) { + return false; + } + + // TODO: maybe need label? + ProgressItem& item = ProgressManager::self()->newProgressItem(this, QString::null, true); + item.setTotalSteps(100); + connect(&item, SIGNAL(signalCancelled(ProgressItem*)), SLOT(slotCancel())); + ProgressItem::Done done(this); + + TellicoXMLExporter exp; + exp.setEntries(entries()); + exp.setURL(url()); // needed in case of relative URL values + long opt = options(); + opt |= Export::ExportUTF8; // always export to UTF-8 + opt |= Export::ExportImages; // always list the images in the xml + opt &= ~Export::ExportProgress; // don't show progress for xml export + exp.setOptions(opt); + exp.setIncludeImages(false); // do not include the images themselves in XML + QCString xml = exp.exportXML().toCString(); // encoded in utf-8 + ProgressManager::self()->setProgress(this, 5); + + QByteArray data; + QBuffer buf(data); + + if(m_cancelled) { + return true; // intentionally cancelled + } + + KZip zip(&buf); + zip.open(IO_WriteOnly); + zip.writeFile(QString::fromLatin1("tellico.xml"), QString::null, QString::null, xml.length(), xml); + + if(m_includeImages) { + 
ProgressManager::self()->setProgress(this, 10); + // gonna be lazy and just increment progress every 3 images + // it might be less, might be more + uint j = 0; + const QString imagesDir = QString::fromLatin1("images/"); + StringSet imageSet; + Data::FieldVec imageFields = coll->imageFields(); + // already took 10%, only 90% left + const uint stepSize = QMAX(1, (coll->entryCount() * imageFields.count()) / 90); + for(Data::EntryVec::ConstIterator it = entries().begin(); it != entries().end() && !m_cancelled; ++it) { + for(Data::FieldVec::Iterator fIt = imageFields.begin(); fIt != imageFields.end(); ++fIt, ++j) { + const QString id = it->field(fIt); + if(id.isEmpty() || imageSet.has(id)) { + continue; + } + const Data::ImageInfo& info = ImageFactory::imageInfo(id); + if(info.linkOnly) { + myLog() << "TellicoZipExporter::exec() - not copying linked image: " << id << endl; + continue; + } + const Data::Image& img = ImageFactory::imageById(id); + // if no image, continue + if(img.isNull()) { + kdWarning() << "TellicoZipExporter::exec() - no image found for " << fIt->title() << " field" << endl; + kdWarning() << "...for the entry titled " << it->title() << endl; + continue; + } + QByteArray ba = img.byteArray(); +// myDebug() << "TellicoZipExporter::data() - adding image id = " << it->field(fIt) << endl; + zip.writeFile(imagesDir + id, QString::null, QString::null, ba.size(), ba); + imageSet.add(id); + if(j%stepSize == 0) { + ProgressManager::self()->setProgress(this, QMIN(10+j/stepSize, 99)); + kapp->processEvents(); + } + } + } + } else { + ProgressManager::self()->setProgress(this, 80); + } + + zip.close(); + if(m_cancelled) { + return true; + } + + bool success = FileHandler::writeDataURL(url(), data, options() & Export::ExportForce); + return success; +} + +void TellicoZipExporter::slotCancel() { + m_cancelled = true; +} + +#include "tellicozipexporter.moc" diff --git a/src/translators/tellicozipexporter.h b/src/translators/tellicozipexporter.h new file mode 100644 
index 0000000..da167d5 --- /dev/null +++ b/src/translators/tellicozipexporter.h @@ -0,0 +1,50 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TELLICOZIPEXPORTER_H +#define TELLICOZIPEXPORTER_H + +#include "exporter.h" + +namespace Tellico { + namespace Export { + +/** + * @author Robby Stephenson + */ +class TellicoZipExporter : public Exporter { +Q_OBJECT + +public: + TellicoZipExporter() : Exporter(), m_includeImages(true), m_cancelled(false) {} + + virtual bool exec(); + virtual QString formatString() const; + virtual QString fileFilter() const; + + // no options + virtual QWidget* widget(QWidget*, const char*) { return 0; } + + void setIncludeImages(bool b) { m_includeImages = b; } + +public slots: + void slotCancel(); + +private: + bool m_includeImages : 1; + bool m_cancelled : 1; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/textimporter.cpp b/src/translators/textimporter.cpp new file mode 100644 index 0000000..3130a0f --- /dev/null +++ b/src/translators/textimporter.cpp @@ -0,0 +1,29 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it 
and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "textimporter.h" +#include "../filehandler.h" + +using Tellico::Import::TextImporter; + +TextImporter::TextImporter(const KURL& url_, bool useUTF8_) + : Import::Importer(url_) { + if(url_.isValid()) { + setText(FileHandler::readTextFile(url_, false, useUTF8_)); + } +} + +TextImporter::TextImporter(const QString& text_) : Import::Importer(text_) { +} + +#include "textimporter.moc" diff --git a/src/translators/textimporter.h b/src/translators/textimporter.h new file mode 100644 index 0000000..c4500e5 --- /dev/null +++ b/src/translators/textimporter.h @@ -0,0 +1,42 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TEXTIMPORTER_H +#define TEXTIMPORTER_H + +#include "importer.h" + +namespace Tellico { + namespace Import { + +/** + * The TextImporter class is meant as an abstract class for any importer which reads text files. + * + * @author Robby Stephenson + */ +class TextImporter : public Importer { +Q_OBJECT + +public: + /** + * In the constructor, the contents of the file are read. 
+ * + * @param url The file to be imported + */ + TextImporter(const KURL& url, bool useUTF8_=false); + TextImporter(const QString& text); +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/translators.h b/src/translators/translators.h new file mode 100644 index 0000000..c6c3bc3 --- /dev/null +++ b/src/translators/translators.h @@ -0,0 +1,77 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef TRANSLATORS_H +#define TRANSLATORS_H + +namespace Tellico { + namespace Import { + enum Format { + TellicoXML = 0, + Bibtex, + Bibtexml, + CSV, + XSLT, + AudioFile, + MODS, + Alexandria, + FreeDB, + RIS, + GCfilms, + FileListing, + GRS1, + AMC, + Griffith, + PDF, + Referencer, + Delicious + }; + + enum Action { + Replace, + Append, + Merge + }; + + enum Target { + None, + File, + Dir + }; + } + + namespace Export { + enum Format { + TellicoXML = 0, + TellicoZip, + Bibtex, + Bibtexml, + HTML, + CSV, + XSLT, + Text, + PilotDB, + Alexandria, + ONIX, + GCfilms + }; + + enum Target { + None, + File, + Dir + }; + } +} + +#endif diff --git a/src/translators/xmlimporter.cpp b/src/translators/xmlimporter.cpp new file mode 100644 index 0000000..ce345c4 --- /dev/null +++ b/src/translators/xmlimporter.cpp @@ -0,0 +1,72 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + 
***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "xmlimporter.h" +#include "../filehandler.h" +#include "../collection.h" + +#include <klocale.h> + +using Tellico::Import::XMLImporter; + +XMLImporter::XMLImporter(const KURL& url_) : Import::Importer(url_) { + if(!url_.isEmpty() && url_.isValid()) { + m_dom = FileHandler::readXMLFile(url_, true); + } +} + +XMLImporter::XMLImporter(const QString& text_) : Import::Importer(text_) { + if(text_.isEmpty()) { + return; + } + setText(text_); +} + +XMLImporter::XMLImporter(const QByteArray& data_) : Import::Importer(KURL()) { + if(data_.isEmpty()) { + return; + } + + QString errorMsg; + int errorLine, errorColumn; + if(!m_dom.setContent(data_, true, &errorMsg, &errorLine, &errorColumn)) { + QString str = i18n("There is an XML parsing error in line %1, column %2.").arg(errorLine).arg(errorColumn); + str += QString::fromLatin1("\n"); + str += i18n("The error message from Qt is:"); + str += QString::fromLatin1("\n\t") + errorMsg; + setStatusMessage(str); + return; + } +} + +XMLImporter::XMLImporter(const QDomDocument& dom_) : Import::Importer(KURL()), m_dom(dom_) { +} + +void XMLImporter::setText(const QString& text_) { + Importer::setText(text_); + QString errorMsg; + int errorLine, errorColumn; + if(!m_dom.setContent(text_, true, &errorMsg, &errorLine, &errorColumn)) { + QString str = i18n("There is an XML parsing error in line %1, column %2.").arg(errorLine).arg(errorColumn); + str += QString::fromLatin1("\n"); + str += i18n("The error message from Qt is:"); + str += QString::fromLatin1("\n\t") + errorMsg; + 
setStatusMessage(str); + } +} + +Tellico::Data::CollPtr XMLImporter::collection() { + return 0; +} + +#include "xmlimporter.moc" diff --git a/src/translators/xmlimporter.h b/src/translators/xmlimporter.h new file mode 100644 index 0000000..743a1c1 --- /dev/null +++ b/src/translators/xmlimporter.h @@ -0,0 +1,74 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef XMLIMPORTER_H +#define XMLIMPORTER_H + +#include "importer.h" + +#include <qdom.h> + +namespace Tellico { + namespace Import { + +/** + * The XMLImporter class is meant as an abstract class for any importer which reads xml files. + * + * @author Robby Stephenson + */ +class XMLImporter : public Importer { +Q_OBJECT + +public: + /** + * In the constructor, the contents of the file are read. + * + * @param url The file to be imported + */ + XMLImporter(const KURL& url); + /** + * Imports xml text. + * + * @param text The text + */ + XMLImporter(const QString& text); + /** + * Imports xml text from a byte array. + * + * @param data The Data + */ + XMLImporter(const QByteArray& data); + XMLImporter(const QDomDocument& dom); + + virtual void setText(const QString& text); + + /** + * This class gets used as a utility XML loader. This should never get called, + * but cannot be abstract. + */ + virtual Data::CollPtr collection(); + + /** + * Returns the contents of the imported file. 
+ * + * @return The file contents + */ + const QDomDocument& domDocument() const { return m_dom; } + +private: + QDomDocument m_dom; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/xsltexporter.cpp b/src/translators/xsltexporter.cpp new file mode 100644 index 0000000..54ca8aa --- /dev/null +++ b/src/translators/xsltexporter.cpp @@ -0,0 +1,80 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "xsltexporter.h" +#include "xslthandler.h" +#include "tellicoxmlexporter.h" +#include "../filehandler.h" + +#include <klocale.h> +#include <kurlrequester.h> + +#include <qlabel.h> +#include <qgroupbox.h> +#include <qlayout.h> +#include <qhbox.h> +#include <qdom.h> +#include <qwhatsthis.h> + +using Tellico::Export::XSLTExporter; + +XSLTExporter::XSLTExporter() : Export::Exporter(), + m_widget(0), + m_URLRequester(0) { +} + +QString XSLTExporter::formatString() const { + return i18n("XSLT"); +} + +QString XSLTExporter::fileFilter() const { + return i18n("*|All Files"); +} + + +bool XSLTExporter::exec() { + KURL u = m_URLRequester->url(); + if(u.isEmpty() || !u.isValid()) { + return QString::null; + } + // XSLTHandler handler(FileHandler::readXMLFile(url)); + XSLTHandler handler(u); + + TellicoXMLExporter exporter; + exporter.setEntries(entries()); + exporter.setOptions(options()); + QDomDocument dom = exporter.exportXML(); + return FileHandler::writeTextURL(url(), 
handler.applyStylesheet(dom.toString()), + options() & ExportUTF8, options() & Export::ExportForce); +} + +QWidget* XSLTExporter::widget(QWidget* parent_, const char* name_/*=0*/) { + if(m_widget && m_widget->parent() == parent_) { + return m_widget; + } + + m_widget = new QWidget(parent_, name_); + QVBoxLayout* l = new QVBoxLayout(m_widget); + + QGroupBox* group = new QGroupBox(1, Qt::Horizontal, i18n("XSLT Options"), m_widget); + l->addWidget(group); + + QHBox* box = new QHBox(group); + box->setSpacing(4); + (void) new QLabel(i18n("XSLT file:"), box); + m_URLRequester = new KURLRequester(box); + QWhatsThis::add(m_URLRequester, i18n("Choose the XSLT file used to transform the Tellico XML data.")); + + l->addStretch(1); + return m_widget; +} diff --git a/src/translators/xsltexporter.h b/src/translators/xsltexporter.h new file mode 100644 index 0000000..ae353d2 --- /dev/null +++ b/src/translators/xsltexporter.h @@ -0,0 +1,44 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef XSLTEXPORTER_H +#define XSLTEXPORTER_H + +class KURLRequester; + +#include "exporter.h" + +namespace Tellico { + namespace Export { + +/** + * @author Robby Stephenson + */ +class XSLTExporter : public Exporter { +public: + XSLTExporter(); + + virtual bool exec(); + virtual QString formatString() const; + virtual QString fileFilter() const; + + virtual QWidget* widget(QWidget* parent, const char* name=0); + +private: + QWidget* m_widget; 
+ KURLRequester* m_URLRequester; +}; + + } // end namespace +} // end namespace +#endif diff --git a/src/translators/xslthandler.cpp b/src/translators/xslthandler.cpp new file mode 100644 index 0000000..e25eef5 --- /dev/null +++ b/src/translators/xslthandler.cpp @@ -0,0 +1,267 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "xslthandler.h" +#include "../latin1literal.h" +#include "../tellico_debug.h" +#include "../tellico_utils.h" + +#include <qdom.h> +#include <qtextcodec.h> + +#include <kurl.h> + +extern "C" { +#include <libxslt/xslt.h> +#include <libxslt/transform.h> +#include <libxslt/xsltutils.h> +#include <libxslt/extensions.h> + +#include <libexslt/exslt.h> +} + +// I don't want any network I/O at all +static const int xml_options = XML_PARSE_NOENT | XML_PARSE_NONET | XML_PARSE_NOCDATA; +static const int xslt_options = xml_options; + +/* some functions to pass to the XSLT libs */ +static int writeToQString(void* context, const char* buffer, int len) { + QString* t = static_cast<QString*>(context); + *t += QString::fromUtf8(buffer, len); + return len; +} + +static void closeQString(void* context) { + QString* t = static_cast<QString*>(context); + *t += QString::fromLatin1("\n"); +} + +using Tellico::XSLTHandler; + +XSLTHandler::XMLOutputBuffer::XMLOutputBuffer() : m_res(QString::null) { + m_buf = xmlOutputBufferCreateIO((xmlOutputWriteCallback)writeToQString, + (xmlOutputCloseCallback)closeQString, 
+ &m_res, 0); + if(m_buf) { + m_buf->written = 0; + } else { + myDebug() << "XMLOutputBuffer::XMLOutputBuffer() - error writing output buffer!" << endl; + } +} + +XSLTHandler::XMLOutputBuffer::~XMLOutputBuffer() { + if(m_buf) { + xmlOutputBufferClose(m_buf); //also flushes + m_buf = 0; + } +} + +int XSLTHandler::s_initCount = 0; + +XSLTHandler::XSLTHandler(const QCString& xsltFile_) : + m_stylesheet(0), + m_docIn(0), + m_docOut(0) { + init(); + QString file = KURL::encode_string(QString::fromLocal8Bit(xsltFile_)); + if(!file.isEmpty()) { + xmlDocPtr xsltDoc = xmlReadFile(file.utf8(), NULL, xslt_options); + m_stylesheet = xsltParseStylesheetDoc(xsltDoc); + if(!m_stylesheet) { + myDebug() << "XSLTHandler::applyStylesheet() - null stylesheet pointer for " << xsltFile_ << endl; + } + } +} + +XSLTHandler::XSLTHandler(const KURL& xsltURL_) : + m_stylesheet(0), + m_docIn(0), + m_docOut(0) { + init(); + if(xsltURL_.isValid() && xsltURL_.isLocalFile()) { + xmlDocPtr xsltDoc = xmlReadFile(xsltURL_.encodedPathAndQuery().utf8(), NULL, xslt_options); + m_stylesheet = xsltParseStylesheetDoc(xsltDoc); + if(!m_stylesheet) { + myDebug() << "XSLTHandler::applyStylesheet() - null stylesheet pointer for " << xsltURL_.path() << endl; + } + } +} + +XSLTHandler::XSLTHandler(const QDomDocument& xsltDoc_, const QCString& xsltFile_, bool translate_) : + m_stylesheet(0), + m_docIn(0), + m_docOut(0) { + init(); + QString file = KURL::encode_string(QString::fromLocal8Bit(xsltFile_)); + if(!xsltDoc_.isNull() && !file.isEmpty()) { + setXSLTDoc(xsltDoc_, file.utf8(), translate_); + } +} + +XSLTHandler::~XSLTHandler() { + if(m_stylesheet) { + xsltFreeStylesheet(m_stylesheet); + } + + if(m_docIn) { + xmlFreeDoc(m_docIn); + } + + if(m_docOut) { + xmlFreeDoc(m_docOut); + } + + --s_initCount; + if(s_initCount == 0) { + xsltUnregisterExtModule(EXSLT_STRINGS_NAMESPACE); + xsltUnregisterExtModule(EXSLT_DYNAMIC_NAMESPACE); + xsltCleanupGlobals(); + xmlCleanupParser(); + } +} + +void XSLTHandler::init() { 
+ if(s_initCount == 0) { + xmlSubstituteEntitiesDefault(1); + xmlLoadExtDtdDefaultValue = 0; + + // register all exslt extensions + exsltRegisterAll(); + } + ++s_initCount; + + m_params.clear(); +} + +void XSLTHandler::setXSLTDoc(const QDomDocument& dom_, const QCString& xsltFile_, bool translate_) { + bool utf8 = true; // XML defaults to utf-8 + + // need to find out if utf-8 or not + const QDomNodeList childs = dom_.childNodes(); + for(uint j = 0; j < childs.count(); ++j) { + if(childs.item(j).isProcessingInstruction()) { + QDomProcessingInstruction pi = childs.item(j).toProcessingInstruction(); + if(pi.data().lower().contains(QString::fromLatin1("encoding"))) { + if(!pi.data().lower().contains(QString::fromLatin1("utf-8"))) { + utf8 = false; +// } else { +// myDebug() << "XSLTHandler::setXSLTDoc() - PI = " << pi.data() << endl; + } + break; + } + } + } + + QString s; + if(translate_) { + s = Tellico::i18nReplace(dom_.toString(0 /* indent */)); + } else { + s = dom_.toString(); + } + + xmlDocPtr xsltDoc; + if(utf8) { + xsltDoc = xmlReadDoc(reinterpret_cast<xmlChar*>(s.utf8().data()), xsltFile_.data(), NULL, xslt_options); + } else { + xsltDoc = xmlReadDoc(reinterpret_cast<xmlChar*>(s.local8Bit().data()), xsltFile_.data(), NULL, xslt_options); + } + + if(m_stylesheet) { + xsltFreeStylesheet(m_stylesheet); + } + m_stylesheet = xsltParseStylesheetDoc(xsltDoc); + if(!m_stylesheet) { + myDebug() << "XSLTHandler::applyStylesheet() - null stylesheet pointer for " << xsltFile_ << endl; + } +// xmlFreeDoc(xsltDoc); // this causes a crash for some reason +} + +void XSLTHandler::addStringParam(const QCString& name_, const QCString& value_) { + QCString value = value_; + value.replace('\'', "'"); + addParam(name_, QCString("'") + value + QCString("'")); +} + +void XSLTHandler::addParam(const QCString& name_, const QCString& value_) { + m_params.insert(name_, value_); +// myDebug() << "XSLTHandler::addParam() - " << name_ << ":" << value_ << endl; +} + +void 
XSLTHandler::removeParam(const QCString& name_) { + m_params.remove(name_); +} + +const QCString& XSLTHandler::param(const QCString& name_) { + return m_params[name_]; +} + +QString XSLTHandler::applyStylesheet(const QString& text_) { + if(!m_stylesheet) { + myDebug() << "XSLTHandler::applyStylesheet() - null stylesheet pointer!" << endl; + return QString::null; + } + + m_docIn = xmlReadDoc(reinterpret_cast<xmlChar*>(text_.utf8().data()), NULL, NULL, xml_options); + + return process(); +} + +QString XSLTHandler::process() { + if(!m_docIn) { + myDebug() << "XSLTHandler::process() - error parsing input string!" << endl; + return QString::null; + } + + QMemArray<const char*> params(2*m_params.count() + 1); + params[0] = NULL; + QMap<QCString, QCString>::ConstIterator it = m_params.constBegin(); + QMap<QCString, QCString>::ConstIterator end = m_params.constEnd(); + for(uint i = 0; it != end; ++it) { + params[i ] = qstrdup(it.key()); + params[i+1] = qstrdup(it.data()); + params[i+2] = NULL; + i += 2; + } + // returns NULL on error + m_docOut = xsltApplyStylesheet(m_stylesheet, m_docIn, params.data()); + for(uint i = 0; i < 2*m_params.count(); ++i) { + delete[] params[i]; + } + if(!m_docOut) { + myDebug() << "XSLTHandler::applyStylesheet() - error applying stylesheet!" << endl; + return QString::null; + } + + XMLOutputBuffer output; + if(output.isValid()) { + int num_bytes = xsltSaveResultTo(output.buffer(), m_docOut, m_stylesheet); + if(num_bytes == -1) { + myDebug() << "XSLTHandler::applyStylesheet() - error saving output buffer!" 
<< endl; + } + } + return output.result(); +} + +//static +QDomDocument& XSLTHandler::setLocaleEncoding(QDomDocument& dom_) { + const QDomNodeList childs = dom_.documentElement().childNodes(); + for(unsigned j = 0; j < childs.count(); ++j) { + if(childs.item(j).isElement() && childs.item(j).nodeName() == Latin1Literal("xsl:output")) { + QDomElement e = childs.item(j).toElement(); + const QString encoding = QString::fromLatin1(QTextCodec::codecForLocale()->name()); + e.setAttribute(QString::fromLatin1("encoding"), encoding); + break; + } + } + return dom_; +} diff --git a/src/translators/xslthandler.h b/src/translators/xslthandler.h new file mode 100644 index 0000000..f51b47c --- /dev/null +++ b/src/translators/xslthandler.h @@ -0,0 +1,112 @@ +/*************************************************************************** + copyright : (C) 2003-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#ifndef XSLTHANDLER_H +#define XSLTHANDLER_H + +#include <qmap.h> + +extern "C" { +// for xmlDocPtr +#include <libxml/tree.h> +// for xsltStyleSheetPtr +#include <libxslt/xsltInternals.h> +} + +class KURL; +class QDomDocument; + +namespace Tellico { + +/** + * The XSLTHandler contains all the code which uses XSLT processing to generate HTML or to + * translate to other formats. 
/**
 * The XSLTHandler contains all the code which uses XSLT processing to generate HTML or to
 * translate to other formats.
 *
 * A handler owns one parsed stylesheet plus the most recent input and output
 * documents, and frees them in its destructor.
 *
 * @author Robby Stephenson
 */
class XSLTHandler {

public:
  /**
   * Wraps a libxml output buffer that collects everything written to it in a QString.
   * The buffer is closed in the destructor.
   * NOTE(review): the class holds a raw buffer pointer and declares no copy
   * operations, so copying an instance looks unsafe - confirm it is never copied.
   */
  class XMLOutputBuffer {
  public:
    XMLOutputBuffer();
    ~XMLOutputBuffer();
    /** @return Whether the underlying libxml buffer was created */
    bool isValid() const { return (m_buf != 0); }
    /** @return The underlying libxml buffer; may be null, see isValid() */
    xmlOutputBuffer* buffer() const { return m_buf; }
    /** @return A copy of the text collected so far */
    QString result() const { return m_res; }
  private:
    xmlOutputBuffer* m_buf;
    QString m_res;
  };

  /**
   * @param xsltFile The XSLT file
   */
  XSLTHandler(const QCString& xsltFile);
  /**
   * @param xsltURL The XSLT URL
   */
  XSLTHandler(const KURL& xsltURL);
  /**
   * @param xsltDoc The XSLT DOM document
   * @param xsltFile The XSLT file, should be a url?
   * @param translate Passed on to setXSLTDoc()
   */
  XSLTHandler(const QDomDocument& xsltDoc, const QCString& xsltFile, bool translate=false);
  /**
   * Frees the stylesheet and the input and output documents.
   */
  ~XSLTHandler();

  /**
   * @return Whether a stylesheet has been loaded
   */
  bool isValid() const { return (m_stylesheet != NULL); }
  /**
   * Set the XSLT text
   *
   * @param dom The XSLT DOM document
   * @param xsltFile The XSLT file, should be a url?
   * @param translate NOTE(review): presumably requests translation of the
   *        stylesheet text before parsing - confirm against the implementation
   */
  void setXSLTDoc(const QDomDocument& dom, const QCString& xsltFile, bool translate=false);
  /**
   * Adds a param
   */
  void addParam(const QCString& name, const QCString& value);
  /**
   * Adds a string param
   */
  void addStringParam(const QCString& name, const QCString& value);
  /**
   * Removes a param
   */
  void removeParam(const QCString& name);
  /**
   * Returns the value of a param
   */
  const QCString& param(const QCString& name);
  /**
   * Processes text through the XSLT transformation.
   *
   * @param text The text to be transformed
   * @return The transformed text
   */
  QString applyStylesheet(const QString& text);

  /**
   * Takes the stylesheet document by reference and returns a reference to a document.
   * NOTE(review): the name suggests it sets the output encoding to the locale
   * encoding - confirm against the implementation.
   */
  static QDomDocument& setLocaleEncoding(QDomDocument& dom);

private:
  void init();
  QString process();

  xsltStylesheetPtr m_stylesheet;
  xmlDocPtr m_docIn;
  xmlDocPtr m_docOut;

  // parameter names mapped to their values
  QMap<QCString, QCString> m_params;

  // shared counter across all handlers
  static int s_initCount;
};
line.find(QString::fromLatin1("utf-8")) > 0; +} + +} + +// always use utf8 for xslt +XSLTImporter::XSLTImporter(const KURL& url_) : Tellico::Import::TextImporter(url_, isUTF8(url_)), + m_coll(0), + m_widget(0), + m_URLRequester(0) { +} + +Tellico::Data::CollPtr XSLTImporter::collection() { + if(m_coll) { + return m_coll; + } + + if(m_xsltURL.isEmpty()) { + // if there's also no widget, then something went wrong + if(!m_widget) { + setStatusMessage(i18n("A valid XSLT file is needed to import the file.")); + return 0; + } + m_xsltURL = m_URLRequester->url(); + } + if(m_xsltURL.isEmpty() || !m_xsltURL.isValid()) { + setStatusMessage(i18n("A valid XSLT file is needed to import the file.")); + return 0; + } + + XSLTHandler handler(m_xsltURL); + if(!handler.isValid()) { + setStatusMessage(i18n("Tellico encountered an error in XSLT processing.")); + return 0; + } +// kdDebug() << text() << endl; + QString str = handler.applyStylesheet(text()); +// kdDebug() << str << endl; + + Import::TellicoImporter imp(str); + m_coll = imp.collection(); + setStatusMessage(imp.statusMessage()); + return m_coll; +} + +QWidget* XSLTImporter::widget(QWidget* parent_, const char* name_) { + // if the url has already been set, then there's no widget + if(!m_xsltURL.isEmpty()) { + return 0; + } + + m_widget = new QWidget(parent_, name_); + QVBoxLayout* l = new QVBoxLayout(m_widget); + + QGroupBox* box = new QGroupBox(1, Qt::Vertical, i18n("XSLT Options"), m_widget); + l->addWidget(box); + + (void) new QLabel(i18n("XSLT file:"), box); + m_URLRequester = new KURLRequester(box); + + QString filter = i18n("*.xsl|XSL Files (*.xsl)") + QChar('\n'); + filter += i18n("*|All Files"); + m_URLRequester->setFilter(filter); + + l->addStretch(1); + return m_widget; +} + +#include "xsltimporter.moc" diff --git a/src/translators/xsltimporter.h b/src/translators/xsltimporter.h new file mode 100644 index 0000000..578b552 --- /dev/null +++ b/src/translators/xsltimporter.h @@ -0,0 +1,56 @@ 
/**
 * The XSLTImporter class takes care of transforming XML data using an XSL stylesheet.
 *
 * The stylesheet is either set up front with setXSLTURL() or chosen by the
 * user in the widget returned by widget().
 *
 * @author Robby Stephenson
 */
class XSLTImporter : public TextImporter {
Q_OBJECT

public:
  /**
   * @param url The file to be imported
   */
  XSLTImporter(const KURL& url);

  /**
   * @return The imported collection
   */
  virtual Data::CollPtr collection();
  /**
   * @param parent The parent widget
   * @param name The widget name
   * @return The options widget
   */
  virtual QWidget* widget(QWidget* parent, const char* name=0);
  /**
   * Sets the stylesheet to use.
   *
   * @param url The XSLT file
   */
  void setXSLTURL(const KURL& url) { m_xsltURL = url; }

private:
  Data::CollPtr m_coll;

  QWidget* m_widget;
  KURLRequester* m_URLRequester;
  // the stylesheet location, assigned by setXSLTURL()
  KURL m_xsltURL;
};