summaryrefslogtreecommitdiffstats
path: root/src/findduplicates.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/findduplicates.h')
-rw-r--r--src/findduplicates.h85
1 files changed, 85 insertions, 0 deletions
diff --git a/src/findduplicates.h b/src/findduplicates.h
new file mode 100644
index 0000000..e1a2fce
--- /dev/null
+++ b/src/findduplicates.h
@@ -0,0 +1,85 @@
+/***************************************************************************
+ * Copyright (C) 2004-2009 by Thomas Fischer *
+ * fischer@unix-ag.uni-kl.de *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+ ***************************************************************************/
+#ifndef KBIBTEXFINDDUPLICATES_H
+#define KBIBTEXFINDDUPLICATES_H
+
+#include <qobject.h>
+
+class KProgressDialog;
+class QWidget;
+
+namespace KBibTeX
+{
+
+ /** Finds duplicate elements in a BibTeX file; results are handed back as a DuplicateCliqueList (see the constructor).
+ @author Thomas Fischer <fischer@unix-ag.uni-kl.de>
+ */
+ class FindDuplicates : public QObject
+ {
+ Q_OBJECT
+ public:
+ typedef QValueList<BibTeX::Element*> DuplicateClique; // presumably one group of elements considered duplicates of each other
+ typedef QValueList<DuplicateClique> DuplicateCliqueList; // list of such groups; type of the constructor's result argument
+ // NOTE(review): only <qobject.h> is included by this header; QValueList, QMap, QStringList and the BibTeX:: types must already be declared by the includer -- confirm or add includes.
+ /**
+ * Find duplicates in a given BibTeX file. The sensitivity parameter controls the distance between two elements where both elements are considered to be duplicates. The parent object is used as a progress dialog's parent. A constructor has no return value; the result argument is taken by non-const reference.
+ * @param result list of duplicate cliques, passed by reference; presumably filled during construction -- confirm in the implementation
+ * @param sensitivity distance setting that decides when two elements count as duplicates (see maxDistance)
+ * @param file BibTeX file whose elements are searched for duplicates
+ * @param parent widget used as the parent of the progress dialog
+ */
+ FindDuplicates( DuplicateCliqueList &result, unsigned int sensitivity, BibTeX::File *file, QWidget *parent );
+
+ ~FindDuplicates();
+
+ /**
+ * Maximum sensitivity; presumably the upper bound for the constructor's sensitivity argument (confirm). Only declared here, the value is defined outside this header.
+ */
+ static const unsigned int maxDistance;
+ // Internal steps: determineDistances() and buildClique() both take the distVector array and the element-to-index map; each *Distance() member returns an unsigned value for two elements of the same kind.
+ protected:
+ void determineDistances( BibTeX::File *file, unsigned int *distVector, QMap<BibTeX::Element*, int> &mapElementToIndex, KProgressDialog *progDlg );
+ void buildClique( DuplicateCliqueList &result, BibTeX::File *file, unsigned int *distVector, QMap<BibTeX::Element*, int> &mapElementToIndex, unsigned int sensitivity );
+ unsigned int entryDistance( BibTeX::Entry *entryA, BibTeX::Entry *entryB );
+ unsigned int macroDistance( BibTeX::Macro *macroA, BibTeX::Macro *macroB );
+ unsigned int preambleDistance( BibTeX::Preamble *preambleA, BibTeX::Preamble *preambleB );
+ // Static helpers that read a single value from an entry or a macro; going by their names: title, author last names, year, macro key and macro value -- exact semantics are in the implementation.
+ static QString extractTitle( BibTeX::Entry *entry );
+ static QStringList authorsLastName( BibTeX::Entry *entry );
+ static int extractYear( BibTeX::Entry *entry );
+ static QString extractMacroKey( BibTeX::Macro *macro );
+ static QString extractMacroValue( BibTeX::Macro *macro );
+
+ private:
+ bool m_doCancel; // cancel flag; presumably set by slotCancel() and polled during the search -- confirm in the implementation
+ // Levenshtein-distance helpers taking two string lists or two strings; all of them return double.
+ double levenshteinDistance( const QStringList &s, const QStringList &t );
+ double levenshteinDistance( const QString &s, const QString &t );
+ double levenshteinDistanceWord( const QString &s, const QString &t );
+ int arrayOffset( int a, int b ); // NOTE(review): presumably maps an index pair (a, b) to an offset into distVector -- confirm
+ void sort( unsigned int *array, int len ); // operates on a raw array of len unsigned ints; ordering is defined in the implementation
+ // NOTE(review): slotCancel() is presumably connected to the progress dialog's cancel action -- confirm.
+ private slots:
+ void slotCancel();
+ };
+
+} // namespace KBibTeX
+
+#endif