summaryrefslogtreecommitdiffstats
path: root/kompare/libdiff2/levenshteintable.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'kompare/libdiff2/levenshteintable.cpp')
-rw-r--r--kompare/libdiff2/levenshteintable.cpp332
1 files changed, 332 insertions, 0 deletions
diff --git a/kompare/libdiff2/levenshteintable.cpp b/kompare/libdiff2/levenshteintable.cpp
new file mode 100644
index 00000000..54525aef
--- /dev/null
+++ b/kompare/libdiff2/levenshteintable.cpp
@@ -0,0 +1,332 @@
+/*******************************************************************************
+**
+** Filename : levenshteintable.cpp
+** Created on : 08 november, 2003
+** Copyright : (c) 2003 Otto Bruggeman
+** Email : bruggie@home.nl
+**
+*******************************************************************************/
+
+/*******************************************************************************
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+*******************************************************************************/
+
+#include <iostream>
+
+#include <qstring.h>
+
+#include <kdebug.h>
+#include <kglobal.h>
+
+#include "levenshteintable.h"
+
+#include "difference.h"
+
+using namespace Diff2;
+
+LevenshteinTable::LevenshteinTable()
+ : m_width( 256 ),
+ m_height( 256 ),
+ m_size( m_height * m_width ),
+ m_table( new unsigned int[ m_size ] ),
+ m_source( 0 ),
+ m_destination( 0 )
+{
+}
+
+LevenshteinTable::LevenshteinTable( unsigned int width, unsigned int height )
+ : m_width( width ),
+ m_height( height ),
+ m_size( m_width * m_height ),
+ m_table( new unsigned int[ m_size ] ),
+ m_source( 0 ),
+ m_destination( 0 )
+{
+}
+
+LevenshteinTable::~LevenshteinTable()
+{
+ delete[] m_table;
+ m_source = 0;
+ m_destination = 0;
+}
+
+int LevenshteinTable::getContent( unsigned int posX, unsigned int posY ) const
+{
+// kdDebug(8101) << "Width = " << m_width << ", height = " << m_height << ", posX = " << posX << ", posY = " << posY << endl;
+ return m_table[ posY * m_width + posX ];
+}
+
+int LevenshteinTable::setContent( unsigned int posX, unsigned int posY, int value )
+{
+ m_table[ posY * m_width + posX ] = value;
+
+ return 0;
+}
+
+bool LevenshteinTable::setSize( unsigned int width, unsigned int height )
+{
+ // Set a limit of 16.7 million entries, will be about 64 MB of ram, that should be plenty
+ if ( ( ( width ) * ( height ) ) > ( 256 * 256 * 256 ) )
+ return false;
+
+ if ( ( ( width ) * ( height ) ) > m_size )
+ {
+ delete[] m_table;
+
+ m_size = width * height;
+ m_table = new unsigned int[ m_size ];
+ }
+
+ m_width = width;
+ m_height = height;
+
+ return true;
+}
+
+void LevenshteinTable::dumpLevenshteinTable()
+{
+ for ( unsigned int i = 0; i < m_height; ++i )
+ {
+ for ( unsigned int j = 0; j < m_width; ++j )
+ {
+ std::cout.width( 3 );
+ std::cout << getContent( j, i );
+ }
+ std::cout << std::endl;
+ }
+}
+
+unsigned int LevenshteinTable::createTable( DifferenceString* source, DifferenceString* destination )
+{
+ m_source = source;
+ m_destination = destination;
+
+ QString s = ' ' + source->string(); // Optimization, so i dont have to subtract 1 from the indexes every
+ QString d = ' ' + destination->string(); // single time and add 1 to the width and height of the table
+
+ unsigned int m = s.length();
+ unsigned int n = d.length();
+
+ const QChar* sq = s.unicode();
+ const QChar* dq = d.unicode();
+
+ if ( m == 1 )
+ return --n;
+
+ if ( n == 1 )
+ return --m;
+
+ if ( !setSize( m, n ) )
+ return 0;
+
+ unsigned int i;
+ unsigned int j;
+
+ // initialize first row
+ for ( i = 0; i < m; ++i )
+ setContent( i, 0, i );
+ // initialize first column
+ for ( j = 0; j < n; ++j )
+ setContent( 0, j, j );
+
+ int cost = 0, north = 0, west = 0, northwest = 0;
+
+ ushort si, dj;
+ // Optimization, calculate row wise instead of column wise, wont trash the cache so much with large strings
+ for ( j = 1; j < n; ++j )
+ {
+ dj = dq[ j ];
+
+ for ( i = 1; i < m; ++i )
+ {
+ si = sq[ i ];
+ if ( si == dj )
+ cost = 0;
+ else
+ cost = 1;
+
+ north = getContent( i, j-1 ) + 1;
+ west = getContent( i-1, j ) + 1;
+ northwest = getContent( i-1, j-1 ) + cost;
+
+ setContent( i, j, kMin( north, kMin( west, northwest ) ) );
+ }
+ }
+
+ return getContent( m-1, n-1 );
+}
+
+int LevenshteinTable::chooseRoute( int c1, int c2, int c3 )
+{
+// kdDebug(8101) << "c1 = " << c1 << ", c2 = " << c2 << ", c3 = " << c3 << endl;
+ // preference order: c2, c3, c1, hopefully this will work out for me
+ if ( c2 <= c1 && c2 <= c3 )
+ return 1;
+
+ if ( c3 <= c2 && c3 <= c1 )
+ return 2;
+
+ return 0;
+}
+
+void LevenshteinTable::createListsOfMarkers()
+{
+// std::cout << source.latin1() << std::endl;
+// std::cout << destination.latin1() << std::endl;
+// dumpLevenshteinTable();
+
+ unsigned int x = m_width-1;
+ unsigned int y = m_height-1;
+
+ Marker* c = 0;
+
+ int n, nw, w, direction, currentValue;
+ while ( x > 0 && y > 0 )
+ {
+ currentValue = getContent( x, y );
+
+ nw = getContent( x - 1, y - 1 );
+ n = getContent( x, y - 1 );
+ w = getContent( x - 1, y );
+
+ direction = chooseRoute( n, nw, w );
+
+ switch ( direction )
+ {
+ case 0: // north
+// kdDebug(8101) << "Picking north" << endl;
+// kdDebug(8101) << "Source[" << ( x - 1 ) << "] = " << QString( source[ x-1 ] ) << ", destination[" << ( y - 1 ) << "] = " << QString( destination[ y-1 ] ) << endl;
+
+ if ( !m_destination->markerList().isEmpty() )
+ c = m_destination->markerList().first();
+ else
+ c = 0;
+
+ if ( c && c->type() == Marker::End )
+ {
+// kdDebug(8101) << "CurrentValue: " << currentValue << endl;
+ if ( n == currentValue )
+ m_destination->prepend( new Marker( Marker::Start, y ) );
+ // else: the change continues, dont do anything
+ }
+ else
+ {
+// kdDebug(8101) << "CurrentValue: " << currentValue << endl;
+ if ( n < currentValue )
+ m_destination->prepend( new Marker( Marker::End, y ) );
+ }
+
+ --y;
+ break;
+ case 1: // northwest
+// kdDebug(8101) << "Picking northwest" << endl;
+// kdDebug(8101) << "Source[" << ( x - 1 ) << "] = " << QString( source[ x-1 ] ) << ", destination[" << ( y - 1 ) << "] = " << QString( destination[ y-1 ] ) << endl;
+
+ if ( !m_destination->markerList().isEmpty() )
+ c = m_destination->markerList().first();
+ else
+ c = 0;
+
+ if ( c && c->type() == Marker::End )
+ {
+// kdDebug(8101) << "End found: CurrentValue: " << currentValue << endl;
+ if ( nw == currentValue )
+ m_destination->prepend( new Marker( Marker::Start, y ) );
+ // else: the change continues, dont do anything
+ }
+ else
+ {
+// kdDebug(8101) << "CurrentValue: " << currentValue << endl;
+ if ( nw < currentValue )
+ m_destination->prepend( new Marker( Marker::End, y ) );
+ }
+
+ if ( !m_source->markerList().isEmpty() )
+ c = m_source->markerList().first();
+ else
+ c = 0;
+
+ if ( c && c->type() == Marker::End )
+ {
+// kdDebug(8101) << "End found: CurrentValue: " << currentValue << endl;
+ if ( nw == currentValue )
+ m_source->prepend( new Marker( Marker::Start, x ) );
+ // else: the change continues, dont do anything
+ }
+ else
+ {
+// kdDebug(8101) << "CurrentValue: " << currentValue << endl;
+ if ( nw < currentValue )
+ m_source->prepend( new Marker( Marker::End, x ) );
+ }
+
+ --y;
+ --x;
+ break;
+ case 2: // west
+// kdDebug(8101) << "Picking west" << endl;
+// kdDebug(8101) << "Source[" << ( x - 1 ) << "] = " << QString( source[ x-1 ] ) << ", destination[" << ( y - 1 ) << "] = " << QString( destination[ y-1 ] ) << endl;
+
+ if ( !m_source->markerList().isEmpty() )
+ c = m_source->markerList().first();
+ else
+ c = 0;
+
+ if ( c && c->type() == Marker::End )
+ {
+// kdDebug(8101) << "End found: CurrentValue: " << currentValue << endl;
+ if ( w == currentValue )
+ m_source->prepend( new Marker( Marker::Start, x ) );
+ // else: the change continues, dont do anything
+ }
+ else
+ {
+// kdDebug(8101) << "CurrentValue: " << currentValue << endl;
+ if ( w < currentValue )
+ m_source->prepend( new Marker( Marker::End, x ) );
+ }
+
+ --x;
+ break;
+ }
+ }
+
+// kdDebug(8101) << "Source string: " << m_source->string() << endl;
+// c = m_source->markerList()->first();
+// QStringList list;
+// unsigned int prevValue = 0;
+// for ( ; c; c = m_source->markerList()->next() )
+// {
+// kdDebug(8101) << "Source Marker Entry : Type: " << c->type() << ", Offset: " << c->offset() << endl;
+// list.append( m_source->string().mid( prevValue, c->offset() - prevValue ) );
+// prevValue = c->offset();
+// }
+// if ( prevValue < m_source->string().length() - 1 )
+// {
+// list.append( m_source->string().mid( prevValue, m_source->string().length() - prevValue ) );
+// }
+// kdDebug(8101) << "Source Resulting stringlist : " << list.join("\n") << endl;
+
+// list.clear();
+// prevValue = 0;
+
+// kdDebug(8101) << "Destination string: " << m_destination->string() << endl;
+// for ( ; c; c = m_destination->markerList()->next() )
+// {
+// kdDebug(8101) << "Destination Marker Entry : Type: " << c->type() << ", Offset: " << c->offset() << endl;
+// list.append( m_destination->string().mid( prevValue, c->offset() - prevValue ) );
+// prevValue = c->offset();
+// }
+// if ( prevValue < m_destination->string().length() - 1 )
+// {
+// list.append( m_destination->string().mid( prevValue, m_destination->string().length() - prevValue ) );
+// }
+// kdDebug(8101) << "Destination Resulting string : " << list.join("\n") << endl;
+}
+