summaryrefslogtreecommitdiffstats
path: root/lib/interfaces/hashedstring.h
blob: c1bfd4ad88a493660eca126d77e4c4934ada00b0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/***************************************************************************
   copyright            : (C) 2006 by David Nolden
   email                : david.nolden.tdevelop@art-master.de
***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#ifndef HASHED_STRING_H
#define HASHED_STRING_H

#include<tqstring.h>
#include<tqdatastream.h>
#include<ksharedptr.h>
#include<set>
#include <ext/hash_map>
#include <string>

///A simple class that stores a string together with it's appropriate hash-key
class HashedString {
  public:
    HashedString() : m_hash( 0 ) {}

    HashedString( const TQString& str ) : m_str( str ) {
      initHash();
    }
    
    HashedString( const char* str ) : m_str( str ) {
      initHash();
    }

    inline size_t hash() const {
      return m_hash;
    }

    TQString str() const {
      return m_str;
    }

    bool operator == ( const HashedString& rhs ) const {
      if ( m_hash != rhs.m_hash )
        return false;
      return m_str == rhs.m_str;
    }

    ///Does not compare alphabetically, uses the hash-key for ordering.
    bool operator < ( const HashedString& rhs ) const {
      if ( m_hash < rhs.m_hash )
        return true;
      if ( m_hash == rhs.m_hash )
        return m_str < rhs.m_str;
      return false;
    }

    static size_t hashString( const TQString& str );

  private:
    void initHash();

    TQString m_str;
    size_t m_hash;

    friend TQDataStream& operator << ( TQDataStream& stream, const HashedString& str );
    friend TQDataStream& operator >> ( TQDataStream& stream, HashedString& str );
};

TQDataStream& operator << ( TQDataStream& stream, const HashedString& str );

TQDataStream& operator >> ( TQDataStream& stream, HashedString& str );

class HashedStringSetData;
class HashedStringSetGroup;

///This is a reference-counting string-set optimized for fast lookup of hashed strings
class HashedStringSet {
  public:
    HashedStringSet();

    ~HashedStringSet();

    ///Constructs a string-set from one single file
    HashedStringSet( const HashedString& file );

    HashedStringSet( const HashedStringSet& rhs );

    int size() const;

    HashedStringSet& operator = ( const HashedStringSet& rhs );
    ///@return whether the given file-name was included
    bool operator[] ( const HashedString& rhs ) const;

    void insert( const HashedString& str );

    HashedStringSet& operator +=( const HashedStringSet& );
    
    HashedStringSet& operator -=( const HashedStringSet& );

    ///intersection-test
    ///Returns true if all files that are part of this set are also part of the given set
    bool operator <= ( const HashedStringSet& rhs ) const;

    bool operator == ( const HashedStringSet& rhs ) const;

    void read( TQDataStream& stream );
    void write( TQDataStream& stream ) const;

    std::string print() const;

  size_t hash() const;
  private:
    friend class HashedStringSetGroup;
    void makeDataPrivate();
    TDESharedPtr<HashedStringSetData> m_data; //this implies some additional cost because TDEShared's destructor is virtual. Maybe change that by copying TDEShared without the virtual destructor.
    friend HashedStringSet operator + ( const HashedStringSet& lhs, const HashedStringSet& rhs );
};

HashedStringSet operator + ( const HashedStringSet& lhs, const HashedStringSet& rhs );

namespace __gnu_cxx {
template<>
struct hash<HashedString> {
  size_t operator () ( const HashedString& str ) const {
    return str.hash();
  }
};
}

///Used to find all registered HashedStringSet's that contain all strings given to findGroups(..)
class HashedStringSetGroup {
  public:
    typedef std::set<size_t> ItemSet;
    void addSet( size_t id, const HashedStringSet& set );
    void enableSet( size_t id );
    bool isDisabled( size_t id ) const;
    void disableSet( size_t id );
    void removeSet( size_t id );

    //Writes the ids of all registered and not disabled HashedStringSet's that are completely included in the given HashedStringSet efficiently)
    void findGroups( HashedStringSet strings, ItemSet& target ) const;

  private:
    typedef __gnu_cxx::hash_map<HashedString, ItemSet> GroupMap;
    typedef __gnu_cxx::hash_map<size_t, size_t> SizeMap;
    GroupMap m_map;
    SizeMap m_sizeMap;
    ItemSet m_disabled;
    ItemSet m_global;
};
#endif