1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
|
//
// SplitMatches.cc
//
// SplitMatches:
// Holds a list of lists with the matches, as specified in
// search_results_order.
//
// Part of the ht://Dig package <http://www.htdig.org/>
// Copyright (c) 2000-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: SplitMatches.cc,v 1.6 2004/05/28 13:15:24 lha Exp $
#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */
#include "StringList.h"
#include "HtRegex.h"
#include "SplitMatches.h"
#include <stdio.h>
#include <ctype.h>
// This class is only used in private members of SplitMatches.
// The OO-right thing would be to nest this inside the private
// declaration of SplitMatches, but that would cause portability
// problems according to
// <URL:http://www.mozilla.org/hacking/portable-cpp.html#inner_classes>.
//
// It is used as a container for a key (String) and a list.
//
// Container pairing one sub-area pattern (an HtRegex) with the list
// that collects the matches assigned to that sub-area.
class MatchArea : public Object
{
public:
    // Construct from a sub-area pattern string.  The string is split on
    // '|' and installed in the HtRegex member, except for the literal
    // "*" (the catch-the-rest marker), which installs no pattern.
    // Declared explicit so a String is never silently converted into a
    // MatchArea.
    explicit MatchArea(const String &);
    ~MatchArea();

    // Does this item match the string s?
    // True when match.match(s, 0, 0) returns non-zero.
    // NOTE(review): the two zero arguments presumably make an unset
    // pattern (the "*" case) and a null string count as "no match" --
    // confirm against HtRegex::match.
    inline bool Match(char *s)
    { return match.match(s, 0, 0) != 0; }

    // Return the contained list (a pointer to a member, owned by this
    // object).
    List *MatchList() { return &myList; }

private:
    HtRegex match;      // Compiled pattern for this sub-area.
    List myList;        // Matches collected for this sub-area.

    // These member functions are not supposed to be implemented, but
    // mentioned here as private so the compiler will not generate them if
    // someone puts in buggy code that would use them.
    MatchArea();
    MatchArea(const MatchArea &);
    void operator= (const MatchArea &);
};
// Set up the matcher for one sub-area from its pattern string.
MatchArea::MatchArea(const String &url_regex)
{
    // The catch-the-rest marker "*" is never installed as a real
    // pattern; such an item must always report "no match" from Match(),
    // so the regex member is simply left untouched.
    const bool is_catch_rest = (strcmp("*", url_regex.get()) == 0);
    if (is_catch_rest)
        return;

    // Any other string is a '|'-separated list of alternatives, handed
    // over to the regex member as a whole.
    StringList alternatives(url_regex.get(), '|');
    match.setEscaped(alternatives);
}
// Nothing to release by hand: both data members are held by value.
MatchArea::~MatchArea() {}
// Build the list of sub-areas from the "search_results_order"
// configuration attribute.
//
// The attribute value is split on whitespace (tab, space, CR, LF); each
// resulting word becomes one MatchArea, kept in configuration order.
// The word "*" marks the catch-the-rest area; if the configuration does
// not contain one, it is appended at the end, so myDefaultList is always
// set when the constructor returns.
SplitMatches::SplitMatches(Configuration &config)
{
    // A string literal must be referenced through a pointer to const;
    // binding it to a plain "char *" is ill-formed since C++11.
    const char *config_item = "search_results_order";
    StringList sl(config[config_item], "\t \r\n");

    mySubAreas = new List();
    myDefaultList = 0;

    // Parse each as in TemplateList::createFromString.
    for (int i = 0; i < sl.Count(); i++)
    {
        String sub_area_pattern = sl[i];
        MatchArea *match_item = new MatchArea(sub_area_pattern);
        mySubAreas->Add(match_item);

        // If this is the magic catch-rest sub-area-pattern, we want to
        // use its list-pointer to store all URLs that do not match
        // anything else.
        // We will iterate over a list where one of the patterns is
        // known to not match, but that's a small penalty for keeping
        // the code simple.
        // Should "*" occur more than once, the last occurrence wins.
        if (strcmp("*", sub_area_pattern.get()) == 0)
            myDefaultList = match_item->MatchList();
    }

    // If we did not have a catch-the-rest pattern, install one at the
    // end of the list.
    if (myDefaultList == 0)
    {
        MatchArea *match_item = new MatchArea(String("*"));
        mySubAreas->Add(match_item);
        myDefaultList = match_item->MatchList();
    }
}
// Release the sub-area list allocated by the constructor.
SplitMatches::~SplitMatches()
{
    // Only mySubAreas is deleted.  myDefaultList merely points at the
    // list held by one of the items in mySubAreas, so it must never be
    // deleted on its own.
    delete mySubAreas;
}
// File one result under the first sub-area whose pattern matches url,
// or under the catch-the-rest list when no pattern matches.
void
SplitMatches::Add(ResultMatch *match, char *url)
{
    // Until some pattern claims the URL, the catch-the-rest list is
    // the destination.
    List *destination = myDefaultList;

    // Plain linear scan in configuration order.  Sub-area lists are
    // expected to stay small (tens of entries), so nothing smarter is
    // attempted here.
    mySubAreas->Start_Get();
    while (MatchArea *candidate =
               static_cast<MatchArea *>(mySubAreas->Get_Next()))
    {
        if (candidate->Match(url))
        {
            // Only the first matching area receives the result.
            destination = candidate->MatchList();
            break;
        }
    }

    destination->Add(match);
}
// Iterator step: advance the cursor of the sub-area list and return
// the match list of the item reached, or 0 once there are no more items.
List *
SplitMatches::Get_Next()
{
    MatchArea *current = static_cast<MatchArea *>(mySubAreas->Get_Next());
    return current ? current->MatchList() : 0;
}
// Concatenate the match lists of all sub-areas, in sub-area order, into
// one newly allocated list and return it.
List *
SplitMatches::JoinedLists()
{
    // The result is a fresh List rather than one of our own members, so
    // it does not depend on mySubAreas staying around.
    List *joined = new List();

    mySubAreas->Start_Get();
    for (MatchArea *current = static_cast<MatchArea *>(mySubAreas->Get_Next());
         current != 0;
         current = static_cast<MatchArea *>(mySubAreas->Get_Next()))
    {
        // The contents are moved "destructively": the sub-area's own
        // list is left empty afterwards.
        List *area_matches = current->MatchList();
        joined->AppendList(*area_matches);
    }

    return joined;
}
|