summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htlib/HtRegexReplace.cc
blob: eb409e808c0a4dbd6b60c973f8b77e09b5242852 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
//
// HtRegexReplace.cc
//
// HtRegexReplace: A subclass of HtRegex that can perform replacements
//
// Part of the ht://Dig package   <http://www.htdig.org/>
// Copyright (c) 2000-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: HtRegexReplace.cc,v 1.4 2004/05/28 13:15:21 lha Exp $
//

#include "HtRegexReplace.h"
#include <locale.h>


// Default constructor: creates an object with no replacement template set.
//
// Every member that empty() reads or frees is given a defined value here.
// The destructor calls empty(), which deletes repBuf and segMark, so leaving
// those pointers uninitialized would make destroying a default-constructed
// object undefined behaviour.
HtRegexReplace::HtRegexReplace()
{
	memset(&regs, 0, sizeof(regs));
	repBuf		= 0;
	segSize		= 0;
	segUsed		= 0;
	segMark		= 0;
	repLen		= 0;
}

// Construct from a search pattern and a replacement template.
//   from, case_sensitive - forwarded unchanged to the HtRegex base constructor.
//   to                   - replacement template, parsed by setReplace().
HtRegexReplace::HtRegexReplace(const char *from, const char *to, int case_sensitive)
	: HtRegex(from, case_sensitive)
{
	// Zero all bookkeeping first: setReplace() begins by calling empty(),
	// which deletes repBuf and segMark, so they must be null at this point.
	repLen = 0;
	segMark = 0;
	segUsed = 0;
	segSize = 0;
	repBuf = 0;
	memset(&regs, 0, sizeof(regs));

	setReplace(to);
}

// Destructor: all cleanup is delegated to empty(), which releases the
// replacement buffer and the mark array.
HtRegexReplace::~HtRegexReplace()
{
	this->empty();
}

// Match `str` against the compiled pattern and, if it matches, overwrite
// `str` with the expanded replacement template.
//
// Returns:
//   nullpattern - no pattern is compiled, or no replacement has been set
//   nullstr     - `str` is empty
//   1           - the pattern matched and `str` was rewritten
//   0           - the pattern did not match; `str` is left untouched
int HtRegexReplace::replace(String &str, int nullpattern, int nullstr)
{
	const int slotCount = sizeof(regs) / sizeof(regs[0]);

	if (compiled == 0 || repBuf == 0)
		return nullpattern;
	if (str.length() == 0)
		return nullstr;

	const char *subject = str.get();
	if (regexec(&re, subject, slotCount, regs, 0) != 0)
		return 0;

	// Pass 1: work out the final length up front so the result buffer can be
	// allocated once. Odd entries of segMark hold match register numbers;
	// registers that are out of range or did not take part in the match
	// contribute nothing.
	size_t total = repLen;
	for (int m = 1; m < (int) segUsed; m += 2)
	{
		const int slot = segMark[m];
		if (slot >= slotCount || regs[slot].rm_so == -1)
			continue;
		total += regs[slot].rm_eo - regs[slot].rm_so;
	}

	String expanded(total);		// preallocate the buffer

	// Pass 2: alternate between a run of literal replacement text (ending at
	// the offset stored in an even segMark entry) and the text captured by
	// the register named in the following odd entry.
	int literalFrom = 0;
	int idx = 0;
	for (;;)
	{
		const int literalTo = segMark[idx];
		expanded.append(repBuf + literalFrom, literalTo - literalFrom);
		literalFrom = literalTo;

		++idx;
		if (idx == (int) segUsed)
			break;				// that was the trailing literal run

		const int slot = segMark[idx];
		++idx;
		if (slot < slotCount && regs[slot].rm_so != -1)
		{
			expanded.append((char *) subject + regs[slot].rm_so,
					regs[slot].rm_eo - regs[slot].rm_so);
		}
	}

	str = expanded;
	return 1;
}

// Private: append one entry to the mark array, growing the array first when
// it is full.
//   n - value to store; setReplace() stores offsets into repBuf and match
//       register numbers here.
void HtRegexReplace::putMark(int n)
{
	// assert(segUsed <= segSize);
	if (segUsed == segSize)
	{
		size_t newSize = segSize * 2 + 5;		// grow in chunks
		int *newMark = new int[newSize];
		// segMark is null while segSize is 0, and memcpy must not be given a
		// null source even for a zero-byte copy, so skip it in that case.
		if (segSize > 0)
			memcpy(newMark, segMark, segSize * sizeof(int));
		// The array was allocated with new[], so release it with delete[].
		delete [] segMark;
		segMark = newMark;
		segSize = newSize;
	}
	segMark[segUsed++] = n;
}

// Discard the current replacement template: release the replacement buffer
// and the mark array, and reset the related counters to zero.
// Deleting a null pointer is a no-op, so calling this when both pointers are
// already null is harmless.
void HtRegexReplace::empty()
{
	// Both buffers are allocated with new[] (repBuf in setReplace(), segMark
	// in putMark()), so they must be released with delete[]; using scalar
	// delete on them is undefined behaviour.
	delete [] repBuf;
	repBuf = 0;
	delete [] segMark;
	segMark = 0;
	segSize = 0;
	segUsed = 0;
	repLen = 0;
}

// Parse the replacement template `to` and store it in compiled form,
// discarding any template set previously.
//
// Literal text is copied into repBuf. A backslash followed by a digit
// (\0 .. \9) refers to a match register: it is recorded as two marks (the
// current offset into repBuf, then the register number) and adds no literal
// text. A backslash followed by any other character yields that character
// literally. A lone backslash at the very end of the template is dropped.
// A final mark records where the literal text ends.
//
// A null `to` leaves the object with no replacement set, so replace() returns
// its `nullpattern` argument, rather than crashing inside strlen().
void HtRegexReplace::setReplace(const char *to)
{
	empty();

	if (to == 0) return;		// nothing to parse; repBuf stays null

	repBuf = new char[strlen(to)];		// replace buffer can never contain more text than to string
	int bufPos = 0;			// our position within the output buffer

	while (*to)
	{
		if (*to == '\\')
		{
			if (*++to == '\0') break;	// trailing backslash: nothing follows it
			if (*to >= '0' && *to <= '9')
			{
				putMark(bufPos);		// where the literal run ends
				putMark(*to - '0');		// which register to insert there
			}
			else
			{
				// We could handle some C style escapes here, but instead we just pass the character
				// after the backslash through. This means that \\, \" and \' will do the right thing.
				// It's unlikely that anyone will need any C style escapes in ht://Dig anyway.
				repBuf[bufPos++] = *to;
			}
			to++;
		}
		else
		{
			repBuf[bufPos++] = *to++;
		}
	}
	putMark(bufPos);		// closing mark: end of the last literal run
	repLen = (size_t) bufPos;
}