blob: d549bf4e401ae02b63931bde673b6266d71958f4 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
|
#
# Part of the ht://Dig package <http://www.htdig.org/>
# Copyright (c) 1999-2004 The ht://Dig Group
# For copyright details, see the file COPYING in your distribution
# or the GNU Library General Public License (LGPL) version 2 or later
# <http://www.gnu.org/copyleft/lgpl.html>
#
# $Id: t_htdig,v 1.16 2004/06/05 06:26:22 lha Exp $
#
# Tests the following config attributes:
# case_sensitive
# common_url_parts
# limit_urls_to
# robotstxt_name
# url_part_aliases
# url_rewrite_rules
# Extra command-line options for htdig, kept as one space-joined string.
# "$*" is used because assigning "$@" to a scalar is not well defined;
# the value is expanded unquoted later so it splits back into words.
flags="$*"
# try CONFIG EXPECTED
#
# Runs an initial dig ($htdig -t -i) with the given configuration file, runs
# $htpurge on it, lists the resulting URLs with ./document and compares the
# sorted list with EXPECTED.  On a mismatch, calls fail with both lists.
#
# Arguments:
#   $1 - path of the configuration file passed to every tool via -c
#   $2 - expected URL list, one URL per line, as produced by sort
# Globals:
#   htdig, htpurge - commands to run (not set in this file; presumably
#                    provided by ./test_functions)
#   flags          - extra htdig options, deliberately expanded unquoted
#   config, expected, got - assigned here and left set for the caller
try() {
  config=$1
  expected=$2
  # $htdig "$flags" -t -i -c $config # crashes on Solaris, HP-UX -- lha
  $htdig $flags -t -i -c "$config"
  # One test gives empty database -- suppress this warning.
  # This used to read "2&> /dev/null", which hands htpurge a stray "2"
  # argument under bash and, in shells without "&>", runs htpurge in the
  # background without redirecting anything.
  $htpurge -c "$config" > /dev/null 2>&1
  # only used when url_part_aliases was set before the call...
  set_attr url_part_aliases "bar foo"
  # Backticks kept on purpose: the script targets old Bourne shells too.
  got=`./document -c "$config" -u | sort`
  if [ "$expected" != "$got" ]
  then
    # The message spans several lines; its continuation lines must stay
    # unindented so the reported text is unchanged.
    fail "running htdig: expected
$expected
but got
$got"
  fi
}
# Source the shared test helpers with the --start-apache action selected.
# NOTE(review): presumably this starts the test web server and defines
# $testdir, $htdig, $htpurge, set_attr and fail -- confirm in test_functions.
test_functions_action=--start-apache
. ./test_functions
# Work on a copy of the stock configuration file.
conf=$testdir/conf/htdig.conf.tmp
# Quoted so a test directory containing whitespace does not split the paths.
cp "$testdir/conf/htdig.conf" "$conf"
# complete dig of set 1
# ($conf is quoted so that a path containing whitespace stays one argument.)
try "$conf" \
'http://localhost:7400/set1/
http://localhost:7400/set1/bad_local.htm
http://localhost:7400/set1/script.html
http://localhost:7400/set1/site%201.html
http://localhost:7400/set1/site2.html
http://localhost:7400/set1/site3.html
http://localhost:7400/set1/site4.html
http://localhost:7400/set1/sub%2520dir/
http://localhost:7400/set1/sub%2520dir/empty%20file.html
http://localhost:7400/set1/title.html'
# Check common_url_parts being encoded properly
# No new dig is run here: the database from the previous try is listed again
# after replacing common_url_parts with dummy tokens.
set_attr common_url_parts "dummy1 dummy2 dummy3 dummy4 dummy5 dummy6 dummy7 dummy8 dummy9 dummy10 dummy11 dummy12"
# Use this script's own $conf rather than the $config global that try()
# happens to leave behind (both name the same file at this point).
got=`./document -c "$conf" -u | sort`
expected='dummy1localhost:7400/set1/
dummy1localhost:7400/set1/bad_localdummy7
dummy1localhost:7400/set1/scriptdummy6
dummy1localhost:7400/set1/site%201dummy6
dummy1localhost:7400/set1/site2dummy6
dummy1localhost:7400/set1/site3dummy6
dummy1localhost:7400/set1/site4dummy6
dummy1localhost:7400/set1/sub%2520dir/
dummy1localhost:7400/set1/sub%2520dir/empty%20filedummy6
dummy1localhost:7400/set1/titledummy6'
if [ "$expected" != "$got" ]
then
fail "running htdig: expected
$expected
but got
$got"
fi
# Pretend we are another user; robots.txt bans us from seeing 'site*'
set_attr robotstxt_name other
# (Reverse mapping from 'foo' to 'bar' implemented in try.)
set_attr url_part_aliases "http://localhost:7400/set1 foo"
# ($conf is quoted so that a path containing whitespace stays one argument.)
try "$conf" \
'bar/
bar/bad_local.htm
bar/script.html
bar/sub%2520dir/
bar/sub%2520dir/empty%20file.html
bar/title.html'
# back to default.
set_attr url_part_aliases
# check limit_urls_to obeys case sensitive
# With a mixed-case start_url the expected URL list is empty.
set_attr start_url HTTP://LocalHost:7400/Set1/
try "$conf" ""
set_attr case_sensitive false
set_attr robotstxt_name htdig
# common_url_parts is case sensitive, despite case_sensitive=false
set_attr common_url_parts "http:// http://local HTTP://LocalHost 7400/set1"
# Replace site4.html by a file:/// URL. Must explicitly add leading chars
# ($PWD is double-quoted so a working directory containing whitespace does
# not split the rule into several arguments.)
set_attr url_rewrite_rules '(.*)si[a-z]*[4-9]*\.([a-z]*)tml file:///'"$PWD"'/htdocs/set1/site4.\\2tml'
# Single quotes are deliberate: ${start_url} is passed on literally, not
# expanded by the shell (presumably htdig expands it from the config).
set_attr limit_urls_to '${start_url} site4.html'
try "$conf" \
'file://'"$PWD"'/htdocs/set1/site4.html
http://localhost:7400/set1/
http://localhost:7400/set1/bad_local.htm
http://localhost:7400/set1/script.html
http://localhost:7400/set1/site%201.html
http://localhost:7400/set1/site2.html
http://localhost:7400/set1/site3.html
http://localhost:7400/set1/sub%2520dir/
http://localhost:7400/set1/sub%2520dir/empty%20file.html
http://localhost:7400/set1/title.html'
# Check common_url_parts being encoded properly
# No new dig is run here: the database from the previous try is listed again
# after replacing common_url_parts with dummy tokens.
set_attr common_url_parts "dummy1 dummy2 dummy3 dummy4"
# Use this script's own $conf rather than the $config global that try()
# happens to leave behind (both name the same file at this point).
got=`./document -c "$conf" -u | sort`
# $PWD is double-quoted so a working directory containing whitespace is
# kept intact in the expected value.
expected='dummy2host:dummy4/
dummy2host:dummy4/bad_local.htm
dummy2host:dummy4/script.html
dummy2host:dummy4/site%201.html
dummy2host:dummy4/site2.html
dummy2host:dummy4/site3.html
dummy2host:dummy4/sub%2520dir/
dummy2host:dummy4/sub%2520dir/empty%20file.html
dummy2host:dummy4/title.html
file://'"$PWD"'/htdocs/set1/site4.html'
if [ "$expected" != "$got" ]
then
fail "running htdig: expected
$expected
but got
$got"
fi
# Re-source the shared helpers with the --stop-apache action selected
# (the counterpart of the --start-apache call made before the tests).
test_functions_action=--stop-apache
. ./test_functions
|