summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/test/t_htdig_local
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/test/t_htdig_local')
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/test/t_htdig_local359
1 files changed, 359 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_htdig_local b/debian/htdig/htdig-3.2.0b6/test/t_htdig_local
new file mode 100755
index 00000000..8405ee07
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/t_htdig_local
@@ -0,0 +1,359 @@
+#
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License (LGPL) version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+# $Id: t_htdig_local,v 1.10 2004/05/28 13:15:30 lha Exp $
+#
+
+# Tests the following config attributes:
+# bad_local_extensions
+# check_unique_md5
+# content_classifier
+# database_dir
+# exclude_urls
+# limit_normalized
+# limit_urls_to
+# local_extensions
+# local_urls
+# local_urls_only
+# local_user_urls
+# max_hop_count
+# md5_db
+# mime_types
+# remove_default_doc
+# server_aliases
+# start_url
+
+test_functions_action=--start-apache
+. ./test_functions
+
+# set up config file with chosen non-default values
+config=$testdir/conf/htdig.conf.tmp
+cp $testdir/conf/htdig.conf2 $config
+
+################################################################################
+#test for local-file-system access to <http://...> URLs
+
+/bin/rm -f var/htdig2/*
+set_attr start_url "http://localhost:7400/set1/ http://localhost:7400/set1/title.html?site3.html http://localhost:7400/set1/title.html?site4.html"
+# ban ite3.htm from query, but not from main URL.
+# Allow site3.html, but not title.html?site3.html
+set_attr bad_querystr ite3.htm
+expected='bad_local.htm'
+got=`$htdig "$@" -t -i -vv -c $config | grep "Bad local extension:" | sed -e"s-.*/--"`
+if [ "$expected" != "$got" ]
+then
+ fail "first htdig: expected
+$expected
+but got
+$got"
+fi
+
+expected='db.docdb
+db.docs
+db.docs.index
+db.excerpts
+db.worddump
+db.words.db
+db.words.db_weakcmpr'
+got=`/bin/ls var/htdig2`
+if [ "$expected" != "$got" ]
+then
+ fail "created files: expected
+$expected
+but got
+$got"
+fi
+
+$htpurge -c $config
+
+# should http://localhost:7400/set1/sub%2520dir be purged?
+expected='http://localhost:7400/set1/
+http://localhost:7400/set1/bad_local.htm
+http://localhost:7400/set1/script.html
+http://localhost:7400/set1/site%201.html
+http://localhost:7400/set1/site2.html
+http://localhost:7400/set1/site3.html
+http://localhost:7400/set1/site4.html
+http://localhost:7400/set1/sub%2520dir/
+http://localhost:7400/set1/sub%2520dir/empty%20file.html
+http://localhost:7400/set1/title.html
+http://localhost:7400/set1/title.html?site4.html'
+
+got=`./document -c $config -u | sort`
+
+if [ "$expected" != "$got" ]
+then
+ fail "first document: expected
+$expected
+but got
+$got"
+fi
+
+set_attr bad_query_str
+
+
+################################################################################
+# limit_urls_to applies before server alias expansion
+set_attr start_url http://myhost/set1/index.html
+set_attr limit_urls_to "http://myhost/set1/"
+set_attr server_aliases myhost=localhost:7400
+$htdig "$@" -t -i -c $config || fail "couldn't dig second time"
+$htpurge -c $config || fail "couldn't purge second time"
+# only start_url uses alias, so only it passes the limit_urls_to test
+expected='http://localhost:7400/set1/'
+
+got=`./document -c $config -u | sort`
+
+if [ "$expected" != "$got" ]
+then
+ fail "second document: expected
+$expected
+but got
+$got"
+fi
+
+
+
+################################################################################
+# Check remote URLs not retrieved if local_urls_only specified
+set_attr local_urls_only true
+set_attr remove_default_doc site2.html
+# Note: local_urls_only doesn't handle directories without a default doc
+set_attr local_default_doc "site2.html empty%20file.html"
+set_attr start_url http://myhost/set1/index.html
+# don't care what the aliased URL is; only check the normalized one
+set_attr limit_urls_to
+set_attr limit_normalized "http://localhost:7400/set1/"
+set_attr server_aliases myhost=localhost:7400
+$htdig "$@" -t -i -c $config || fail "couldn't dig third time"
+$htpurge -c $config || fail "couldn't purge third time"
+expected='http://localhost:7400/set1/
+http://localhost:7400/set1/index.html
+http://localhost:7400/set1/script.html
+http://localhost:7400/set1/site%201.html
+http://localhost:7400/set1/site3.html
+http://localhost:7400/set1/site4.html
+http://localhost:7400/set1/sub%2520dir/
+http://localhost:7400/set1/title.html'
+
+got=`./document -c $config -u | sort`
+
+if [ "$expected" != "$got" ]
+then
+ fail "third document: expected
+$expected
+but got
+$got"
+fi
+set_attr remove_default_doc index.html
+set_attr local_urls_only false
+set_attr limit_normalized
+
+
+################################################################################
+#test for <file:///...> URLs
+
+expected='' # no "bad local" extensions for file:///
+# Check only one "title.html" found...
+set_attr check_unique_md5 true
+set_attr start_url "http://localhost:7400/set1/title.html file://$PWD/htdocs/set1/"
+set_attr limit_urls_to '${start_url}'
+got=`$htdig "$@" -t -i -vv -c $config | grep "Bad local extension:" | sed -e"s-.*/--"`
+if [ "$expected" != "$got" ]
+then
+ fail "fourth htdig: expected
+$expected
+but got
+$got"
+fi
+
+expected='db.docdb
+db.docs
+db.docs.index
+db.excerpts
+db.md5hash.db
+db.worddump
+db.words.db
+db.words.db_weakcmpr'
+got=`/bin/ls var/htdig2`
+if [ "$expected" != "$got" ]
+then
+ fail "fourth created files: expected
+$expected
+but got
+$got"
+fi
+
+$htpurge -c $config || fail "couldn't purge fourth time"
+
+expected='file:///set1/bad_local.htm
+file:///set1/index.html
+file:///set1/script.html
+file:///set1/site%201.html
+file:///set1/site2.html
+file:///set1/site3.html
+file:///set1/site4.html
+file:///set1/sub%2520dir/empty%20file.html
+/title.html'
+
+got=`./document -c $config -u | sed "s#${PWD}/htdocs##" | sort | sed "s#.*/title.html#/title.html#"`
+
+if [ "$expected" != "$got" ]
+then
+ fail "fourth document: expected
+$expected
+but got
+$got"
+fi
+
+
+################################################################################
+#test mime types handling
+
+expected='' # no "bad local" extensions for file:///
+set_attr max_hop_count 1 # removes "empty%20file.html"
+set_attr exclude_urls "site4.html script.html site[3].html"
+set_attr bad_extensions .foo
+set_attr local_urls_only false
+
+rm -f var/htdig2/db.md5hash.db
+set_attr md5_db '${database_base}.md5.db'
+
+set_attr mime_types $PWD/mime-without-htm
+set_attr content_classifier $PWD/say-text
+echo 'text/html html' > mime-without-htm
+echo '#!/bin/sh
+ echo text/plain' > say-text
+chmod 700 say-text
+got=`$htdig "$@" -t -i -vv -c $config | grep "MIME type:" | sed -e"s-.*/--"`
+if [ "$expected" != "$got" ]
+then
+ fail "fifth htdig: expected
+$expected
+but got
+$got"
+fi
+
+expected='db.docdb
+db.docs
+db.docs.index
+db.excerpts
+db.md5.db
+db.worddump
+db.words.db
+db.words.db_weakcmpr'
+got=`/bin/ls var/htdig2`
+if [ "$expected" != "$got" ]
+then
+ fail "fifth created files: expected
+$expected
+but got
+$got"
+fi
+
+$htpurge -c $config || fail "couldn't purge fifth time"
+
+expected='file:///set1/bad_local.htm
+file:///set1/index.html
+file:///set1/nph-location.cgi
+file:///set1/site%201.html
+file:///set1/site2.html
+file:///set1/site3.html
+file:///set1/title.html'
+
+got=`./document -c $config -u | sed "s#${PWD}/htdocs##" | sort`
+
+if [ "$expected" != "$got" ]
+then
+ fail "fifth document: expected
+$expected
+but got
+$got"
+fi
+
+################################################################################
+expected='' # no "bad local" extensions for file:///
+set_attr max_hop_count # removes "empty%20file.html"
+set_attr exclude_urls /CVS/
+set_attr valid_extensions ".foo .html"
+set_attr bad_extensions
+
+set_attr mime_types $PWD/mime-without-htm
+set_attr content_classifier $PWD/say-text
+echo 'text/html html' > mime-without-htm
+echo '#!/bin/sh
+ echo text/plain' > say-text
+chmod 700 say-text
+got=`$htdig "$@" -t -i -vv -c $config | grep "MIME type:" | sed -e"s-.*/--"`
+if [ "$expected" != "$got" ]
+then
+ fail "sixth htdig: expected
+$expected
+but got
+$got"
+fi
+
+$htpurge -c $config || fail "couldn't purge sixth time"
+
+expected='file:///set1/index.html
+file:///set1/nph-location.foo
+file:///set1/script.html
+file:///set1/site%201.html
+file:///set1/site2.html
+file:///set1/site3.html
+file:///set1/site4.html
+file:///set1/sub%2520dir/empty%20file.html
+file:///set1/title.html'
+
+got=`./document -c $config -u | sed "s#${PWD}/htdocs##" | sort`
+
+if [ "$expected" != "$got" ]
+then
+ fail "sixth document: expected
+$expected
+but got
+$got"
+fi
+
+
+################################################################################
+set_attr local_urls_only
+set_attr local_urls "http://somewhere/=$PWD/htdocs/"
+set_attr local_user_urls "http://somewhere/=$PWD/,/set1/"
+set_attr start_url "http://somewhere/~htdocs/"
+
+set_attr valid_extensions
+set_attr local_default_doc index.html
+set_attr remove_default_doc index.html
+
+$htdig "$@" -t -i -c $config || fail "couldn't dig seventh time"
+$htpurge -c $config || fail "couldn't purge seventh time"
+
+#local_urls_only can't handle .../~htdocs/sub%2520dir/empty%20file.html
+expected='http://somewhere/~htdocs/
+http://somewhere/~htdocs/script.html
+http://somewhere/~htdocs/site%201.html
+http://somewhere/~htdocs/site2.html
+http://somewhere/~htdocs/site3.html
+http://somewhere/~htdocs/site4.html
+http://somewhere/~htdocs/title.html'
+
+got=`./document -c $config -u | sort`
+
+if [ "$expected" != "$got" ]
+then
+ fail "seventh document: expected
+$expected
+but got
+$got"
+fi
+
+
+/bin/rm mime-without-htm say-text
+
+test_functions_action=--stop-apache
+. ./test_functions