summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/test/t_parsing
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/test/t_parsing')
-rwxr-xr-xdebian/htdig/htdig-3.2.0b6/test/t_parsing189
1 files changed, 189 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/test/t_parsing b/debian/htdig/htdig-3.2.0b6/test/t_parsing
new file mode 100755
index 00000000..d11a7068
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/test/t_parsing
@@ -0,0 +1,189 @@
+#
+# Part of the ht://Dig package <http://www.htdig.org/>
+# Copyright (c) 1999-2004 The ht://Dig Group
+# For copyright details, see the file COPYING in your distribution
+# or the GNU Library General Public License (LGPL) version 2 or later
+# <http://www.gnu.org/copyleft/lgpl.html>
+#
+# $Id: t_parsing,v 1.4 2004/05/28 13:15:30 lha Exp $
+#
+
+
+# Tests (or should eventually test) the following config attributes:
+# description_meta_tag_names
+# ignore_alt_text
+# max_doc_size
+# max_keywords
+# max_meta_description_length
+# max_description_length
+# max_descriptions
+# max_head_length
+# noindex_end
+# noindex_start
+# external_parsers
+# external_protocols
+# use_meta_description
+
+
+test_functions_action=--start-apache
+. ./test_functions
+
+config=$testdir/conf/htdig.conf.tmp
+tmp=/tmp/t_htsearch$$
+
+# set up config file with chosen non-default values
+cp $testdir/conf/htdig.conf $config
+
+try() {
+ comment="$1"
+ shift
+ query="$1"
+ shift
+ $htsearch -c $config "$query" > $tmp
+ for pattern
+ do
+ if grep "$pattern" $tmp > /dev/null
+ then :
+ else
+ $htsearch -v -c $config "$query" > /dev/null
+ echo "Output doesn't match \"$pattern\""
+ fail "$htsearch -c $config '$query' >> $tmp --
+ $comment"
+ fi
+ done
+}
+
+
+# Tests (or should eventually test) the following config attributes:
+# description_meta_tag_names
+# ignore_alt_text
+# max_doc_size
+# max_keywords
+# max_meta_description_length
+# max_description_length (May put in t_templates)
+# max_descriptions (May put in t_templates)
+# max_head_length
+# noindex_end
+# noindex_start
+# external_parsers (TODO)
+# external_protocols
+# use_meta_description
+
+$htdig "$@" -t -i -c $config || fail "Couldn't do first dig"
+$htpurge -c $config || fail "Couldn't do first purge"
+
+try "Search for alt text 'earth'" \
+ "words=earth" \
+ '1 matches' 'site3.html'
+
+try "'claims and collections', unlimited doc size" \
+ "words=%22claims+and+collections%22" \
+ '1 matches' 'site4.html'
+
+try "Search for keyword 'martial', default max_keywords" \
+ "words=martial" \
+ '1 matches' 'site2.html'
+
+try "Search for 'service', default noindex_start/end" \
+ "words=technical" \
+ '1 matches' 'site%201.html'
+
+set_attr use_meta_description true
+try "Search for 'call handling' with default max_meta_description_length" \
+ "words=%22call+handling%22" \
+ '1 matches' 'script.html' 'call handling.*signalling'
+
+set_attr ignore_alt_text true
+set_attr max_doc_size 15112
+set_attr max_keywords 5
+set_attr noindex_start "'Software Distribution'"
+set_attr noindex_end "'Contact Information'"
+set_attr max_meta_description_length 80
+set_attr description_meta_tag_names "description generator"
+set_attr max_head_length 30
+
+$htdig "$@" -t -i -c $config || fail "Couldn't do second dig"
+$htpurge -c $config || fail "Couldn't do second purge"
+
+try "Search for alt text 'earth' with ignore_alt_text=true" \
+ "words=earth" \
+ 'No matches'
+
+try "'claims and collections', max_doc_size 15112" \
+ "words=%22claims+and+collections%22" \
+ '1 matches' 'site4.html'
+
+# (Martial is 6th keyword listed in site 2, but "Fu" is too short and omitted.)
+try "Search for keyword 'martial', max_keywords = 5" \
+ "words=martial" \
+ 'No matches'
+
+# Only occurrence of "technical" is between noindex_start and _end in site 1
+try "Search for 'technical', noindex_start=Software Distribution, noindex_end=Contact Information" \
+ "words=technical" \
+ 'No matches'
+
+# Visitor occurs after noindex_end
+try "Search for 'visitor', noindex_start=Software Distribution, noindex_end=Contact Information" \
+ "words=visitor" \
+ '2 matches' 'site%201.html' 'site3.html'
+
+# Displaying meta description instead of excerpt, check it is truncated
+try "Search for 'call handling' with max_meta_description_length=80" \
+ "words=%22call+handling%22" \
+ '1 matches' 'script.html' 'means of<br>'
+
+# Check <meta name="generator"...> counts as a description
+try "Search for 'category', description_meta_tag_names includes 'generator'" \
+ "words=category" \
+ '1 matches' 'site3.html' 'FrontPage'
+
+# Check that only specified number of bytes of header is stored.
+# Header size is rounded up to contain the whole of the last word.
+try "Search for 'also', max_head_length=30" \
+ "words=also" \
+ '4 matches' 'bad_local.htm' 'site2.html' 'script.html' 'site4.html' \
+ 'WHERE.*Copyright<br>'
+
+set_attr max_doc_size 15042
+set_attr max_keywords 6
+set_attr noindex_start "'software distribution'"
+set_attr noindex_end "'contact information'"
+
+$htdig "$@" -t -i -c $config || fail "Couldn't do third dig"
+$htpurge -c $config || fail "Couldn't do third purge"
+
+try "Search for keyword 'martial', max_keywords = 6" \
+ "words=martial" \
+ '1 matches' 'site2.html'
+
+try "'claims and collections', max_doc_size 15042" \
+ "words=%22claims+and+collections%22" \
+ 'No matches'
+
+# Check noindex_start/end are case-insensitive
+try "Search for 'technical', noindex_start=software distribution, noindex_end=contact information" \
+ "words=technical" \
+ 'No matches'
+
+PROTOCOL=my-protocol
+echo '#!/bin/sh
+ echo "s 200"
+ echo "t text/html"
+ echo
+ echo "<html>$2</html>"' > $PROTOCOL
+chmod 755 $PROTOCOL
+set_attr external_protocols "echo: $PWD/$PROTOCOL"
+set_attr start_url "echo:foo.html"
+$htdig "$@" -t -i -c $config || fail "Couldn't do fourth dig"
+try "trying external protocol echo" \
+ "words=foo" \
+ "1 matches" "echo:foo.html"
+
+
+test_functions_action=--stop-apache
+. ./test_functions
+
+rm -f $tmp $PROTOCOL
+
+exit 0