DEB htdig: Added to repository.

Signed-off-by: Slávek Banko <slavek.banko@axis.cz>
author: Slávek Banko <slavek.banko@axis.cz> 2021-11-05 13:28:23 +0100
committer: Slávek Banko <slavek.banko@axis.cz> 2021-11-05 13:28:23 +0100
commit: 8c787c3591c1c885b91a54128835b400858c5cca (patch)
tree: eca1b776912a305c4d45b3964038278a2fae1ead /debian/htdig/htdig-3.2.0b6/htcommon/URL.cc
parent: fe188b907cdf30dfdfe0eba9412e7f8749fec158 (diff)
download: extra-dependencies-8c787c3591c1c885b91a54128835b400858c5cca.tar.gz
extra-dependencies-8c787c3591c1c885b91a54128835b400858c5cca.zip
1 files changed, 936 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htcommon/URL.cc b/debian/htdig/htdig-3.2.0b6/htcommon/URL.cc
new file mode 100644
index 00000000..9ccbe5d5
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htcommon/URL.cc
@@ -0,0 +1,936 @@
+//
+// URL.cc
+//
+// URL: A URL parsing class, implementing as closely as possible the standard
+//      laid out in RFC2396 (e.g. http://www.faqs.org/rfcs/rfc2396.html)
+//      including support for multiple services. (schemes in the RFC)
+//
+// Part of the ht://Dig package   <http://www.htdig.org/>
+// Copyright (c) 1995-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later 
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: URL.cc,v 1.16 2004/06/04 08:51:01 angusgb Exp $
+//
+
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "URL.h"
+#include "QuotedStringList.h"
+#include "Dictionary.h"
+#include "HtConfiguration.h"
+#include "StringMatch.h"
+#include "StringList.h"
+#include "HtURLRewriter.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifdef HAVE_STD
+#include <fstream>
+#ifdef HAVE_NAMESPACES
+using namespace std;
+#endif
+#else
+#include <fstream.h>
+#endif /* HAVE_STD */
+
+#include <sys/types.h>
+#include <ctype.h>
+
+#ifndef _MSC_VER /* _WIN32 */
+#include <sys/socket.h>
+#include <netdb.h>
+#include <arpa/inet.h>
+#endif
+
+#define NNTP_DEFAULT_PORT 119
+
+static Dictionary	*slashCount = 0;
+
+//*****************************************************************************
+// URL::URL()
+//   Default constructor.  Produces an empty URL: every member is
+//   initialised from 0.
+//
+URL::URL()
+    : _url(0)
+    , _path(0)
+    , _service(0)
+    , _host(0)
+    , _port(0)
+    , _normal(0)
+    , _hopcount(0)
+    , _signature(0)
+    , _user(0)
+{
+}
+
+
+//*****************************************************************************
+// URL::URL(const URL& rhs)
+//   Copy constructor.  Performs a member-by-member copy of rhs, including
+//   its cached url, normalised flag, hop count and signature.
+//
+URL::URL(const URL& rhs)
+    : _url(rhs._url)
+    , _path(rhs._path)
+    , _service(rhs._service)
+    , _host(rhs._host)
+    , _port(rhs._port)
+    , _normal(rhs._normal)
+    , _hopcount(rhs._hopcount)
+    , _signature(rhs._signature)
+    , _user(rhs._user)
+{
+}
+
+
+//*****************************************************************************
+// URL::URL(const String &nurl)
+//   Construct a URL from its textual form.  All members start out empty
+//   (initialised from 0) and are then filled in by parse(nurl).
+//
+URL::URL(const String &nurl)
+    : _url(0)
+    , _path(0)
+    , _service(0)
+    , _host(0)
+    , _port(0)
+    , _normal(0)
+    , _hopcount(0)
+    , _signature(0)
+    , _user(0)
+{
+    parse(nurl);
+}
+
+
+//*****************************************************************************
+// const URL &URL::operator = (const URL &rhs)
+//   Assignment operator.  Copies every member of rhs into this object and
+//   returns a reference to this object.  Self-assignment is a no-op.
+//
+const URL &URL::operator = (const URL &rhs)
+{
+    if (this != &rhs)
+    {
+        _url       = rhs._url;
+        _path      = rhs._path;
+        _service   = rhs._service;
+        _host      = rhs._host;
+        _port      = rhs._port;
+        _normal    = rhs._normal;
+        _hopcount  = rhs._hopcount;
+        _signature = rhs._signature;
+        _user      = rhs._user;
+    }
+
+    return *this;
+}
+
+//*****************************************************************************
+// URL::URL(const String &url, const URL &parent)
+//   Resolve a reference found in a document against the URL of that
+//   document.  This is needed for relative references, which do NOT carry
+//   a full url.
+//
+//   url    - the reference text.  It may be a complete URL, a reference of
+//            the form "//host/path", an absolute path ("/path") or a path
+//            relative to the parent.
+//   parent - URL of the document containing the reference.  Service, host,
+//            port, user, signature and the normalised flag start out as
+//            copies of the parent's values; the hop count starts at the
+//            parent's plus one.
+//
+//   Whitespace is stripped from the reference; when the configuration
+//   attribute "allow_space_in_url" is set, embedded blanks are turned into
+//   "%20" instead.  Any "#anchor" is removed.
+//
+URL::URL(const String &url, const URL &parent)
+: _url(0),
+    _path(0),
+    _service(parent._service),
+    _host(parent._host),
+    _port(parent._port),
+    _normal(parent._normal),
+    _hopcount(parent._hopcount + 1), // one hop *after* the parent
+    _signature(parent._signature),
+    _user(parent._user)
+{
+    HtConfiguration* config= HtConfiguration::config();
+    int  allowspace = config->Boolean("allow_space_in_url", 0);
+    String      temp;
+    const char *urp = url.get();
+
+    //
+    // Copy the reference into temp, dropping whitespace.
+    // The <ctype.h> classifiers require a value representable as unsigned
+    // char (or EOF); plain char may be negative for bytes >= 0x80, hence
+    // the casts.
+    //
+    while (*urp)
+    {
+        if (*urp == ' ' && temp.length() > 0 && allowspace)
+        {
+            // Replace space character with %20 if there's more non-space
+            // characters to come...
+            const char *s = urp+1;
+            while (*s && isspace((unsigned char) *s))
+                s++;
+            if (*s)
+                temp << "%20";
+        }
+        else if (!isspace((unsigned char) *urp))
+            temp << *urp;
+        urp++;
+    }
+    char* ref = temp;
+
+    //
+    // Strip any optional anchor from the reference.  If, however, the
+    // reference contains CGI parameters after the anchor, the parameters
+    // will be moved left to replace the anchor.  The overall effect is that
+    // the anchor is removed.
+    // Thanks goes to David Filiatrault <dwf@WebThreads.Com> for suggesting
+    // this removal process.
+    //
+    char        *anchor = strchr(ref, '#');
+    char        *params = strchr(ref, '?');
+    if (anchor)
+    {
+        *anchor = '\0';
+        if (params && anchor < params)
+        {
+            // "path#anchor?query" -> "path?query"
+            while (*params)
+            {
+                *anchor++ = *params++;
+            }
+            *anchor = '\0';
+        }
+    }
+
+    //
+    // If, after the removal of a possible '#' we have nothing left,
+    // we just want to use the base URL (we're on the same page but
+    // different anchors)
+    //
+    if (!*ref)
+    {
+        // We've already copied much of the info
+        _url = parent._url;
+        _path = parent._path;
+        // Since this is on the same page, we want the same hopcount
+        _hopcount = parent._hopcount;
+        return;
+    }
+
+    // OK, now we need to work out what type of child URL this is
+    char        *p = ref;
+    while (isalpha((unsigned char) *p))  // Skip through the service portion
+        p++;
+    int hasService = (*p == ':');
+
+    // Why single out http?  Shouldn't others be the same?
+    // Child URL of the form  https:/child  or  ftp:child  called "full"
+    // How about using slashes()?
+    if (hasService && ((strncmp(ref, "http://", 7) == 0) ||
+                       (strncmp(ref, "http:", 5) != 0)))
+    {
+        //
+        // No need to look at the parent url since this is a complete url...
+        //
+        parse(ref);
+    }
+    else if (strncmp(ref, "//", 2) == 0)
+    {
+        // Prefix the parent url's _service to make this a complete url...
+        String  fullref(parent._service);
+        fullref << ':' << ref;
+        parse((char*)fullref);
+    }
+    else
+    {
+        if (hasService)
+            ref = p + 1;        // Relative URL, skip "http:"
+
+        if (*ref == '/')
+        {
+            //
+            // The reference is on the same server as the parent, but
+            // an absolute path was given...
+            //
+            _path = ref;
+
+            //
+            // Get rid of loop-causing constructs in the path
+            //
+            normalizePath();
+        }
+        else
+        {
+            //
+            // The reference is relative to the parent.  Start from the
+            // parent's path with any query string cut off.
+            //
+            _path = parent._path;
+            int i = _path.indexOf('?');
+            if (i >= 0)
+            {
+                _path.chop(_path.length() - i);
+            }
+
+            //
+            // Remove any leading "./" sequences which could get us into
+            // recursive loops.
+            //
+            while (strncmp(ref, "./", 2) == 0)
+                ref += 2;
+
+            if (_path.last() == '/')
+            {
+                //
+                // Parent was a directory.  Easy enough: just append
+                // the current ref to it
+                //
+                _path << ref;
+            }
+            else
+            {
+                //
+                // Parent was a file.  We need to strip the last part
+                // of the path before we add the reference to it.
+                //
+                String  dir = _path;
+                p = strrchr((char*)dir, '/');
+                if (p)
+                {
+                    p[1] = '\0';        // keep everything up to the last '/'
+                    _path = dir.get();
+                    _path << ref;
+                }
+                else
+                {
+                    //
+                    // Something must be wrong since there were no '/'
+                    // found in the parent url.
+                    //
+                    // We do nothing here.  The new url is the parent.
+                    //
+                }
+            }
+
+            //
+            // Get rid of loop-causing constructs in the path
+            //
+            normalizePath();
+        }
+
+        //
+        // Build the url.  (Note, the host name has NOT been normalized!)
+        // No need for this if we have called URL::parse.
+        //
+        constructURL();
+    }
+}
+
+
+//*****************************************************************************
+// void URL::rewrite()
+//   Hand the current url to the HtURLRewriter singleton.  When the
+//   rewriter reports a positive result, re-parse the rewritten url so the
+//   other members are brought back in line with it.
+//
+void URL::rewrite()
+{
+    if (HtURLRewriter::instance()->replace(_url) <= 0)
+        return;
+
+    parse(_url.get());
+}
+
+
+//*****************************************************************************
+// void URL::parse(const String &u)
+//   Given a URL string, extract the service, host, port, user and path from
+//   it, normalise the path and rebuild _url.
+//
+//   u - the URL text.  Whitespace is removed (or, with the configuration
+//       attribute "allow_space_in_url", embedded blanks become "%20") and
+//       anything from '#' onwards is ignored.  When no ':' is present the
+//       service defaults to "http".
+//
+//   The string is tokenised with strtok(), which keeps hidden static state.
+//
+void URL::parse(const String &u)
+{
+    HtConfiguration* config= HtConfiguration::config();
+    int  allowspace = config->Boolean("allow_space_in_url", 0);
+    String      temp;
+    const char *urp = u.get();
+
+    //
+    // Copy the URL into temp, dropping whitespace.
+    // The <ctype.h> classifiers require a value representable as unsigned
+    // char (or EOF); plain char may be negative for bytes >= 0x80, hence
+    // the casts.
+    //
+    while (*urp)
+    {
+        if (*urp == ' ' && temp.length() > 0 && allowspace)
+        {
+            // Replace space character with %20 if there's more non-space
+            // characters to come...
+            const char *s = urp+1;
+            while (*s && isspace((unsigned char) *s))
+                s++;
+            if (*s)
+                temp << "%20";
+        }
+        else if (!isspace((unsigned char) *urp))
+            temp << *urp;
+        urp++;
+    }
+    char        *nurl = temp;
+
+    //
+    // Ignore any part of the URL that follows the '#' since this is just
+    // an index into a document.
+    //
+    char        *p = strchr(nurl, '#');
+    if (p)
+        *p = '\0';
+
+    // Some members need to be reset.  If not, the caller would
+    // have used URL::URL(const String &url, const URL &parent)
+    // (which may call us, if the URL is found to be absolute).
+    _normal = 0;
+    _signature = 0;
+    _user = 0;
+
+    //
+    // Extract the service.  Afterwards p points at whatever follows the
+    // "service:" prefix (or at the whole string when there is none); it is
+    // NULL when strtok() finds nothing left.
+    //
+    p = strchr(nurl, ':');
+    if (p)
+    {
+        _service = strtok(nurl, ":");
+        p = strtok(0, "\n");
+    }
+    else
+    {
+        _service = "http";
+        p = strtok(nurl, "\n");
+    }
+    _service.lowercase();
+
+    //
+    // Extract the host
+    //
+    if (!p || strncmp(p, "//", 2) != 0)
+    {
+        // No host specified, it's all a path.
+        _host = 0;
+        _port = 0;
+        _url = 0;
+        if (p)          // if non-NULL, skip (some) leading slashes in path
+        {
+            int i;
+            for (i = slashes (_service); i > 0 && *p == '/'; i--)
+                p++;
+            if (i)      // if fewer slashes than specified for protocol don't
+                        // delete any. -> Backwards compatible (necessary??)
+                p -= slashes (_service) - i;
+        }
+        _path = p;
+        if (strcmp((char*)_service, "file") == 0 || slashes (_service) < 2)
+          _host = "localhost";
+    }
+    else
+    {
+        p += 2;
+
+        //
+        // p now points to the host
+        //
+        char    *q = strchr(p, ':');
+        char    *slash = strchr(p, '/');
+
+        _path = "/";
+        if (strcmp((char*)_service, "file") == 0)
+          {
+            // These should be of the form file:/// (i.e. no host)
+            // if there is a file://host/path then strip the host
+            if (strncmp(p, "/", 1) != 0)
+              {
+                p = strtok(p, "/");
+                _path << strtok(0, "\n");
+              }
+            else
+              _path << strtok(p+1, "\n");       // _path is "/" - don't double
+            _host = "localhost";
+            _port = 0;
+          }
+        else if (q && ((slash && slash > q) || !slash))
+        {
+            // A ':' appears before the first '/': "host:port/path"
+            _host = strtok(p, ":");
+            p = strtok(0, "/");
+            if (p)
+              _port = atoi(p);
+            if (!p || _port <= 0)
+               _port = DefaultPort();
+            //
+            // The rest of the input string is the path.
+            //
+            _path << strtok(0, "\n");
+
+        }
+        else
+        {
+            // No explicit port: "host/path"
+            _host = strtok(p, "/");
+            _host.chop(" \t");
+            _port = DefaultPort();
+
+            //
+            // The rest of the input string is the path.
+            //
+            _path << strtok(0, "\n");
+
+        }
+
+        // Check to see if host contains a user@ portion
+        int atMark = _host.indexOf('@');
+        if (atMark != -1)
+          {
+            _user = _host.sub(0, atMark);
+            _host = _host.sub(atMark + 1);
+          }
+    }
+
+    //
+    // Get rid of loop-causing constructs in the path
+    //
+    normalizePath();
+
+    //
+    // Build the url.  (Note, the host name has NOT been normalized!)
+    //
+    constructURL();
+}
+
+
+//*****************************************************************************
+// void URL::normalizePath()
+//   Rewrite _path into a minimal form: remove "//" (unless the
+//   configuration attribute "allow_double_slash" is set), "/./" and "/../"
+//   components, turn "%7E" into "~", lowercase the path when
+//   "case_sensitive" is off, and finally strip a default document name via
+//   removeIndex().  Only the part of the path before any '?' is examined
+//   by the component rewriting, so query strings are left alone.
+//
+// Called from: URL(const String &url, const URL &parent)
+//              URL::parse(const String &u)
+//
+void URL::normalizePath()
+{
+    HtConfiguration* config= HtConfiguration::config();
+
+    int i, limit;
+    int leadingdotdot = 0;
+    String      newPath;
+    int pathend = _path.indexOf('?');   // Don't mess up query strings.
+    if (pathend < 0)
+        pathend = _path.length();
+
+    //
+    // get rid of "//" first, or "/foo//../" will become "/foo/" not "/"
+    // Some database lookups interpret empty paths (// != /), so give
+    // the user the option to turn this off.
+    //
+    if (!config->Boolean ("allow_double_slash"))
+        while ((i = _path.indexOf("//")) >= 0 && i < pathend)
+        {
+            newPath = _path.sub(0, i).get();
+            newPath << _path.sub(i + 1).get();
+            _path = newPath;
+            pathend = _path.indexOf('?');
+            if (pathend < 0)
+                pathend = _path.length();
+        }
+
+    //
+    // Next get rid of redundant "/./".  This could cause infinite
+    // loops.  Moreover, "/foo/./../" should become "/", not "/foo/"
+    //
+    while ((i = _path.indexOf("/./")) >= 0 && i < pathend)
+    {
+        newPath = _path.sub(0, i).get();
+        newPath << _path.sub(i + 2).get();
+        _path = newPath;
+        pathend = _path.indexOf('?');
+        if (pathend < 0)
+            pathend = _path.length();
+    }
+
+    //
+    // A trailing "/." (just before the query string or the end).  Look at
+    // the actual tail of the path: the first "/." found by indexOf() may
+    // belong to an earlier component such as "/.hidden".
+    //
+    i = pathend - 2;
+    if (i >= 0 && strncmp(_path.get() + i, "/.", 2) == 0)
+    {
+        newPath = _path.sub(0, i+1).get();              // keep trailing slash
+        newPath << _path.sub(i + 2).get();
+        _path = newPath;
+        pathend--;
+    }
+
+    //
+    // Now that "empty" path components are gone, remove ("/../").
+    //
+    while ((i = _path.indexOf("/../")) >= 0 && i < pathend)
+    {
+        if ((limit = _path.lastIndexOf('/', i - 1)) >= 0)
+        {
+            // Drop the component that precedes the "/../"
+            newPath = _path.sub(0, limit).get();
+            newPath << _path.sub(i + 3).get();
+            _path = newPath;
+        }
+        else
+        {
+            // Nothing left to climb out of: just drop the "/.."
+            _path = _path.sub(i + 3).get();
+            leadingdotdot++;
+        }
+        pathend = _path.indexOf('?');
+        if (pathend < 0)
+            pathend = _path.length();
+    }
+
+    //
+    // A trailing "/.." -- again test the real tail, not the first "/.."
+    // anywhere in the path (which could be e.g. "/..foo").
+    //
+    i = pathend - 3;
+    if (i >= 0 && strncmp(_path.get() + i, "/..", 3) == 0)
+    {
+        if ((limit = _path.lastIndexOf('/', i - 1)) >= 0)
+            newPath = _path.sub(0, limit+1).get();      // keep trailing slash
+        else
+        {
+            newPath = '/';
+            leadingdotdot++;
+        }
+        newPath << _path.sub(i + 3).get();
+        _path = newPath;
+        pathend = _path.indexOf('?');
+        if (pathend < 0)
+            pathend = _path.length();
+    }
+    // The RFC gives us a choice of what to do when we have .. left and
+    // we're at the top level. By principle of least surprise, we'll just
+    // toss any "leftovers" Otherwise, we'd have a loop here to add them.
+    // (leadingdotdot counts them but is not used any further.)
+
+    // Finally change all "%7E" to "~" for sanity
+    while ((i = _path.indexOf("%7E")) >= 0 && i < pathend)
+      {
+        newPath = _path.sub(0, i).get();
+        newPath << "~";
+        newPath << _path.sub(i + 3).get();
+        _path = newPath;
+        pathend = _path.indexOf('?');
+        if (pathend < 0)
+            pathend = _path.length();
+      }
+
+    // If the server *isn't* case sensitive, we want to lowercase the path
+    if (!config->Boolean("case_sensitive", 1))
+      _path.lowercase();
+
+    // And don't forget to remove index.html or similar file.
+    // (the check for services that must keep it is inside removeIndex)
+    removeIndex(_path, _service);
+}
+
+//*****************************************************************************
+// void URL::dump()
+//   Debugging aid: print service, user, host, port, path and url to cout,
+//   one "name = value" pair per line.
+//
+void URL::dump()
+{
+    cout << "service = " << _service.get() << endl
+         << "user = "    << _user.get()    << endl
+         << "host = "    << _host.get()    << endl
+         << "port = "    << _port          << endl
+         << "path = "    << _path          << endl
+         << "url = "     << _url           << endl;
+}
+
+
+//*****************************************************************************
+// void URL::path(const String &newpath)
+//   Replace the path component with newpath (lowercased when the
+//   configuration attribute "case_sensitive" is off) and rebuild _url.
+//   The new path is NOT run through normalizePath().
+//
+void URL::path(const String &newpath)
+{
+    _path = newpath;
+
+    if (!HtConfiguration::config()->Boolean("case_sensitive",1))
+        _path.lowercase();
+
+    constructURL();
+}
+
+
+//*****************************************************************************
+// void URL::removeIndex(String &path, String &service)
+//   Attempt to strip a default document name (one of the names listed in
+//   the configuration attribute "remove_default_doc") from the end of a URL
+//   path, if the service allows that.  (File, ftp don't.  Do others?)
+//   This needs to be done to normalize the paths and make .../ the
+//   same as .../index.html
+//
+//   path    - path to examine; modified in place when a default document
+//             name is removed.  Paths that are empty, contain a '?' or
+//             contain no '/' are left untouched.
+//   service - service (scheme) the path belongs to.
+//
+//   The pattern list is compiled once, on first use, and kept in a
+//   function-local static for all later calls.
+//
+// Called from: URL::normalize() from URL::signature()  [redundant?]
+//              URL::normalizePath()
+//
+void URL::removeIndex(String &path, String &service)
+{
+    HtConfiguration* config= HtConfiguration::config();
+    static StringMatch *defaultdoc = 0;
+
+    // Decide on the service that was passed in, not on the member: both
+    // in-file callers pass _service, so their behaviour is unchanged.
+    if (strcmp((char*)service, "file") == 0 ||
+        strcmp((char*)service, "ftp")  == 0)
+        return;
+
+    if (path.length() == 0 || strchr((char*)path, '?'))
+        return;
+
+    // Index of the first character after the last '/'
+    int filename = path.lastIndexOf('/') + 1;
+    if (filename == 0)
+        return;
+
+    if (! defaultdoc)
+    {
+      StringList  l(config->Find("remove_default_doc"), " \t");
+      defaultdoc = new StringMatch();
+      defaultdoc->IgnoreCase();
+      defaultdoc->Pattern(l.Join('|'));
+    }
+
+    // Chop the file name only when a pattern matches it in full, i.e. the
+    // match reaches the end of the path.
+    int which, length;
+    if (defaultdoc->hasPattern() &&
+            defaultdoc->CompareWord((char*)path.sub(filename), which, length) &&
+            filename+length == path.length())
+        path.chop(path.length() - filename);
+}
+
+
+//*****************************************************************************
+// void URL::normalize()
+//   Make sure that URLs are always in the same format.
+//
+//   Does nothing when the service is empty, when the URL is already marked
+//   as normalised, or when the service does not use the "service://host"
+//   form (slashes() != 2).  Otherwise it strips a default document from the
+//   path, lowercases the host, optionally folds all host names that
+//   resolve to the same address onto one name (configuration attribute
+//   "allow_virtual_hosts" off), applies the server aliases, rebuilds _url
+//   and clears the cached signature.
+//
+//   If the host name cannot be resolved the function returns early and the
+//   URL is left un-normalised.
+//
+void URL::normalize()
+{
+    HtConfiguration* config= HtConfiguration::config();
+    static int  hits = 0, misses = 0;   // lookup-cache statistics
+
+    if (_service.length() == 0 || _normal)
+        return;
+
+    // if service specifies "doesn't specify an IP host", don't normalize it
+    if (slashes (_service) != 2)
+        return;
+
+    // (the check for services that keep their index file is internal)
+    removeIndex(_path, _service);
+
+    //
+    // Convert a hostname to an IP address
+    //
+    _host.lowercase();
+
+    if (!config->Boolean("allow_virtual_hosts", 1))
+    {
+        static Dictionary       hostbyname;     // host name -> raw address
+        // Address as returned by inet_addr()/gethostbyname().  A zeroed
+        // 32-bit value is used so that the bytes copied in below are
+        // exactly the bytes later used as the lookup key; with a wider,
+        // uninitialised variable the key would depend on indeterminate
+        // bytes and on byte order.
+        unsigned int            addr = 0;
+        struct hostent          *hp;
+
+        String  *ip = (String *) hostbyname[_host];
+        if (ip)
+        {
+            int len = ip->length();
+            if (len > (int) sizeof(addr))
+                len = (int) sizeof(addr);
+            memcpy((char *) &addr, ip->get(), len);
+            hits++;
+        }
+        else
+        {
+            addr = inet_addr(_host.get());
+            if (addr == 0xffffffff)
+            {
+                // Not a dotted-quad address: ask the resolver
+                hp = gethostbyname(_host.get());
+                if (hp == NULL)
+                {
+                    return;
+                }
+                // Never copy more than addr can hold
+                int len = hp->h_length;
+                if (len > (int) sizeof(addr))
+                    len = (int) sizeof(addr);
+                addr = 0;
+                memcpy((char *)&addr, (char *)hp->h_addr, len);
+                ip = new String((char *) &addr, len);
+                hostbyname.Add(_host, ip);
+                misses++;
+            }
+        }
+
+        // The first host name seen for an address becomes the name used
+        // for every later host that resolves to the same address.
+        static Dictionary       machines;       // address -> host name
+        String                  key;
+        key << int(addr);
+        String                  *realname = (String *) machines[key];
+        if (realname)
+            _host = realname->get();
+        else
+            machines.Add(key, new String(_host));
+    }
+    ServerAlias();
+
+    //
+    // Reconstruct the url
+    //
+    constructURL();
+    _normal = 1;
+    _signature = 0;
+}
+
+
+//*****************************************************************************
+// const String &URL::signature()
+//   Return a string which uniquely identifies the server the current
+//   URL is referring to.
+//   This is the first portion of a url: service://user@host:port/
+//   (in short this is the URL pointing to the root of this server)
+//
+//   The result is cached in _signature; it is only rebuilt while that
+//   member is empty.  normalize() is called first if the URL has not been
+//   normalised yet.
+//
+const String &URL::signature()
+{
+    if (_signature.length() == 0)
+    {
+        if (!_normal)
+            normalize();
+
+        _signature = _service;
+        _signature << "://";
+        if (_user.length())
+            _signature << _user << '@';
+        _signature << _host << ':' << _port << '/';
+    }
+
+    return _signature;
+}
+
+//*****************************************************************************
+// void URL::ServerAlias()
+//   Takes care of the server aliases, which attempt to simplify virtual
+//   host problems.
+//
+//   The configuration attribute "server_aliases" is read once, on the
+//   first call: a blank/tab separated list of "from=to" entries, where
+//   both sides are "host" or "host:port".  Entries are lowercased and
+//   ":80" is appended to any side that has no ':'.  Entries without '='
+//   are skipped.
+//
+//   If the current "host:port" matches the "from" side of an entry, _host
+//   and _port are replaced by the "to" side.
+//
+void URL::ServerAlias()
+{
+  HtConfiguration* config= HtConfiguration::config();
+  static Dictionary *serveraliases= 0;
+
+  if (! serveraliases)
+    {
+      String l= config->Find("server_aliases");
+      String from, *to;
+      serveraliases = new Dictionary();
+      char *salias= NULL;
+      for (char *p = strtok(l, " \t"); p; p = strtok(0, " \t"))
+        {
+          salias = strchr(p, '=');
+          if (! salias)
+            continue;                   // not of the form from=to
+          *salias++= '\0';              // split the entry at the '='
+          from = p;
+          from.lowercase();
+          if (from.indexOf(':') == -1)
+            from.append(":80");
+          to= new String(salias);
+          to->lowercase();
+          if (to->indexOf(':') == -1)
+            to->append(":80");
+          serveraliases->Add(from.get(), to);
+          // fprintf (stderr, "Alias: %s->%s\n", from.get(), to->get());
+        }
+    }
+
+  String *al= 0;
+  int newport;
+  int delim;
+  String serversig = _host;
+  serversig << ':' << _port;
+  if ((al= (String *) serveraliases->Find(serversig)))
+    {
+      delim= al->indexOf(':');
+      // fprintf(stderr, "\nOld URL: %s->%s\n", (char *) serversig, (char *) *al);
+      _host= al->sub(0,delim).get();
+      // Only take the port over when one was actually parsed; an alias
+      // such as "host:" or "host:abc" would otherwise leave newport
+      // unset and put an indeterminate value into _port.
+      if (sscanf((char*)al->sub(delim+1), "%d", &newport) == 1)
+        _port= newport;
+      // fprintf(stderr, "New URL: %s:%d\n", (char *) _host, _port);
+    }
+}
+
+//*****************************************************************************
+// int URL::slashes(const String &protocol)
+//   Returns the number of slashes following the service name for protocol.
+//
+//   The lookup table is built on the first call: a few built-in services
+//   plus the entries of the configuration attribute "external_protocols",
+//   of which every second item, starting with the first, is read as a
+//   protocol specification.  Protocols that are not in the table get 2.
+//
+int
+URL::slashes(const String &protocol)
+{
+    if (!slashCount)
+    {
+        HtConfiguration* config= HtConfiguration::config();
+        slashCount = new Dictionary();
+
+        slashCount->Add (String("mailto"), new String("0"));
+        slashCount->Add (String("news"),   new String("0"));
+        slashCount->Add (String("http"),   new String("2"));
+        slashCount->Add (String("ftp"),    new String("2"));
+        // file:///  has three, but the last counts as part of the path...
+        slashCount->Add (String("file"),   new String("2"));
+
+        QuotedStringList        qsl(config->Find("external_protocols"), " \t");
+        String                  from;
+
+        for (int item = 0; qsl[item]; item += 2)
+        {
+            from = qsl[item];
+
+            // Keep only what precedes a "->", if there is one
+            int sep = from.indexOf("->");
+            if (sep != -1)
+                from = from.sub(0, sep).get();  // "get" aids portability...
+
+            // if service specified as "help:/" or "man:", note trailing slashes
+            // Default is 2.
+            int colon = from.indexOf(":");
+            if (colon == -1)
+            {
+                slashCount->Add (from, new String ("2"));
+                continue;
+            }
+
+            // Walk over the run of '/' that follows the colon
+            int pos = colon + 1;
+            while (from[pos] == '/')
+                pos++;
+
+            // Store the count as a one-character string
+            char digits [2];
+            digits [0] = pos - colon + '0' - 1;
+            digits [1] = '\0';
+            from = from.sub(0,colon).get();
+            slashCount->Add (from, new String (digits));
+        }
+    }
+
+    // Default to two slashes for unknown protocols
+    String *found = (String *)slashCount->Find(protocol);
+    if (!found)
+        return 2;
+    return found->get()[0] - '0';
+}
+
+//*****************************************************************************
+// void URL::constructURL()
+//   Rebuilds the _url member from service, user, host, port and path.
+//   The port is written out only when it is non-zero and differs from the
+//   default port of the service.  For any service other than "file", an
+//   empty host yields an empty _url.
+//
+// Called from  URL::URL(const String &url, const URL &parent)
+//              URL::parse(const String &u)
+//              URL::path(const String &newpath)
+//              URL::normalize()
+//
+void URL::constructURL()
+{
+    const int isFile = (strcmp((char*)_service, "file") == 0);
+
+    if (!isFile && _host.length() == 0)
+    {
+        _url = "";
+        return;
+    }
+
+    _url = _service;
+    _url << ":";
+
+    // Add correct number of slashes after service name
+    const int nslashes = slashes (_service);
+    for (int left = nslashes; left > 0; left--)
+        _url << "/";
+
+    // services specifying a particular IP host must begin "service://"
+    if (nslashes == 2)
+    {
+        if (!isFile)
+        {
+            if (_user.length())
+                _url << _user << '@';
+            _url << _host;
+        }
+
+        // Different than the default port
+        if (_port != DefaultPort() && _port != 0)
+            _url << ':' << _port;
+    }
+
+    _url << _path;
+}
+
+
+///////
+   //    Get the default port for the recognised service.
+   //    Services that are not listed below get 80.
+///////
+
+int URL::DefaultPort()
+{
+   static const struct { const char *service; int port; } known[] =
+   {
+      { "http",   80 },
+      { "https",  443 },
+      { "ftp",    21 },
+      { "gopher", 70 },
+      { "file",   0 },
+      { "news",   NNTP_DEFAULT_PORT }
+   };
+
+   const char *current = (char*)_service;
+   for (unsigned int k = 0; k < sizeof(known) / sizeof(known[0]); k++)
+   {
+      if (strcmp(current, known[k].service) == 0)
+         return known[k].port;
+   }
+   return 80;
+}
author	Slávek Banko <slavek.banko@axis.cz>	2021-11-05 13:28:23 +0100
committer	Slávek Banko <slavek.banko@axis.cz>	2021-11-05 13:28:23 +0100
commit	8c787c3591c1c885b91a54128835b400858c5cca (patch)
tree	eca1b776912a305c4d45b3964038278a2fae1ead /debian/htdig/htdig-3.2.0b6/htcommon/URL.cc
parent	fe188b907cdf30dfdfe0eba9412e7f8749fec158 (diff)
download	extra-dependencies-8c787c3591c1c885b91a54128835b400858c5cca.tar.gz extra-dependencies-8c787c3591c1c885b91a54128835b400858c5cca.zip