//
// Document.h
//
// Document: This class holds everything there is to know about a document.
// The actual contents of the document may or may not be present at
// all times for memory conservation reasons.
// The document can be told to retrieve its contents. This is done
// with the Retrieve call. If the retrieval causes a redirect, the
// link is followed, but only once (to prevent loops). If the
// redirect didn't work, Document_not_found is returned.
//
// Part of the ht://Dig package
// Copyright (c) 1995-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
//
//
// $Id: Document.h,v 1.19 2004/05/28 13:15:14 lha Exp $
//
//
#ifndef _Document_h_
#define _Document_h_
#include "Parsable.h"
#include "Object.h"
#include "URL.h"
#include "htString.h"
#include "StringList.h"
#include "Transport.h"
#include "HtHTTP.h"
#include "HtFile.h"
#include "HtFTP.h"
#include "HtNNTP.h"
#include "ExternalTransport.h"
#include "Server.h"
class Connection;
//
// Document: holds the state of a single document (its URL, contents,
// content type, lengths, modification time, credentials) together with
// pointers to the transport objects used to fetch it.
//
class Document : public Object
{
public:
    //
    // Construction/Destruction
    //
    // Both constructor arguments are optional and default to 0.
    // NOTE(review): this class holds several raw pointers and declares a
    // destructor but no copy constructor or assignment operator. If the
    // destructor frees any of those pointers, copying a Document would be
    // unsafe -- confirm against the implementation file.
    //
    Document(char *url = 0, int max_size = 0);
    ~Document();

    //
    // Interface to the document.
    //
    void Reset();

    // Value of document_length (updated by Contents(char *) below).
    int Length() const
    {
        return document_length;
    }

    // Value of contentLength.
    // NOTE(review): presumably the length reported by the transport,
    // as opposed to what is actually stored -- confirm.
    int ContentLength() const
    {
        return contentLength;
    }

    // Number of characters currently held in the contents buffer.
    int StoredLength()
    {
        return contents.length();
    }

    // Access to the stored contents buffer.
    char *Contents()
    {
        return contents;
    }

    // Replace the stored contents with s and bring document_length
    // in line with the new buffer's length.
    void Contents(char *s)
    {
        contents = s;
        document_length = contents.length();
    }

    // Content type string held for this document.
    char *ContentType()
    {
        return contentType.get();
    }

    //
    // In case the retrieval process went through a redirect process,
    // the new url can be gotten using the following call
    //
    char *Redirected()
    {
        return redirected_to;
    }

    // The URL object associated with this document.
    URL *Url()
    {
        return url;
    }

    // Set the document URL / the referer from a string.
    void Url(const String &url);
    void Referer(const String &url);

    // Modification time held in modtime, converted to time_t.
    time_t ModTime()
    {
        return modtime.GetTime_t();
    }

    //
    // Retrieve the document. Both calls return a Transport::DocStatus
    // describing the outcome.
    // NOTE(review): 'date' presumably is the "modified since" reference
    // time, and 'filenames' the local candidates to try -- confirm.
    //
    Transport::DocStatus Retrieve(Server *server, HtDateTime date);
    Transport::DocStatus RetrieveLocal(HtDateTime date, StringList *filenames);

    //
    // Return an appropriate parsable object for the document type.
    //
    Parsable *getParsable();

    //
    // Set the username and password to be used in any requests
    //
    void setUsernamePassword(const String& credentials)
    {
        authorization = credentials;
    }

    // Same as above, but stored separately in proxy_authorization.
    void setProxyUsernamePassword(const String& credentials)
    {
        proxy_authorization = credentials;
    }

    // The HTTP transport handler pointer held by this document.
    HtHTTP *GetHTTPHandler() const
    {
        return HTTPConnect;
    }

private:
    // Header status codes; their use is not visible in this header.
    enum
    {
        Header_ok,
        Header_not_found,
        Header_not_changed,
        Header_redirect,
        Header_not_text,
        Header_not_authorized
    };

    URL *url;                       // returned by Url()
    URL *proxy;
    URL *referer;
    String contents;                // document body, see Contents()
    String redirected_to;           // returned by Redirected()
    String contentType;             // returned by ContentType()
    String authorization;           // set by setUsernamePassword()
    String proxy_authorization;     // set by setProxyUsernamePassword()
    int contentLength;              // returned by ContentLength()
    int document_length;            // returned by Length()
    HtDateTime modtime;             // returned by ModTime()
    int max_doc_size;               // NOTE(review): presumably from the ctor's max_size -- confirm
    int num_retries;

    int UseProxy();

    // Transport objects, one pointer per protocol handler type.
    // NOTE(review): transportConnect presumably points at whichever of
    // the handlers below is in use for the current URL -- confirm.
    Transport *transportConnect;
    HtHTTP *HTTPConnect;
    HtHTTP *HTTPSConnect;
    HtFile *FileConnect;
    HtFTP *FTPConnect;
    HtNNTP *NNTPConnect;
    ExternalTransport *externalConnect;

    ///////
    // Tell us if we should retry to retrieve an URL depending on
    // the first returned document status
    ///////
    int ShouldWeRetry(Transport::DocStatus DocumentStatus);
};
#endif