1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
//
// Document.h
//
// Document: This class holds everything there is to know about a document.
// The actual contents of the document may or may not be present at
// all times for memory conservation reasons.
// The document can be told to retrieve its contents. This is done
// with the Retrieve call. In case the retrieval causes a
// redirect, the link is followed, but this process is done
// only once (to prevent loops.) If the redirect didn't
// work, Document_not_found is returned.
//
// Part of the ht://Dig package <http://www.htdig.org/>
// Copyright (c) 1995-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: Document.h,v 1.19 2004/05/28 13:15:14 lha Exp $
//
//
#ifndef _Document_h_
#define _Document_h_
#include "Parsable.h"
#include "Object.h"
#include "URL.h"
#include "htString.h"
#include "StringList.h"
#include "Transport.h"
#include "HtHTTP.h"
#include "HtFile.h"
#include "HtFTP.h"
#include "HtNNTP.h"
#include "ExternalTransport.h"
#include "Server.h"
class Connection;
class Document : public Object
{
public:
//
// Construction/Destruction
//
Document(char *url = 0, int max_size = 0);
~Document();
//
// Interface to the document.
//
void Reset();
int Length() {return document_length;}
int ContentLength() {return contentLength;}
int StoredLength() {return contents.length();}
char *Contents() {return contents;}
void Contents(char *s) {contents = s; document_length = contents.length();}
char *ContentType() {return contentType.get();}
//
// In case the retrieval process went through a redirect process,
// the new url can be gotten using the following call
//
char *Redirected() {return redirected_to;}
URL *Url() {return url;}
void Url(const String &url);
void Referer(const String &url);
time_t ModTime() {return modtime.GetTime_t();}
Transport::DocStatus Retrieve(Server *server, HtDateTime date);
Transport::DocStatus RetrieveLocal(HtDateTime date, StringList *filenames);
//
// Return an appropriate parsable object for the document type.
//
Parsable *getParsable();
//
// Set the username and password to be used in any requests
//
void setUsernamePassword(const String& credentials)
{ authorization = credentials;}
void setProxyUsernamePassword(const String& credentials)
{ proxy_authorization = credentials;}
HtHTTP *GetHTTPHandler() const { return HTTPConnect; }
private:
enum
{
Header_ok,
Header_not_found,
Header_not_changed,
Header_redirect,
Header_not_text,
Header_not_authorized
};
URL *url;
URL *proxy;
URL *referer;
String contents;
String redirected_to;
String contentType;
String authorization;
String proxy_authorization;
int contentLength;
int document_length;
HtDateTime modtime;
int max_doc_size;
int num_retries;
int UseProxy();
Transport *transportConnect;
HtHTTP *HTTPConnect;
HtHTTP *HTTPSConnect;
HtFile *FileConnect;
HtFTP *FTPConnect;
HtNNTP *NNTPConnect;
ExternalTransport *externalConnect;
///////
// Tell us if we should retry to retrieve an URL depending on
// the first returned document status
///////
int ShouldWeRetry(Transport::DocStatus DocumentStatus);
};
#endif
|