summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htcommon/DocumentRef.h
blob: 446ff6f76394e409d8a011f99d60e504a785bd5d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
//
// DocumentRef.h
//
// DocumentRef: Reference to an indexed document. Keeps track of all
//              information stored on the document, either by the dig 
//              or temporary search information.
//
// Part of the ht://Dig package   <http://www.htdig.org/>
// Copyright (c) 1995-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: DocumentRef.h,v 1.29 2004/05/28 13:15:12 lha Exp $
//

#ifndef _DocumentRef_h_
#define _DocumentRef_h_

#include "htString.h"
#include "List.h"
#include "HtWordList.h"

#include <time.h>

enum ReferenceState
{
    Reference_normal,
    Reference_not_found,
    Reference_noindex,
    Reference_obsolete
};

class DocumentRef : public Object
{
    public:
    //
    // Construction/Destruction
    //
    DocumentRef();
    ~DocumentRef();

    //
    // A DocumentRef can read itself from a character string and
    // convert itself into a character string
    //
    void		Serialize(String &s);
    void		Deserialize(String &s);

    //
    // Access to the members
    //
    int			DocID()				{return docID;}
    char		*DocURL()			{return docURL;}
    time_t		DocTime()			{return docTime;}
    char		*DocTitle()			{return docTitle;}
    char		*DocAuthor()			{return docAuthor;}
    char		*DocHead()			{return docHead;}
    int			DocHeadIsSet()			{return docHeadIsSet;}
    char                *DocMetaDsc()                   {return docMetaDsc;}
    time_t		DocAccessed()			{return docAccessed;}
    int			DocLinks()			{return docLinks;}
    int                 DocBackLinks()                  {return docBackLinks;}
    List		*Descriptions()			{return &descriptions;}
    ReferenceState	DocState()			{return docState;}
    int			DocSize()			{return docSize;}
    List		*DocAnchors()			{return &docAnchors;}
    double     		DocScore()			{return docScore;}
    int                 DocSig()                        {return docSig;}
    int			DocAnchor()			{return docAnchor;}
    int			DocHopCount()			{return docHopCount;}
    char		*DocEmail()			{return docEmail;}
    char		*DocNotification()		{return docNotification;}
    char		*DocSubject()			{return docSubject;}
	
    void		DocID(int d)			{docID = d;}
    void		DocURL(const char *u)		{docURL = u;}
    void		DocTime(time_t t)		{docTime = t;}
    void		DocTitle(const char *t)		{docTitle = t;}
    void		DocAuthor(const char *a)	{docAuthor = a;}
    void		DocHead(const char *h)		{docHeadIsSet = 1; docHead = h;}
    void                DocMetaDsc(const char *md)      {docMetaDsc = md;}
    void		DocAccessed(time_t t)		{docAccessed = t;}
    void		DocLinks(int l)			{docLinks = l;}
    void                DocBackLinks(int l)             {docBackLinks = l;}
    void		Descriptions(List &l)		{descriptions = l;}
    void		AddDescription(const char *d, HtWordList &words);
    void		DocState(ReferenceState s)	{docState = s;}
    void		DocState(int s);
    void		DocSize(int s)			{docSize = s;}
    void                DocSig(int s)                   {docSig = s;}
    void		DocAnchors(List &l)		{docAnchors = l;}
    void		AddAnchor(const char *a);
    void		DocScore(double s)		{docScore = s;}
    void		DocAnchor(int a)		{docAnchor = a;}
    void		DocHopCount(int h)		{docHopCount = h;}
    void		DocEmail(const char *e)		{docEmail = e;}
    void		DocNotification(const char *n)	{docNotification = n;}
    void		DocSubject(const char *s)	{docSubject = s;}
	
    void		Clear();			// Reset everything

    protected:
    //
    // These values will be stored when serializing
    //

    // This is the index number of the document in the database.
    int			docID;
    // This is the URL of the document.
    String		docURL;
    // This is the time specified in the document's header
    // Usually that's the last modified time, for servers that return it.
    time_t		docTime;
    // This is the time that the last retrieval occurred.
    time_t		docAccessed;
    // This is the stored excerpt of the document, just text.
    String		docHead;
    // This indicates if the stored excerpt of the document has been set.
    int			docHeadIsSet;
    // This is the document-specified description.
    // For HTML, that's the META description tag.
    String              docMetaDsc;
    // This is the title of the document.
    String		docTitle;
    // This is the author of the document, as specified in meta information
    String		docAuthor;
    // This is a list of Strings, the text of links pointing to this document.
    // (e.g. <a href="docURL">description</a>
    List		descriptions;
    // This is the state of the document--modified, normal, etc.
    ReferenceState	docState;
    // This is the size of the original document.
    int			docSize;
    // This is a count of the links in the document (outgoing links).
    int			docLinks;
    // This is a count of the links to the document (incoming links).
    int                 docBackLinks;
    // This is a list of the anchors in the document (i.e. <A NAME=...)
    List		docAnchors;
    // This is a count of the number of hops from start_urls to here.
    int			docHopCount;
    // This is a signature of the document. (e.g. md5sum, checksum...)
    // This is currently unused.
    long int		docSig;
	
    //
    // The following values are for the email notification of expiration
    //

    // This is the email destination for htnotify.
    String		docEmail;
    // This is the date that htnotify should use as comparison.
    String		docNotification;
    // This is the subject of the email sent out by htnotify.
    String		docSubject;

    //
    // This is used for searching and is not stored in the database
    //
    
    // This is the current score of this document.
    double			docScore;
    // This is the nearest anchor for the search word.
    int			docAnchor;

};

#endif