summaryrefslogtreecommitdiffstats
path: root/akregator/src/librss
diff options
context:
space:
mode:
Diffstat (limited to 'akregator/src/librss')
-rw-r--r--akregator/src/librss/CMakeLists.txt28
-rw-r--r--akregator/src/librss/article.cpp32
-rw-r--r--akregator/src/librss/article.h2
-rw-r--r--akregator/src/librss/category.h2
-rw-r--r--akregator/src/librss/document.cpp4
-rw-r--r--akregator/src/librss/document.h2
-rw-r--r--akregator/src/librss/enclosure.h2
-rw-r--r--akregator/src/librss/global.h17
-rw-r--r--akregator/src/librss/image.cpp6
-rw-r--r--akregator/src/librss/image.h4
-rw-r--r--akregator/src/librss/loader.cpp30
-rw-r--r--akregator/src/librss/loader.h18
-rw-r--r--akregator/src/librss/test_data/atom_spec.xml42
-rw-r--r--akregator/src/librss/test_data/comment_api.xml411
-rw-r--r--akregator/src/librss/test_data/dublincore.xml42
-rw-r--r--akregator/src/librss/test_data/rdf.xml64
-rw-r--r--akregator/src/librss/test_data/rss091.xml50
-rw-r--r--akregator/src/librss/testlibrss.cpp259
-rw-r--r--akregator/src/librss/testlibrss.h22
-rw-r--r--akregator/src/librss/tools_p.cpp63
-rw-r--r--akregator/src/librss/tools_p.h2
21 files changed, 1012 insertions, 90 deletions
diff --git a/akregator/src/librss/CMakeLists.txt b/akregator/src/librss/CMakeLists.txt
index c2e7a001..23dc39a2 100644
--- a/akregator/src/librss/CMakeLists.txt
+++ b/akregator/src/librss/CMakeLists.txt
@@ -16,7 +16,6 @@ include_directories(
${TQT_INCLUDE_DIRS}
)
-
##### rsslocal (static) #########################
tde_add_library( rsslocal STATIC_PIC AUTOMOC
@@ -25,3 +24,30 @@ tde_add_library( rsslocal STATIC_PIC AUTOMOC
tools_p.cpp loader.cpp enclosure.cpp category.cpp
feeddetector.cpp
)
+
+tde_add_check_executable( testlibrss AUTOMOC
+ SOURCES testlibrss.cpp
+ LINK rsslocal-static ${TQT_LIBRARIES} tdeio-shared
+)
+
+set( TEST_DATA "${CMAKE_CURRENT_SOURCE_DIR}/test_data")
+
+add_test( NAME TestLibRSS_0.91
+ COMMAND testlibrss ${TEST_DATA}/rss091.xml
+)
+
+add_test( NAME TestLibRSS_CommentAPI
+ COMMAND testlibrss ${TEST_DATA}/comment_api.xml
+)
+
+add_test( NAME TestLibRSS_DublinCore
+ COMMAND testlibrss ${TEST_DATA}/dublincore.xml
+)
+
+add_test( NAME TestLibRSS_RDF
+ COMMAND testlibrss ${TEST_DATA}/rdf.xml
+)
+
+add_test( NAME TestLibRSS_AtomSpec
+ COMMAND testlibrss ${TEST_DATA}/atom_spec.xml
+)
diff --git a/akregator/src/librss/article.cpp b/akregator/src/librss/article.cpp
index 18522fe3..88d42a7d 100644
--- a/akregator/src/librss/article.cpp
+++ b/akregator/src/librss/article.cpp
@@ -92,13 +92,16 @@ Article::Article(const TQDomNode &node, Format format, Version version) : d(new
d->link = elemText;
}
+ // prefer content/content:encoded over summary/description for feeds that provide it
+ if (format == AtomFeed)
+ {
+ d->description = extractNode(node, TQString::fromLatin1("content"), false);
+ }
+ else
+ {
+ d->description = extractElementTextNS(node, ContentNamespace, TQString::fromLatin1("encoded"), false);
+ }
- // prefer content/content:encoded over summary/description for feeds that provide it
- TQString tagName=(format==AtomFeed)? TQString::fromLatin1("content"): TQString::fromLatin1("content:encoded");
-
- if (!(elemText = extractNode(node, tagName, false)).isNull())
- d->description = elemText;
-
if (d->description.isEmpty())
{
if (!(elemText = extractNode(node, TQString::fromLatin1("body"), false)).isNull())
@@ -130,7 +133,7 @@ Article::Article(const TQDomNode &node, Format format, Version version) : d(new
time = KRFCDate::parseDate(elemText);
}
- if (!(elemText = extractNode(node, TQString::fromLatin1("dc:date"))).isNull())
+ if (!(elemText = extractElementTextNS(node, DublinCoreNamespace, TQString::fromLatin1("date"))).isNull())
{
time = parseISO8601Date(elemText);
}
@@ -139,27 +142,22 @@ Article::Article(const TQDomNode &node, Format format, Version version) : d(new
if (time != 0)
d->pubDate.setTime_t(time);
- if (!(elemText = extractNode(node, TQString::fromLatin1("wfw:comment"))).isNull()) {
- d->commentsLink = elemText;
- }
-
- if (!(elemText = extractNode(node, TQString::fromLatin1("slash:comments"))).isNull()) {
- d->numComments = elemText.toInt();
- }
+ d->commentsLink = extractElementTextNS(node, CommentAPINamespace, TQString::fromLatin1("comment"));
+ d->numComments = extractElementTextNS(node, SlashNamespace, TQString::fromLatin1("comments")).toInt();
TQDomElement element = TQDomNode(node).toElement();
// in RSS 1.0, we use <item about> attribute as ID
// FIXME: pass format version instead of checking for attribute
- if (!element.isNull() && element.hasAttribute(TQString::fromLatin1("rdf:about")))
+ if (!element.isNull() && element.hasAttributeNS(RDFNamespace, TQString::fromLatin1("about")))
{
- d->guid = element.attribute(TQString::fromLatin1("rdf:about")); // HACK: using ns properly did not work
+ d->guid = element.attributeNS(RDFNamespace, TQString::fromLatin1("about"), TQString::null);
d->guidIsPermaLink = false;
}
else
{
- tagName=(format==AtomFeed)? TQString::fromLatin1("id"): TQString::fromLatin1("guid");
+ TQString tagName=(format==AtomFeed)? TQString::fromLatin1("id"): TQString::fromLatin1("guid");
TQDomNode n = node.namedItem(tagName);
if (!n.isNull())
{
diff --git a/akregator/src/librss/article.h b/akregator/src/librss/article.h
index c27fdfe7..574f4840 100644
--- a/akregator/src/librss/article.h
+++ b/akregator/src/librss/article.h
@@ -34,7 +34,7 @@ namespace RSS
* is via Document::articles().
* @see Document::articles()
*/
- class KDE_EXPORT Article
+ class TDE_EXPORT Article
{
public:
/**
diff --git a/akregator/src/librss/category.h b/akregator/src/librss/category.h
index 0c40e418..c38a1c4e 100644
--- a/akregator/src/librss/category.h
+++ b/akregator/src/librss/category.h
@@ -33,7 +33,7 @@ class TQString;
namespace RSS
{
- class KDE_EXPORT Category
+ class TDE_EXPORT Category
{
public:
diff --git a/akregator/src/librss/document.cpp b/akregator/src/librss/document.cpp
index 3bc64d00..7d94a252 100644
--- a/akregator/src/librss/document.cpp
+++ b/akregator/src/librss/document.cpp
@@ -224,7 +224,7 @@ Document::Document(const TQDomDocument &doc) : d(new Private)
d->copyright = elemText;
if (d->format == AtomFeed)
- elemText = rootNode.toElement().attribute(TQString::fromLatin1("xml:lang"), TQString());
+ elemText = rootNode.toElement().attributeNS(XMLNamespace, "lang", TQString::null);
else
elemText = extractNode(channelNode, TQString::fromLatin1("language"));
@@ -441,7 +441,7 @@ Document::Document(const TQDomDocument &doc) : d(new Private)
d->pubDate.setTime_t(_time);
}
- if (!(elemText = extractNode(channelNode, TQString::fromLatin1("dc:date"))).isNull()) {
+ if (!(elemText = extractElementTextNS(channelNode, DublinCoreNamespace, "date")).isNull()) {
time_t _time = parseISO8601Date(elemText);
/* \bug This isn't really the right way since it will set the date to
* Jan 1 1970, 1:00:00 if the passed date was invalid; this means that
diff --git a/akregator/src/librss/document.h b/akregator/src/librss/document.h
index 8d098e7d..b876caf2 100644
--- a/akregator/src/librss/document.h
+++ b/akregator/src/librss/document.h
@@ -28,7 +28,7 @@ namespace RSS
* but rather use Loader::loadFrom() to produce a Document object.
* @see Loader::loadForm()
*/
- class KDE_EXPORT Document
+ class TDE_EXPORT Document
{
public:
/**
diff --git a/akregator/src/librss/enclosure.h b/akregator/src/librss/enclosure.h
index 154f8bc0..54ee2059 100644
--- a/akregator/src/librss/enclosure.h
+++ b/akregator/src/librss/enclosure.h
@@ -33,7 +33,7 @@ class TQString;
namespace RSS
{
- class KDE_EXPORT Enclosure
+ class TDE_EXPORT Enclosure
{
public:
diff --git a/akregator/src/librss/global.h b/akregator/src/librss/global.h
index 966521cc..8ff4e406 100644
--- a/akregator/src/librss/global.h
+++ b/akregator/src/librss/global.h
@@ -11,13 +11,28 @@
#ifndef LIBRSS_GLOBAL_H
#define LIBRSS_GLOBAL_H
-#include <kdemacros.h>
+#include <tdemacros.h>
template <class>
class TQValueList;
namespace RSS
{
+ /// The Atom 1.0 XML namespace.
+ constexpr const char *AtomNamespace = "http://www.w3.org/2005/Atom";
+ /// The CommentAPI XML namespace.
+ constexpr const char *CommentAPINamespace = "http://wellformedweb.org/CommentAPI/";
+ /// The Content XML namespace.
+ constexpr const char *ContentNamespace = "http://purl.org/rss/1.0/modules/content/";
+ /// The Dublin Core XML namespace.
+ constexpr const char *DublinCoreNamespace = "http://purl.org/dc/elements/1.1/";
+ /// The RDF Concepts Vocabulary (RDF) namespace.
+ constexpr const char *RDFNamespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
+ /// The Slash XML namespace.
+ constexpr const char *SlashNamespace = "http://purl.org/rss/1.0/modules/slash/";
+ /// The XML namespace.
+ constexpr const char *XMLNamespace = "http://www.w3.org/XML/1998/namespace";
+
/**
* Versions currently supported by this library. This enumeration is
* subject to be extended in the future and used by Document::version() to
diff --git a/akregator/src/librss/image.cpp b/akregator/src/librss/image.cpp
index 174a105b..65aaf539 100644
--- a/akregator/src/librss/image.cpp
+++ b/akregator/src/librss/image.cpp
@@ -112,9 +112,9 @@ void Image::getPixmap()
d->pixmapBuffer->open(IO_WriteOnly);
d->job = TDEIO::get(d->url, false, false);
- connect(d->job, TQT_SIGNAL(data(TDEIO::Job *, const TQByteArray &)),
- this, TQT_SLOT(slotData(TDEIO::Job *, const TQByteArray &)));
- connect(d->job, TQT_SIGNAL(result(TDEIO::Job *)), this, TQT_SLOT(slotResult(TDEIO::Job *)));
+ connect(d->job, TQ_SIGNAL(data(TDEIO::Job *, const TQByteArray &)),
+ this, TQ_SLOT(slotData(TDEIO::Job *, const TQByteArray &)));
+ connect(d->job, TQ_SIGNAL(result(TDEIO::Job *)), this, TQ_SLOT(slotResult(TDEIO::Job *)));
}
void Image::slotData(TDEIO::Job *, const TQByteArray &data)
diff --git a/akregator/src/librss/image.h b/akregator/src/librss/image.h
index 299b4292..d98a59fc 100644
--- a/akregator/src/librss/image.h
+++ b/akregator/src/librss/image.h
@@ -31,9 +31,9 @@ namespace RSS
* is via Document::image().
* @see Document::image()
*/
- class KDE_EXPORT Image : public TQObject
+ class TDE_EXPORT Image : public TQObject
{
- Q_OBJECT
+ TQ_OBJECT
public:
/**
diff --git a/akregator/src/librss/loader.cpp b/akregator/src/librss/loader.cpp
index 8e2967b7..8674dfb7 100644
--- a/akregator/src/librss/loader.cpp
+++ b/akregator/src/librss/loader.cpp
@@ -13,7 +13,7 @@
#include "feeddetector.h"
#include <tdeio/job.h>
-#include <kprocess.h>
+#include <tdeprocess.h>
#include <kstaticdeleter.h>
#include <kurl.h>
#include <kdebug.h>
@@ -111,13 +111,13 @@ void FileRetriever::retrieveData(const KURL &url)
d->job->addMetaData("UserAgent", ua);
- TQTimer::singleShot(1000*90, this, TQT_SLOT(slotTimeout()));
+ TQTimer::singleShot(1000*90, this, TQ_SLOT(slotTimeout()));
- connect(d->job, TQT_SIGNAL(data(TDEIO::Job *, const TQByteArray &)),
- TQT_SLOT(slotData(TDEIO::Job *, const TQByteArray &)));
- connect(d->job, TQT_SIGNAL(result(TDEIO::Job *)), TQT_SLOT(slotResult(TDEIO::Job *)));
- connect(d->job, TQT_SIGNAL(permanentRedirection(TDEIO::Job *, const KURL &, const KURL &)),
- TQT_SLOT(slotPermanentRedirection(TDEIO::Job *, const KURL &, const KURL &)));
+ connect(d->job, TQ_SIGNAL(data(TDEIO::Job *, const TQByteArray &)),
+ TQ_SLOT(slotData(TDEIO::Job *, const TQByteArray &)));
+ connect(d->job, TQ_SIGNAL(result(TDEIO::Job *)), TQ_SLOT(slotResult(TDEIO::Job *)));
+ connect(d->job, TQ_SIGNAL(permanentRedirection(TDEIO::Job *, const KURL &, const KURL &)),
+ TQ_SLOT(slotPermanentRedirection(TDEIO::Job *, const KURL &, const KURL &)));
}
void FileRetriever::slotTimeout()
@@ -207,10 +207,10 @@ void OutputRetriever::retrieveData(const KURL &url)
d->buffer->open(IO_WriteOnly);
d->process = new KShellProcess();
- connect(d->process, TQT_SIGNAL(processExited(TDEProcess *)),
- TQT_SLOT(slotExited(TDEProcess *)));
- connect(d->process, TQT_SIGNAL(receivedStdout(TDEProcess *, char *, int)),
- TQT_SLOT(slotOutput(TDEProcess *, char *, int)));
+ connect(d->process, TQ_SIGNAL(processExited(TDEProcess *)),
+ TQ_SLOT(slotExited(TDEProcess *)));
+ connect(d->process, TQ_SIGNAL(receivedStdout(TDEProcess *, char *, int)),
+ TQ_SLOT(slotOutput(TDEProcess *, char *, int)));
*d->process << url.path();
d->process->start(TDEProcess::NotifyOnExit, TDEProcess::Stdout);
}
@@ -268,7 +268,7 @@ Loader *Loader::create()
Loader *Loader::create(TQObject *object, const char *slot)
{
Loader *loader = create();
- connect(loader, TQT_SIGNAL(loadingComplete(Loader *, Document, Status)),
+ connect(loader, TQ_SIGNAL(loadingComplete(Loader *, Document, Status)),
object, slot);
return loader;
}
@@ -290,8 +290,8 @@ void Loader::loadFrom(const KURL &url, DataRetriever *retriever)
d->url=url;
d->retriever = retriever;
- connect(d->retriever, TQT_SIGNAL(dataRetrieved(const TQByteArray &, bool)),
- this, TQT_SLOT(slotRetrieverDone(const TQByteArray &, bool)));
+ connect(d->retriever, TQ_SIGNAL(dataRetrieved(const TQByteArray &, bool)),
+ this, TQ_SLOT(slotRetrieverDone(const TQByteArray &, bool)));
d->retriever->retrieveData(url);
}
@@ -351,7 +351,7 @@ void Loader::slotRetrieverDone(const TQByteArray &data, bool success)
TQByteArray tmpData;
tmpData.setRawData(charData, len);
- if (doc.setContent(tmpData))
+ if (doc.setContent(tmpData, /* namespaceProcessing */ true))
{
rssDoc = Document(doc);
if (!rssDoc.isValid())
diff --git a/akregator/src/librss/loader.h b/akregator/src/librss/loader.h
index ed22da22..ce93fc25 100644
--- a/akregator/src/librss/loader.h
+++ b/akregator/src/librss/loader.h
@@ -32,9 +32,9 @@ namespace RSS
* a new retrieval algorithm which can then be plugged into the RSS loader.
* @see Loader, FileRetriever, OutputRetriever
*/
- class KDE_EXPORT DataRetriever : public TQObject
+ class TDE_EXPORT DataRetriever : public TQObject
{
- Q_OBJECT
+ TQ_OBJECT
public:
/**
@@ -87,9 +87,9 @@ namespace RSS
* Implements a file retriever, to be used with Loader::loadFrom().
* @see DataRetriever, Loader::loadFrom()
*/
- class KDE_EXPORT FileRetriever : public DataRetriever
+ class TDE_EXPORT FileRetriever : public DataRetriever
{
- Q_OBJECT
+ TQ_OBJECT
public:
/**
@@ -161,7 +161,7 @@ namespace RSS
*/
class OutputRetriever : public DataRetriever
{
- Q_OBJECT
+ TQ_OBJECT
public:
/**
@@ -211,8 +211,8 @@ namespace RSS
*
* \code
* Loader *loader = Loader::create();
- * connect(loader, TQT_SIGNAL(loadingComplete(Loader *, Document, Status)),
- * this, TQT_SLOT(slotLoadingComplete(Loader *, Document, Status)));
+ * connect(loader, TQ_SIGNAL(loadingComplete(Loader *, Document, Status)),
+ * this, TQ_SLOT(slotLoadingComplete(Loader *, Document, Status)));
* loader->loadFrom("http://www.blah.org/foobar.rdf", new FileRetriever);
* \endcode
*
@@ -254,9 +254,9 @@ namespace RSS
* loadingComplete signal goes out of scope. This is e.g. the case if you
* intend to call getPixmap() on Document::image()!
*/
- class KDE_EXPORT Loader : public TQObject
+ class TDE_EXPORT Loader : public TQObject
{
- Q_OBJECT
+ TQ_OBJECT
friend class someClassWhichDoesNotExist;
public:
diff --git a/akregator/src/librss/test_data/atom_spec.xml b/akregator/src/librss/test_data/atom_spec.xml
new file mode 100644
index 00000000..b8e3dff4
--- /dev/null
+++ b/akregator/src/librss/test_data/atom_spec.xml
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="utf-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us">
+ <title type="text">dive into mark</title>
+ <subtitle type="html">
+ A &lt;em&gt;lot&lt;/em&gt; of effort
+ went into making this effortless
+ </subtitle>
+ <updated>2005-07-31T12:29:29Z</updated>
+ <id>tag:example.org,2003:3</id>
+ <link rel="alternate" type="text/html" hreflang="en" href="http://example.org/"/>
+ <link rel="self" type="application/atom+xml" href="http://example.org/feed.atom"/>
+ <rights>Copyright (c) 2003, Mark Pilgrim</rights>
+ <generator uri="http://www.example.com/" version="1.0">
+ Example Toolkit
+ </generator>
+ <entry>
+ <title>Atom draft-07 snapshot</title>
+ <link rel="alternate" type="text/html" href="http://example.org/2005/04/02/atom"/>
+ <link rel="enclosure" type="audio/mpeg" length="1337" href="http://example.org/audio/ph34r_my_podcast.mp3"/>
+ <id>tag:example.org,2003:3.2397</id>
+ <updated>2005-07-31T12:29:29Z</updated>
+ <published>2003-12-13T08:29:29-04:00</published>
+ <author>
+ <name>Mark Pilgrim</name>
+ <uri>http://example.org/</uri>
+ <email>f8dy@example.com</email>
+ </author>
+ <contributor>
+ <name>Sam Ruby</name>
+ </contributor>
+ <contributor>
+ <name>Joe Gregorio</name>
+ </contributor>
+ <content type="xhtml" xml:lang="en" xml:base="http://diveintomark.org/">
+ <div xmlns="http://www.w3.org/1999/xhtml">
+ <p>
+ <i>[Update: The Atom draft is finished.]</i>
+ </p>
+ </div>
+ </content>
+ </entry>
+</feed>
diff --git a/akregator/src/librss/test_data/comment_api.xml b/akregator/src/librss/test_data/comment_api.xml
new file mode 100644
index 00000000..7bbb29ae
--- /dev/null
+++ b/akregator/src/librss/test_data/comment_api.xml
@@ -0,0 +1,411 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw='http://wellformedweb.org/CommentAPI/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rl='http://www.purl.org/RESTLog/'>
+ <channel>
+ <title>The Well-Formed Web</title>
+ <link>http://wellformedweb.org/news/</link>
+ <description>Exploring the limits of XML and HTTP</description>
+ <dc:creator>BitWorking, Inc</dc:creator>
+ <item>
+ <title>Should you use Content Negotiation in your Web Services?</title>
+ <link>http://bitworking.org/news/WebServicesAndContentNegotiation</link>
+ <description>
+ &lt;p&gt;Should you use Content Negotiation when building your web service?
+The short answer is no. There are definite problems with &lt;abbrev title="Content Negotiation"&gt;conneg&lt;/abbrev&gt;
+and I can give some examples of problems I have run into and also point to problems
+other have run into.&lt;/p&gt;
+
+
+&lt;p&gt;First let's back up and explain Content Negotiation. Your browser is
+ a generic display program and can take in various kinds of media, such
+ as HTML, JPEGs, CSS, Flash, etc. and display it for you. The first thing to
+ note is that each of those kinds of media have different mime types.
+ Each format has it's own registered mime type and when a client
+ does a GET on a URL it gets back not only the content but the response
+ also includes a &lt;code&gt;Content-Type:&lt;/code&gt; header which lists
+ the mime-type of what is in the body.
+&lt;/p&gt;
+
+&lt;p&gt;One of the interesting things about HTTP is that it allows
+ the same URI to have multiple representations. For example I
+ could have a URL that had both &lt;code&gt;plain/text&lt;/code&gt; and &lt;code&gt;text/html&lt;/code&gt;
+ representations. Now that leads to two obvious questions.&lt;/p&gt;
+
+&lt;ol&gt;
+ &lt;li&gt;How does the server know which represenation to serve?&lt;/li&gt;
+ &lt;li&gt;How can the browser influence the servers choice to get something it can handle?&lt;/li&gt;
+&lt;/ol&gt;
+
+&lt;p&gt;Let's start by answering question two first. The browser uses the &lt;code&gt;Accept:&lt;/code&gt;
+ header to list out the mime-types that it is willing to accept. There is also a weighting
+ scheme that allows the client to specify a preference for one media type
+ over another. For example, here is the capture of some of the headers, including the &lt;code&gt;Accept:&lt;/code&gt; header,
+ sent by Mozilla when it does a GET on a URI:&lt;/p&gt;
+
+&lt;pre class="example"&gt;&lt;code&gt;Accept: text/xml,application/xml,application/xhtml+xml,\
+ text/html;q=0.9,text/plain;q=0.8,video/x-mng,\
+ image/png,image/jpeg,image/gif;q=0.2,*/*;q=0.1
+Accept-Language: en-us,en;q=0.5
+Accept-Encoding: gzip,deflate,compress;q=0.9
+Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7
+&lt;/code&gt;&lt;/pre&gt;
+
+&lt;p&gt;The &lt;code&gt;Accept:&lt;/code&gt; header list the mime-types that the browser can
+ handle along with weights of the form &lt;code&gt;q=&lt;/code&gt; where the argument
+ is a floating point number between 0 and 1. The weights indicate a preference
+ for that media type, with a higher number inidicating a higher preference. Note that
+ there are several bits of complexity I am going to ignore for now. The first is the last
+ type the Mozilla browser says in can accept, */*;q=0.1. This is a wild card
+ match, which will match any mime-type that the server could want to serve up. The second
+ is that there are multiple Accept headers, one for language, one for encoding, another
+ for charset. How these over-lap and influence the response sent won't be covered here.
+&lt;/p&gt;
+
+&lt;p&gt;Now to answer the first question. The server looks at the available representations
+ is has and servers up the one with the highest preference to the client.
+ Based on the &lt;code&gt;Accept:&lt;/code&gt;
+ header it sends an appropriate representation back and indicates the type it
+ chose using the &lt;code&gt;Content-Type:&lt;/code&gt; header.&lt;/p&gt;
+
+&lt;p&gt;This seems like a really cool and vastly under utilized feature of HTTP. It also
+ seems particularly intriguing for web services. You could return
+ JPEGs from that mapping service for the older client platforms, but also
+ serve up SVG for the newer clients so they can scale and rotate their maps.
+ What could possibly go wrong?&lt;/p&gt;
+
+&lt;p&gt;The first thing that could go wrong is a bug or mis-configuration on the client or the server.
+ This has happened to me in the
+ past. The W3C does conneg on some of their recommendations, returning either HTML or plain
+ text based on the clients capabilities. This is fine, but one day their server was
+ either confused or mis-configured because it would only serve the recommendation in &lt;code&gt;plain/text&lt;/code&gt;.
+ I really needed the HTML form, but after trying multiple browsers from multipe locations I could only retrieve the text
+ format. I ended up pulling the HTML version out of the Google cache.&lt;/p&gt;
+
+&lt;p&gt;The second problem that I ran across highlights the real core problem with conneg. I was
+ trying to use the W3C XSLT service to do some transformations on my web pages. Now the server side
+ software I use to run Well-Formed Web does conneg and can return either HTML or an RSS item
+ fragment for each URI. At the time I was serving up XHTML 1.0, which is valid XML and
+ thus good input into an XSLT service. So the way the XSLT service works is that you enter two URIs, one
+ for the source content and the other for the XSLT sheet to apply to the source content.
+ My transformation kept failing and it was because of the
+ Accept headers that the XSLT service sent when it went to retrieve the source content.
+ My server kept returning the RSS item fragment and not
+ the XHTML. Now this would have been fine if I wanted to apply an XSLT sheet to my RSS item fragment, but in this
+ case I wanted it to apply to the XHTML. Note that the problem could have been completely reversed, I could have
+ been trying to apply the XSLT to the RSS item and not to the XHTML and my server could have returned
+ the XHTML all the time. The crux of the problem is that when I gave the URI to the XSLT transformation
+ service I have no way of specifying what mime-type to request. I get no chance to tweak the
+ services &lt;code&gt;Accept:&lt;/code&gt; header.
+&lt;/p&gt;
+
+&lt;p&gt;Let's cover that again to clarify. If I hand you a URI only, and that URI supports conneg,
+ then I get no control over which representation you retrieve. In the cases where you are
+ passing a URI into a service that is later going to retrieve a represenation from that URI, you
+ really have no idea which representation it's going to get. That could mean that you end up
+ passing your RSS feed to the W3C HTML validator, or you end up passing XHTML instead of RSS into
+ an XSLT translator service, or you end up passing a 12MB PNG to a handheld instead of
+ that 20KB SVG file. You end up with a problem that is hard to debug and
+ one that wouldn't exist if each URI had only one mime-type.&lt;/p&gt;
+
+&lt;h3&gt;Further Reading&lt;/h3&gt;
+&lt;p&gt;&lt;a href="http://norman.walsh.name/2003/07/02/conneg"&gt;Norman Walsh has also run into problems&lt;/a&gt; with Content Negotiation.&lt;/p&gt;
+&lt;p&gt;The issue of using fragment identifiers with conneg has not only come up but was important enough to
+ merit mention in the W3C document &lt;a href="http://www.w3.org/TR/webarch/#frag-conneg"&gt;Architecture of the World Wide Web&lt;/a&gt;.&lt;/p&gt;
+
+
+ </description>
+
+ <dc:date>2003-09-06T21:54:43-05:00</dc:date>
+ <wfw:comment>http://bitworking.org/news/comments/WebServicesAndContentNegotiation</wfw:comment>
+ <wfw:commentRss>http://bitworking.org/news/WebServicesAndContentNegotiation?crss</wfw:commentRss>
+ </item>
+ <item>
+ <title>Google2Atom</title>
+ <link>http://wellformedweb.org/news/Google2Atom</link>
+ <description>
+&lt;p&gt;Welcome to the Google2Atom web service. Just enter your
+ search and your &lt;a href="http://www.google.com/apis/"&gt;Google key&lt;/a&gt;
+ below. Once you press "Search" you will get an &lt;a href="http://www.mnot.net/drafts/draft-nottingham-atom-format-00.html"&gt;
+ Atom&lt;/a&gt; feed of the search results.
+&lt;/p&gt;
+
+&lt;form method="get" action="http://wellformedweb.org/cgi-bin/google2atom.cgi"&gt;
+&lt;p&gt;&lt;input size="50" name="q"/&gt;&lt;/p&gt;
+&lt;p&gt;Google Key: &lt;input size="20" name="license_key"/&gt;&lt;/p&gt;
+&lt;p&gt;&lt;input type="submit" value=" Search "/&gt;&lt;/p&gt;
+&lt;/form&gt;
+
+&lt;hr /&gt;
+
+&lt;p&gt;&lt;strong&gt;Note:&lt;/strong&gt; The Google Key is no longer mandatory, if it's not
+ supplied it will use my own key. In light of that please feel free to
+ use my key for experimentation, but if you start making heavy use
+ of this service please get your own Google API Key to avoid
+ limiting others use of this service.&lt;/p&gt;
+
+&lt;p&gt;This is a REST based reformulation of the Google API. As such it uses
+ query parameters in a GET based HTTP request to do the search. That is, it works
+ just like the regular google web page, but this form returns
+ a well-formed XML document instead of a web page. Why is this better?&lt;/p&gt;
+
+&lt;dl&gt;
+ &lt;dt&gt;Simplicity&lt;/dt&gt;
+ &lt;dd&gt;
+ It works just like the google web page, so it is
+ conceptually easier to understand.
+ &lt;/dd&gt;
+
+ &lt;dt&gt;Composability&lt;/dt&gt;
+ &lt;dd&gt;Since the request is just a simple GET the results of a query can be composed
+ with other web services. For example, the results could be transformed using
+ XSLT or fed into a validator.
+ &lt;/dd&gt;
+&lt;/dl&gt;
+
+&lt;h3&gt;Bonus Features&lt;/h3&gt;
+
+&lt;p&gt;One feature found in this interface that is not found
+ in the original Google API is the well-formedness of the
+ results content.
+ &lt;a href="http://bitworking.org/news/Announcing_pyTidy"&gt;PyTidy&lt;/a&gt;
+ is used to transform the HTML
+ snippets from the Google API into well-formed XML and place
+ those into 'content' elements with type='text/html' and
+ mode='xml'.
+&lt;/p&gt;
+
+&lt;h3&gt;Colophon&lt;/h3&gt;
+
+&lt;p&gt;Google2Atom is written in &lt;a href="http://www.python.org"&gt;Python&lt;/a&gt; and uses
+ both the &lt;a href="http://bitworking.org/news/Announcing_pyTidy"&gt;
+ pyTidy&lt;/a&gt; and &lt;a href="http://www.diveintomark.org/projects/pygoogle/"&gt;
+ pyGoogle&lt;/a&gt; libraries.&lt;/p&gt;
+
+ </description>
+
+ <dc:date>2003-11-22T01:18:42-05:00</dc:date>
+ <wfw:comment>http://wellformedweb.org/news/comments/Google2Atom</wfw:comment>
+ <wfw:commentRss>http://wellformedweb.org/news/Google2Atom?crss</wfw:commentRss>
+ </item>
+ <item>
+ <title>wfw namespace elements</title>
+ <link>http://wellformedweb.org/news/wfw_namespace_elements</link>
+ <description>
+ &lt;p&gt;The &lt;code&gt;wfw&lt;/code&gt; namespace, http://wellformedweb.org/CommentAPI/
+contains multiple elements. As more are added in various places I will
+endeavor to keep the list here updated.&lt;/p&gt;
+
+&lt;dl&gt;
+ &lt;dt&gt;wfw:comment&lt;/dt&gt;
+ &lt;dd&gt;The first element to appear in this namespace is &lt;code&gt;comment&lt;/code&gt;. This element appears
+ in RSS feeds and contains the URI that comment entries are to be POSTed to. The details
+ of this are outlined in the &lt;a href="http://wellformedweb.org/story/9"&gt;CommentAPI Specification&lt;/a&gt;.&lt;dd&gt;
+
+ &lt;dt&gt;wfw:commentRss&lt;/dt&gt;
+ &lt;dd&gt;The second element to appear in the wfw namespace is &lt;code&gt;commentRss&lt;/code&gt;. This element
+ also appears in RSS feeds and contains the URI of the RSS feed for comments on that Item.
+ This is documented in &lt;a href="http://www.sellsbrothers.com/spout/default.aspx?content=archive.htm#exposingRssComments"&gt;Chris Sells' Specification&lt;/a&gt;. Note that for quite a while this page has had a typo and erroneously referred to
+ this element as 'commentRSS' as opposed to the correct 'commentRss'. Feed consumers should be aware
+ that they may run into both spellings in the wild. Please see
+ &lt;a href="http://www.intertwingly.net/blog/2006/04/16/commentRss"&gt;this page&lt;/a&gt; for
+ more information.
+ &lt;/dd&gt;
+&lt;/dl&gt;
+ </description>
+
+ <dc:date>2003-10-10T13:11:46-05:00</dc:date>
+ <wfw:comment>http://wellformedweb.org/news/comments/wfw_namespace_elements</wfw:comment>
+ <wfw:commentRss>http://wellformedweb.org/news/wfw_namespace_elements?crss</wfw:commentRss>
+ </item>
+ <item>
+ <title>The HTTP verb PUT under Apache: Safe or Dangerous?</title>
+ <link>http://wellformedweb.org/news/PUT_SaferOrDangerous</link>
+ <description>
+ &lt;p&gt;"Is the HTTP verb PUT under Apache safe or dangerous?" This is a question I come across often, and have now
+ run into it twice in the work on Atom. So is it safe? The answer is maybe.&lt;/p&gt;
+&lt;p&gt;Here are two such examples:&lt;/p&gt;
+
+&lt;blockquote&gt;&lt;p&gt;
+ Using DELETE and PUT may be the "right thing to do"
+ in an ideal world, but the fact of the matter is that a
+ lot -- if not the vast majority -- of webservers do not allow these
+ operations. &lt;/p&gt;&lt;/blockquote&gt;
+
+&lt;blockquote&gt;&lt;p&gt;If anyone knows of a newer article describing
+ HTTP PUT with apache, I would be very interested in seeing it. Because,
+ due to my experience with PUT, you have to define a single PUTScript in
+ httpd.conf, and if you PUT something to an apache server at the URI
+ www.example.com/blog/entries/1 or something similar, apache passes all
+ of the information to the PUTScript, not to anything else.&lt;/p&gt;&lt;/blockquote&gt;
+
+&lt;p&gt;Both of the above quotes are from the &lt;a href="http://www.intertwingly.net/wiki/pie/RestEchoApiPutAndDelete"&gt;Atom Wiki discussion
+ of the use of PUT&lt;/a&gt;. A little digging reveals that the ApacheWeek article
+ &lt;a href="http://www.apacheweek.com/features/put"&gt;Publishing Pages with PUT&lt;/a&gt;
+ is referenced most often when the danger of PUT is raised. &lt;p&gt;
+
+&lt;p&gt;That ApacheWeek article does talk about the dangers of PUT and
+ the cautions you need to follow when writing a script that
+ does content publishing via PUT. That key part of that phrase
+ is &lt;strong&gt;content publishing&lt;/strong&gt;. That means that PUT is being
+ used to upload arbitrary content to the server and the client
+ is determining via the URI where the content should be stored.
+ Now you can imagine how this might be dangerous, for example
+ not correctly checking URI paths that include &lt;code&gt;../..&lt;/code&gt; could
+ let a malicious agent re-write your &lt;code&gt;.bashrc&lt;/code&gt;.&lt;/p&gt;
+
+&lt;p&gt;Implementing a PUT script can be difficult and a security hazard
+ in the context of content publishing, but that's the case because
+ the client is choosing the target URI and the client could upload
+ any content type. In the case of Web Services in general, and
+ the AtomAPI in particular, PUT is used in a much narrower manner
+ and avoids those potential security problems.&lt;/p&gt;
+
+&lt;p&gt;In the case of the AtomAPI PUT is only allowed on URIs that point
+ to a pre-existing resource. The
+ AtomAPI follows a general idiom for editing resources of doing
+ a GET to retrieve the original XML, then a PUT on the same URI
+ to update that resource with the edited XML. No URIs are created
+ by doing a PUT. PUT is not accepted on arbitrary URIs. This makes
+ the use of PUT in the context of the AtomAPI just as safe as POST.&lt;/p&gt;
+
+&lt;p&gt;There are quite a few ways to configure Apache to process
+ incoming requests. In particular it is possible to have a single
+ script that handles all PUT requests below a chosen directory. This
+ strategy, and all of the associated security concerns associated with
+ it, are covered fully in the &lt;a href="http://www.apacheweek.com/features/put"&gt;Publishing Pages with PUT&lt;/a&gt;.&lt;/p&gt;
+
+&lt;p&gt;When processing requests with a CGI script all the PUT requests
+ will come through. The verb is passed to the CGI program via the REQUEST_METHOD environment
+ variable, and the program decides what to do with the content.&lt;/p&gt;
+
+&lt;p&gt;Using PUT properly has advantages in Web Service development. First,
+ Apache lets you control security based on the verb using the
+ &lt;a href="http://httpd.apache.org/docs-2.0/mod/core.html#limit"&gt;Limit&lt;/a&gt;
+ and &lt;a href="http://httpd.apache.org/docs-2.0/mod/core.html#limitexcept"&gt;LimitExcept&lt;/a&gt;
+ directives, which
+ let you restrict access controls based on the verb. Here is a sample
+ of one of my &lt;code&gt;.htaccess&lt;/code&gt; files that restricts the use of
+ all verbs except GET to the CGI program &lt;code&gt;Bulu.cgi.&lt;/code&gt;&lt;/p&gt;
+
+&lt;pre class="example"&gt;&lt;code&gt;&amp;lt;Files Bulu.cgi&gt;
+AuthType Basic
+AuthName myrealm
+AuthUserFile /path/to/my/password/file
+ &amp;lt;LimitExcept GET&gt;
+ Require valid-user
+ &amp;lt;/LimitExcept&gt;
+&amp;lt;/Files&gt;
+&lt;/code&gt;&lt;/pre&gt;
+
+&lt;p&gt;In addition, the &lt;a href="http://httpd.apache.org/docs-2.0/mod/mod_actions.html#script"&gt;Script&lt;/a&gt;
+ directive can be used to dispatch to a CGI program based on the verb used:&lt;/p&gt;
+
+&lt;pre class="example"&gt;&lt;code&gt;Script PUT /cgi-bin/put.cgi&lt;/code&gt;&lt;/pre&gt;
+
+&lt;p&gt;The second advantage using PUT brings is clarity. Given the idiom
+ of using GET/PUT in tandem on a URI to edit resources PUT
+ clearly signals what the interface is doing.&lt;/p&gt;
+
+&lt;h4&gt;Resources&lt;/h4&gt;
+
+&lt;p&gt;&lt;a href="http://www.apacheweek.com"&gt;ApacheWeek&lt;/a&gt;: &lt;a href="http://www.apacheweek.com/features/put"&gt;Publishing Pages with PUT&lt;/a&gt;&lt;/p&gt;
+&lt;p&gt;&lt;a href="http://www.intertwingly.net/wiki/pie/RestEchoApiPutAndDelete"&gt;RestEchoApiPutAndDelete&lt;/a&gt;: Discussion on the use of PUT
+ and DELETE in the AtomAPI.&lt;/p&gt;
+&lt;p&gt;&lt;a href="http://httpd.apache.org/docs-2.0/mod/mod_actions.html"&gt;mod_actions&lt;/a&gt;: An Apache module for
+ controlling dispatching based on verb or content-type.&lt;/p&gt;
+&lt;p&gt;&lt;a href="http://www.w3.org/Amaya/User/Put.html"&gt;Configuring your WWW server to understand the PUT method&lt;/a&gt;, from the W3Cs Amaya project documentation.&lt;/p&gt;
+&lt;p&gt;&lt;a href="http://www.webdav.org/"&gt;WebDAV&lt;/a&gt; is also something you may be interested in if you
+ are looking for ways to publish your content using HTTP. WebDAV stands for
+ "Web-based Distributed Authoring and Versioning". It is a set of extensions to the HTTP
+ protocol which allows users to collaboratively edit and manage files on remote web servers.
+ &lt;a href="http://httpd.apache.org/docs-2.0/mod/mod_dav.html"&gt;
+ Mod_dav&lt;/a&gt; is an Apache module that implements WebDAV.&lt;/p&gt;
+
+
+
+ </description>
+
+ <dc:date>2003-08-23T00:45:25-05:00</dc:date>
+ <wfw:comment>http://wellformedweb.org/news/comments/PUT_SaferOrDangerous</wfw:comment>
+ <wfw:commentRss>http://wellformedweb.org/news/PUT_SaferOrDangerous?crss</wfw:commentRss>
+ </item>
+ <item>
+ <title>Six Plus One</title>
+ <link>http://wellformedweb.org/news/SixPlusOne</link>
+ <description>
+ &lt;p&gt;Previously I talked about the &lt;a href="http://bitworking.org/news/Six_Places"&gt;six different places&lt;/a&gt; there are to
+ store information in an HTTP transaction. This is slightly misleading.
+&lt;/p&gt;
+
+&lt;p&gt; To review, the six places are:&lt;/p&gt;
+&lt;ol&gt;
+ &lt;li&gt;Request URI&lt;/li&gt;
+ &lt;li&gt;Request Headers&lt;/li&gt;
+ &lt;li&gt;Request Content&lt;/li&gt;
+ &lt;li&gt;Response Status Code&lt;/li&gt;
+ &lt;li&gt;Response Headers&lt;/li&gt;
+ &lt;li&gt;Response Content&lt;/li&gt;
+&lt;/ol&gt;
+
+&lt;p&gt;This is slightly misleading because the URI is listed as a single
+ storage location. This isn't the best characterization, as it really
+ contains two different sets of information: the path, and the query parameters.&lt;/p&gt;
+
+&lt;p&gt;Now the path part of a URI usually corresponds to the directory structure on the server.
+ But remember that the path structure of a server is completely controlled
+ by that server and it need not correspond to any file or directory structure.
+ While it is at times convenient to map it to a directory structure, this isn't required,
+ and it is possible to pass path information to a
+ CGI program. For example, if you do a GET on the following URL:&lt;/p&gt;
+
+&lt;pre class="example"&gt;&lt;code&gt;http://example.org/cgi-bin/test.py/fred/12
+&lt;/code&gt;&lt;/pre&gt;
+
+&lt;p&gt;and there exists a program named &lt;code&gt;test.py&lt;/code&gt; in the &lt;code&gt;cgi-bin&lt;/code&gt; directory
+ then that program will be executed. The remaining path after the program is passed
+ to the CGI program in the PATH_INFO environment variable. In contrast, if query
+ parameters are passed in, they are passed to the CGI program
+ via the QUERY_STRING environment variable.&lt;/p&gt;
+
+&lt;p&gt;For example, if this is the script &lt;code&gt;test.py&lt;/code&gt;:&lt;/p&gt;
+
+&lt;pre class="example"&gt;&lt;code&gt;import os
+print "Content-type: text/plain\n\n"
+print "PATH_INFO = %s" % os.environ['PATH_INFO']
+print "QUERY_STRING = %s" % os.environ['QUERY_STRING']&lt;/code&gt;&lt;/pre&gt;
+
+&lt;p&gt;And it handles the GET for this URI:&lt;/p&gt;
+
+&lt;pre class="example"&gt;&lt;code&gt;http://localhost/cgi-bin/test.py/reilly/12?id=234454&lt;/code&gt;&lt;/pre&gt;
+
+&lt;p&gt;It will display:&lt;/p&gt;
+
+&lt;pre class="example"&gt;&lt;code&gt;PATH_INFO = /reilly/12
+QUERY_STRING = id=234454
+&lt;/code&gt;&lt;/pre&gt;
+
+&lt;p&gt;Note how the piece of the path below test.py has been stripped off and made
+ available via &lt;code&gt;PATH_INFO&lt;/code&gt;, while the query parameters are
+ stored in the QUERY_STRING environment variable.
+&lt;/p&gt;
+
+&lt;p&gt;So HTTP, via the structure of a URI, gives you two distinct places
+ to store information, one in the path and the second in the query parameters.
+ This isn't even the full story, because if you are running Apache and have
+ the ability to use .htaccess files you can use
+ &lt;a href="http://httpd.apache.org/docs/mod/mod_rewrite.html"&gt;mod_rewrite&lt;/a&gt; and map URIs so that they appear
+ as paths but show up in the CGI as query parameters, but we won't cover that
+ now.
+&lt;/p&gt;
+
+
+ </description>
+
+ <dc:date>2003-08-03T01:34:49-05:00</dc:date>
+ <wfw:comment>http://wellformedweb.org/news/comments/SixPlusOne</wfw:comment>
+ <wfw:commentRss>http://wellformedweb.org/news/SixPlusOne?crss</wfw:commentRss>
+ </item>
+ </channel>
+</rss>
+
+
+
diff --git a/akregator/src/librss/test_data/dublincore.xml b/akregator/src/librss/test_data/dublincore.xml
new file mode 100644
index 00000000..075822bb
--- /dev/null
+++ b/akregator/src/librss/test_data/dublincore.xml
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="utf-8"?>
+<rss xmlns:media="http://search.yahoo.com/mrss/" xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
+ <channel>
+ <title>The Guardian</title>
+ <link>https://www.theguardian.com/us</link>
+ <description>Latest US news, world news, sports, business, opinion, analysis and reviews from the Guardian, the world's leading liberal voice</description>
+ <copyright>Guardian News and Media Limited or its affiliated companies. All rights reserved. 2025</copyright>
+ <language>en-gb</language>
+ <!-- pubDate has been modified so we can assure dc:date is chosen. -->
+ <pubDate>Thu, 13 Mar 2020 10:38:39 GMT</pubDate>
+ <dc:date>2025-03-13T07:28:39Z</dc:date>
+ <dc:language>en-gb</dc:language>
+ <dc:rights>Guardian News and Media Limited or its affiliated companies. All rights reserved. 2025</dc:rights>
+ <image>
+ <title>The Guardian</title>
+ <url>https://assets.guim.co.uk/images/guardian-logo-rss.c45beb1bafa34b347ac333af2e6fe23f.png</url>
+ <link>https://www.theguardian.com</link>
+ </image>
+ <item>
+ <title>Judge orders Elon Musk and Doge to produce records about cost-cutting operations</title>
+ <link>https://www.theguardian.com/us-news/2025/mar/13/elon-musk-doge-court-ruling-records</link>
+ <description>&lt;p&gt;The documents would ultimately inform whether Musk has been operating unconstitutionally to the extent Doge’s activities should be halted&lt;/p&gt;&lt;p&gt;Elon Musk and his so-called “department of government efficiency”, or Doge, have been ordered by &lt;a href="https://www.theguardian.com/us-news/2025/mar/12/judge-blocks-trump-order-perkins-coie"&gt;a federal judge&lt;/a&gt; to turn over a wide array of records that would reveal the identities of staffers and internal records related to efforts to aggressively cut federal government spending and programs.&lt;/p&gt;&lt;p&gt;US district judge Tanya Chutkan’s order forces Musk to produce documents related to Doge’s activities &lt;a href="https://storage.courtlistener.com/recap/gov.uscourts.dcd.277463/gov.uscourts.dcd.277463.61.0_1.pdf"&gt;as part of a lawsuit&lt;/a&gt; brought by 14 Democratic state attorneys general that alleges Musk violated the constitution by wielding powers that only Senate-confirmed officials should possess.&lt;/p&gt; &lt;a href="https://www.theguardian.com/us-news/2025/mar/13/elon-musk-doge-court-ruling-records"&gt;Continue reading...&lt;/a&gt;</description>
+ <category domain="https://www.theguardian.com/us-news/trump-administration">Trump administration</category>
+ <category domain="https://www.theguardian.com/us-news/donaldtrump">Donald Trump</category>
+ <category domain="https://www.theguardian.com/law/law-us">Law (US)</category>
+ <category domain="https://www.theguardian.com/us-news/us-politics">US politics</category>
+ <category domain="https://www.theguardian.com/us-news/us-news">US news</category>
+ <!-- pubDate has been modified so we can assure dc:date is chosen. -->
+ <pubDate>Thu, 13 Mar 2020 10:38:39 GMT</pubDate>
+ <guid>https://www.theguardian.com/us-news/2025/mar/13/elon-musk-doge-court-ruling-records</guid>
+ <media:content width="140" url="https://i.guim.co.uk/img/media/8570e3094dfad7e268555097158aa8085221d48f/0_72_5221_3132/master/5221.jpg?width=140&amp;quality=85&amp;auto=format&amp;fit=max&amp;s=8677942b4c15d93e73d6bd18a13a12c0">
+ <media:credit scheme="urn:ebu">Photograph: Alex Brandon/AP</media:credit>
+ </media:content>
+ <media:content width="460" url="https://i.guim.co.uk/img/media/8570e3094dfad7e268555097158aa8085221d48f/0_72_5221_3132/master/5221.jpg?width=460&amp;quality=85&amp;auto=format&amp;fit=max&amp;s=bcd596d741b8c9e4be82c602e5105122">
+ <media:credit scheme="urn:ebu">Photograph: Alex Brandon/AP</media:credit>
+ </media:content>
+ <dc:creator>Hugo Lowell in Washington</dc:creator>
+ <dc:date>2025-03-13T05:22:00Z</dc:date>
+ </item>
+ </channel>
+</rss>
+
diff --git a/akregator/src/librss/test_data/rdf.xml b/akregator/src/librss/test_data/rdf.xml
new file mode 100644
index 00000000..dadcde6d
--- /dev/null
+++ b/akregator/src/librss/test_data/rdf.xml
@@ -0,0 +1,64 @@
+<?xml version="1.0"?>
+<rdf:RDF
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns="http://purl.org/rss/1.0/"
+>
+
+ <channel rdf:about="http://www.xml.com/xml/news.rss">
+ <title>XML.com</title>
+ <link>http://xml.com/pub</link>
+
+ <description>
+ XML.com features a rich mix of information and services
+ for the XML community.
+ </description>
+
+ <image rdf:resource="http://xml.com/universal/images/xml_tiny.gif" />
+
+ <items>
+ <rdf:Seq>
+
+ <rdf:li resource="http://xml.com/pub/2000/08/09/xslt/xslt.html" />
+ <rdf:li resource="http://xml.com/pub/2000/08/09/rdfdb/index.html" />
+ </rdf:Seq>
+ </items>
+
+ <textinput rdf:resource="http://search.xml.com" />
+
+ </channel>
+ <image rdf:about="http://xml.com/universal/images/xml_tiny.gif">
+ <title>XML.com</title>
+ <link>http://www.xml.com</link>
+ <url>http://xml.com/universal/images/xml_tiny.gif</url>
+
+ </image>
+ <item rdf:about="http://xml.com/pub/2000/08/09/xslt/xslt.html">
+ <title>Processing Inclusions with XSLT</title>
+ <link>http://xml.com/pub/2000/08/09/xslt/xslt.html</link>
+ <description>
+
+ Processing document inclusions with general XML tools can be
+ problematic. This article proposes a way of preserving inclusion
+ information through SAX-based processing.
+ </description>
+ </item>
+ <item rdf:about="http://xml.com/pub/2000/08/09/rdfdb/index.html">
+ <title>Putting RDF to Work</title>
+ <link>http://xml.com/pub/2000/08/09/rdfdb/index.html</link>
+
+ <description>
+ Tool and API support for the Resource Description Framework
+ is slowly coming of age. Edd Dumbill takes a look at RDFDB,
+ one of the most exciting new RDF toolkits.
+ </description>
+ </item>
+
+ <textinput rdf:about="http://search.xml.com">
+ <title>Search XML.com</title>
+
+ <description>Search XML.com's XML collection</description>
+ <name>s</name>
+ <link>http://search.xml.com</link>
+ </textinput>
+
+</rdf:RDF>
diff --git a/akregator/src/librss/test_data/rss091.xml b/akregator/src/librss/test_data/rss091.xml
new file mode 100644
index 00000000..65788c3d
--- /dev/null
+++ b/akregator/src/librss/test_data/rss091.xml
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="ISO-8859-1" ?>
+<rss version="0.91">
+ <channel>
+ <title>WriteTheWeb</title>
+ <link>http://writetheweb.com</link>
+ <description>News for web users that write back</description>
+ <language>en-us</language>
+ <copyright>Copyright 2000, WriteTheWeb team.</copyright>
+ <managingEditor>editor@writetheweb.com</managingEditor>
+ <webMaster>webmaster@writetheweb.com</webMaster>
+ <image>
+ <title>WriteTheWeb</title>
+ <url>http://writetheweb.com/images/mynetscape88.gif</url>
+ <link>http://writetheweb.com</link>
+ <width>88</width>
+ <height>31</height>
+ <description>News for web users that write back</description>
+ </image>
+ <item>
+ <title>Giving the world a pluggable Gnutella</title>
+ <link>http://writetheweb.com/read.php?item=24</link>
+ <description>WorldOS is a framework on which to build programs that work like Freenet or Gnutella -allowing distributed applications using peer-to-peer routing.</description>
+ </item>
+ <item>
+ <title>Syndication discussions hot up</title>
+ <link>http://writetheweb.com/read.php?item=23</link>
+ <description>After a period of dormancy, the Syndication mailing list has become active again, with contributions from leaders in traditional media and Web syndication.</description>
+ </item>
+ <item>
+ <title>Personal web server integrates file sharing and messaging</title>
+ <link>http://writetheweb.com/read.php?item=22</link>
+ <description>The Magi Project is an innovative project to create a combined personal web server and messaging system that enables the sharing and synchronization of information across desktop, laptop and palmtop devices.</description>
+ </item>
+ <item>
+ <title>Syndication and Metadata</title>
+ <link>http://writetheweb.com/read.php?item=21</link>
+ <description>RSS is probably the best known metadata format around. RDF is probably one of the least understood. In this essay, published on my O'Reilly Network weblog, I argue that the next generation of RSS should be based on RDF.</description>
+ </item>
+ <item>
+ <title>UK bloggers get organised</title>
+ <link>http://writetheweb.com/read.php?item=20</link>
+ <description>Looks like the weblogs scene is gathering pace beyond the shores of the US. There's now a UK-specific page on weblogs.com, and a mailing list at egroups.</description>
+ </item>
+ <item>
+ <title>Yournamehere.com more important than anything</title>
+ <link>http://writetheweb.com/read.php?item=19</link>
+ <description>Whatever you're publishing on the web, your site name is the most valuable asset you have, according to Carl Steadman.</description>
+ </item>
+ </channel>
+ </rss>
diff --git a/akregator/src/librss/testlibrss.cpp b/akregator/src/librss/testlibrss.cpp
index bef989c6..5e298ea3 100644
--- a/akregator/src/librss/testlibrss.cpp
+++ b/akregator/src/librss/testlibrss.cpp
@@ -1,67 +1,268 @@
#include "testlibrss.h"
-#include "image.h"
+#include <tqdatetime.h>
+#include <tqfile.h>
#include <tdeaboutdata.h>
#include <tdecmdlineargs.h>
#include <tdeapplication.h>
#include <kdebug.h>
+#include <krfcdate.h>
+
+#include "image.h"
+#include "enclosure.h"
+
+#include <cstdlib>
using namespace RSS;
+
+TestRetriever::TestRetriever()
+ : m_errorCode(0)
+{
+}
+
+TestRetriever::~TestRetriever()
+{
+}
+
+void TestRetriever::retrieveData(const KURL &url)
+{
+ // Test files are local paths
+ TQFile file(url.path());
+ if (file.open(IO_ReadOnly))
+ {
+ TQStringList lines;
+ TQTextStream stream(&file);
+
+ while (!stream.atEnd())
+ {
+ lines += stream.readLine();
+ }
+ file.close();
+
+ TQCString content = lines.join("\n").local8Bit();
+ TQByteArray data;
+ data.duplicate(content, content.length());
+ emit dataRetrieved(data, true);
+ }
+ else
+ {
+ kdError() << "Failed to retrieveData: " << file.errorString() << endl;
+ m_errorCode = file.status();
+ emit dataRetrieved(TQByteArray{}, false);
+ }
+}
+
static const TDECmdLineOptions options[] =
{
{ "+url", I18N_NOOP("URL of feed"), 0 },
TDECmdLineLastOption
};
+template<typename ActualType, typename ExpectedType>
+static void assertEquals(const ActualType& actual, const ExpectedType& expected)
+{
+ if (actual != expected)
+ {
+ kdError() << "Assertion failed: actual == expected\n"
+ << " actual: " << actual << "\n"
+ << " expected: " << expected << endl;
+ tdeApp->exit(1);
+ }
+}
+
+static void checkRSS091(const Document& document)
+{
+ assertEquals(document.title(), "WriteTheWeb");
+ assertEquals(document.link().url(), "http://writetheweb.com");
+ assertEquals(document.description(), "News for web users that write back");
+ assertEquals(document.language(), Language::en_us);
+ assertEquals(document.copyright(), "Copyright 2000, WriteTheWeb team.");
+ assertEquals(document.managingEditor(), "editor@writetheweb.com");
+ assertEquals(document.webMaster(), "webmaster@writetheweb.com");
+
+ const Image* image = document.image();
+ if (!image)
+ {
+ kdError() << "Expected an <image> element to be found" << endl;
+ tdeApp->exit(1);
+ }
+
+ assertEquals(image->title(), "WriteTheWeb");
+ assertEquals(image->url().url(), "http://writetheweb.com/images/mynetscape88.gif");
+ assertEquals(image->link().url(), "http://writetheweb.com");
+ assertEquals(image->description(), "News for web users that write back");
+ assertEquals(image->width(), 88);
+ assertEquals(image->height(), 31);
+
+ assertEquals(document.articles().count(), 6);
+
+ Article article = document.articles().first();
+ assertEquals(article.title(), "Giving the world a pluggable Gnutella");
+ assertEquals(article.link().url(), "http://writetheweb.com/read.php?item=24");
+ assertEquals(article.description(), "WorldOS is a framework on which to build programs that work like Freenet or Gnutella -allowing distributed applications using peer-to-peer routing.");
+}
+
+static void checkWFW(const Document& document)
+{
+ assertEquals(document.link().url(), "http://wellformedweb.org/news/");
+ assertEquals(document.description(), "Exploring the limits of XML and HTTP");
+
+ assertEquals(document.articles().count(), 5);
+
+ Article article = document.articles().front();
+ assertEquals(article.title(), "Should you use Content Negotiation in your Web Services?");
+ assertEquals(article.commentsLink().url(), "http://bitworking.org/news/comments/WebServicesAndContentNegotiation");
+}
+
+static void checkDC(const Document& document)
+{
+ // librss will use dc:date if it is provided, otherwise it will use pubDate
+ assertEquals(document.link().url(), "https://www.theguardian.com/us");
+
+ TQDateTime expectedTime;
+ time_t time = KRFCDate::parseDateISO8601("2025-03-13T07:28:39Z");
+ expectedTime.setTime_t(time);
+ assertEquals(document.pubDate(), expectedTime);
+
+ assertEquals(document.articles().count(), 1);
+
+ Article article = document.articles().first();
+ time = KRFCDate::parseDateISO8601("2025-03-13T05:22:00Z");
+ expectedTime.setTime_t(time);
+ assertEquals(article.pubDate(), expectedTime);
+
+ assertEquals(article.author(), "Hugo Lowell in Washington");
+}
+
+static void checkRDF(const Document& document)
+{
+ assertEquals(document.title(), "XML.com");
+ assertEquals(document.link().url(), "http://xml.com/pub");
+
+ assertEquals(document.articles().count(), 2);
+
+ Article article = document.articles().first();
+
+ assertEquals(article.title(), "Processing Inclusions with XSLT");
+ assertEquals(article.link().url(), "http://xml.com/pub/2000/08/09/xslt/xslt.html");
+ assertEquals(article.guid(), "http://xml.com/pub/2000/08/09/xslt/xslt.html");
+ assertEquals(article.guidIsPermaLink(), false);
+}
+
+static void checkAtom10(const Document& document)
+{
+ assertEquals(document.title(), "dive into mark");
+ assertEquals(document.description(), "A <em>lot</em> of effort went into making this effortless");
+ kdWarning() << "Skipping check for Atom \"rights\" (Document::copyright) -- not implemented." << endl;
+ // assertEquals(document.copyright(), "Copyright (c) 2003, Mark Pilgrim");
+ assertEquals(document.language(), Language::en_us);
+
+ // 2005-07-31T12:29:29Z
+ // TQDateTime compTime;
+ // time_t time = KRFCDate::parseDateISO8601("2005-07-31T12:29:29Z");
+ // compTime.setTime_t(time);
+ // assertEquals(document.pubDate(), compTime);
+ kdWarning() << "Skipping check for Atom \"updated\" (Document::pubDate/lastBuildDate) -- not implemented." << endl;
+
+ assertEquals(document.link().url(), "http://example.org/");
+
+ assertEquals(document.articles().count(), 1);
+
+ Article article = document.articles().first();
+
+ assertEquals(article.title(), "Atom draft-07 snapshot");
+ assertEquals(article.link().url(), "http://example.org/2005/04/02/atom");
+
+ if (article.description().isNull())
+ {
+ kdError() << "Empty Atom article description." << endl;
+ tdeApp->exit(1);
+ }
+
+ // Enclosure enclosure = article.enclosure();
+ // assertEquals(enclosure.url(), "http://example.org/audio/ph34r_my_podcast.mp3");
+ // assertEquals(enclosure.length(), 1337);
+ // assertEquals(enclosure.type(), "audio/mpeg");
+ kdWarning() << "Skipping check for Atom \"enclosure\" links -- not implemented." << endl;
+
+ assertEquals(article.guid(), "tag:example.org,2003:3.2397");
+
+ // 2005-07-31T12:29:29Z
+ // Need a little workaround since librss converts the timestamp to local time.
+ // NOTE: Atom provides both 'published' and 'updated'; librss uses 'updated'.
+ TQDateTime articlePublishedDate;
+ time_t publishedTime = KRFCDate::parseDateISO8601("2005-07-31T12:29:29Z");
+ articlePublishedDate.setTime_t(publishedTime);
+ assertEquals(article.pubDate(), articlePublishedDate);
+
+ assertEquals(article.author(), "<a href=\"mailto:f8dy@example.com\">Mark Pilgrim</a>");
+}
void Tester::test( const TQString &url )
{
Loader *loader = Loader::create();
- connect( loader, TQT_SIGNAL( loadingComplete( Loader *, Document, Status ) ),
- this, TQT_SLOT( slotLoadingComplete( Loader *, Document, Status ) ) );
- loader->loadFrom( url, new FileRetriever );
+ connect( loader, TQ_SIGNAL( loadingComplete( Loader *, Document, Status ) ),
+ this, TQ_SLOT( slotLoadingComplete( Loader *, Document, Status ) ) );
+ loader->loadFrom( url, new TestRetriever );
}
void Tester::slotLoadingComplete( Loader *loader, Document doc, Status status )
{
- if ( status == Success )
+ if (status != Success)
{
- kdDebug() << "Successfully retrieved '" << doc.title() << "'" << endl;
- kdDebug() << doc.description() << endl;
-
- if ( doc.image() ) {
- kdDebug() << "Image: ";
- kdDebug() << " Title: " << doc.image()->title() << endl;
- kdDebug() << " URL: " << doc.image()->url() << endl;
- kdDebug() << " Link: " << doc.image()->link() << endl;
- }
-
- kdDebug() << "Articles:" << endl;
-
- Article::List list = doc.articles();
- Article::List::ConstIterator it;
- Article::List::ConstIterator en=list.end();
- for (it = list.begin(); it != en; ++it)
+ kdError() << "Failed to load Document: ec=" << loader->errorCode() << " status=" << status << endl;
+ exit(1);
+ }
+
+ switch (doc.version())
+ {
+ case RSS::v0_91:
{
- kdDebug() << "\tTitle: " << (*it).title() << endl;
- kdDebug() << "\tText: " << (*it).description() << endl;
+ checkRSS091(doc);
+ break;
+ }
+ case RSS::v2_0:
+ {
+ if (doc.title() == "The Well-Formed Web")
+ {
+ checkWFW(doc);
+ break;
+ }
+ else if (doc.title() == "The Guardian")
+ {
+ checkDC(doc);
+ break;
+ }
+ else if (doc.title() == "XML.com")
+ {
+ checkRDF(doc);
+ break;
+ }
+ kdError() << "Unknown RSS 2.0 document '" << doc.title() << "'" << endl;
+ exit(1);
+ }
+ case RSS::vAtom_1_0:
+ {
+ checkAtom10(doc);
+ break;
+ }
+ default:
+ {
+ break;
}
}
- if ( status != Success )
- kdDebug() << "ERROR " << loader->errorCode() << endl;
-
- kapp->quit();
+ exit(0);
}
int main( int argc, char **argv )
{
TDEAboutData aboutData( "testlibrss", "testlibrss", "0.1" );
TDECmdLineArgs::init( argc, argv, &aboutData );
- TDECmdLineArgs::addCmdLineOptions( options );
- TDEApplication app;
+ TDECmdLineArgs::addCmdLineOptions( options );
+ TDEApplication app(false, false);
TDECmdLineArgs *args = TDECmdLineArgs::parsedArgs();
if ( args->count() != 1 ) args->usage();
diff --git a/akregator/src/librss/testlibrss.h b/akregator/src/librss/testlibrss.h
index 222943a6..fdbccbd1 100644
--- a/akregator/src/librss/testlibrss.h
+++ b/akregator/src/librss/testlibrss.h
@@ -14,8 +14,8 @@ using RSS::Status;
class Tester : public TQObject
{
- Q_OBJECT
-
+ TQ_OBJECT
+
public:
void test( const TQString &url );
@@ -23,4 +23,22 @@ class Tester : public TQObject
void slotLoadingComplete( Loader *loader, Document doc, Status status );
};
+class TestRetriever : public RSS::DataRetriever
+{
+ TQ_OBJECT
+
+public:
+ TestRetriever();
+ ~TestRetriever() override;
+
+ void retrieveData(const KURL &url) override;
+
+ int errorCode() const override { return m_errorCode; }
+
+ void abort() override { /* no-op */ }
+
+private:
+ int m_errorCode;
+};
+
#endif
diff --git a/akregator/src/librss/tools_p.cpp b/akregator/src/librss/tools_p.cpp
index 9303bdf5..04dc570b 100644
--- a/akregator/src/librss/tools_p.cpp
+++ b/akregator/src/librss/tools_p.cpp
@@ -117,6 +117,59 @@ static TQString extractAtomContent(const TQDomElement& e)
return TQString();
}
+TQDomElement extractElementNS(const TQDomNode &parent, const TQString &nameSpace, const TQString &localName)
+{
+ TQDomElement element;
+
+ if (parent.isNull())
+ {
+ return element;
+ }
+
+ TQDomNodeList children = parent.childNodes();
+ for (size_t i = 0; i < children.count(); ++i)
+ {
+ TQDomNode node = children.item(i);
+ if (node.isElement() && node.namespaceURI() == nameSpace && node.localName() == localName)
+ {
+ element = node.toElement();
+ break;
+ }
+ }
+
+ return element;
+}
+
+TQString extractElementTextNS(const TQDomNode &parent, const TQString &namespaceURI, const TQString &localName, bool isInlined)
+{
+ TQDomElement element = extractElementNS(parent, namespaceURI, localName);
+
+ if (element.isNull())
+ {
+ return TQString::null;
+ }
+
+ TQString result = element.text().stripWhiteSpace();
+ if (localName == "content")
+ {
+ // Atom content
+ result = extractAtomContent(element);
+ }
+ else
+ {
+ // Check for HTML; not necessary for atom:content
+ // Taken from extractNode below
+ bool hasPre = result.contains("<pre>", false) || result.contains("<pre ", false);
+ bool hasHtml = hasPre || result.contains("<");
+ if (!isInlined && !hasHtml)
+ result = result = result.replace(TQChar('\n'), "<br />");
+ if (!hasPre)
+ result = result.simplifyWhiteSpace();
+ }
+
+ return result.isEmpty() ? TQString::null : result;
+}
+
TQString extractNode(const TQDomNode &parent, const TQString &elemName, bool isInlined)
{
TQDomNode node = parent.namedItem(elemName);
@@ -208,25 +261,25 @@ TQString parseItemAuthor(const TQDomElement& element, Format format, Version ver
TQString name;
TQString email;
- TQDomElement dcCreator = element.namedItem("dc:creator").toElement();
-
+ TQDomElement dcCreator = extractElementNS(element, DublinCoreNamespace, "creator");
+
if (!dcCreator.isNull())
authorFromString(dcCreator.text(), name, email);
else if (format == AtomFeed)
{
TQDomElement atomAuthor = element.namedItem("author").toElement();
if (atomAuthor.isNull())
- atomAuthor = element.namedItem("atom:author").toElement();
+ atomAuthor = extractElementNS(element, AtomNamespace, "author");
if (!atomAuthor.isNull())
{
TQDomElement atomName = atomAuthor.namedItem("name").toElement();
if (atomName.isNull())
- atomName = atomAuthor.namedItem("atom:name").toElement();
+ atomName = extractElementNS(atomAuthor, AtomNamespace, "name");
name = atomName.text().stripWhiteSpace();
TQDomElement atomEmail = atomAuthor.namedItem("email").toElement();
if (atomEmail.isNull())
- atomEmail = atomAuthor.namedItem("atom:email").toElement();
+ atomEmail = extractElementNS(atomAuthor, AtomNamespace, "email");
email = atomEmail.text().stripWhiteSpace();
}
}
diff --git a/akregator/src/librss/tools_p.h b/akregator/src/librss/tools_p.h
index 0ec9013f..1b89fc85 100644
--- a/akregator/src/librss/tools_p.h
+++ b/akregator/src/librss/tools_p.h
@@ -29,6 +29,8 @@ namespace RSS
unsigned int count;
};
+ TQDomElement extractElementNS(const TQDomNode& parent, const TQString& namespaceURI, const TQString& localName);
+ TQString extractElementTextNS(const TQDomNode& parent, const TQString& namespaceURI, const TQString& localName, bool isInlined = true);
TQString extractNode(const TQDomNode &parent, const TQString &elemName, bool isInlined=true);
TQString extractTitle(const TQDomNode &parent);
TQString childNodesAsXML(const TQDomNode& parent);