Diffstat (limited to 'akregator/src/librss')
-rw-r--r-- | akregator/src/librss/CMakeLists.txt | 28
-rw-r--r-- | akregator/src/librss/article.cpp | 32
-rw-r--r-- | akregator/src/librss/article.h | 2
-rw-r--r-- | akregator/src/librss/category.h | 2
-rw-r--r-- | akregator/src/librss/document.cpp | 4
-rw-r--r-- | akregator/src/librss/document.h | 2
-rw-r--r-- | akregator/src/librss/enclosure.h | 2
-rw-r--r-- | akregator/src/librss/global.h | 17
-rw-r--r-- | akregator/src/librss/image.cpp | 6
-rw-r--r-- | akregator/src/librss/image.h | 4
-rw-r--r-- | akregator/src/librss/loader.cpp | 30
-rw-r--r-- | akregator/src/librss/loader.h | 18
-rw-r--r-- | akregator/src/librss/test_data/atom_spec.xml | 42
-rw-r--r-- | akregator/src/librss/test_data/comment_api.xml | 411
-rw-r--r-- | akregator/src/librss/test_data/dublincore.xml | 42
-rw-r--r-- | akregator/src/librss/test_data/rdf.xml | 64
-rw-r--r-- | akregator/src/librss/test_data/rss091.xml | 50
-rw-r--r-- | akregator/src/librss/testlibrss.cpp | 259
-rw-r--r-- | akregator/src/librss/testlibrss.h | 22
-rw-r--r-- | akregator/src/librss/tools_p.cpp | 63
-rw-r--r-- | akregator/src/librss/tools_p.h | 2
21 files changed, 1012 insertions, 90 deletions
diff --git a/akregator/src/librss/CMakeLists.txt b/akregator/src/librss/CMakeLists.txt index c2e7a001..23dc39a2 100644 --- a/akregator/src/librss/CMakeLists.txt +++ b/akregator/src/librss/CMakeLists.txt @@ -16,7 +16,6 @@ include_directories( ${TQT_INCLUDE_DIRS} ) - ##### rsslocal (static) ######################### tde_add_library( rsslocal STATIC_PIC AUTOMOC @@ -25,3 +24,30 @@ tde_add_library( rsslocal STATIC_PIC AUTOMOC tools_p.cpp loader.cpp enclosure.cpp category.cpp feeddetector.cpp ) + +tde_add_check_executable( testlibrss AUTOMOC + SOURCES testlibrss.cpp + LINK rsslocal-static ${TQT_LIBRARIES} tdeio-shared +) + +set( TEST_DATA "${CMAKE_CURRENT_SOURCE_DIR}/test_data") + +add_test( NAME TestLibRSS_0.91 + COMMAND testlibrss ${TEST_DATA}/rss091.xml +) + +add_test( NAME TestLibRSS_CommentAPI + COMMAND testlibrss ${TEST_DATA}/comment_api.xml +) + +add_test( NAME TestLibRSS_DublinCore + COMMAND testlibrss ${TEST_DATA}/dublincore.xml +) + +add_test( NAME TestLibRSS_RDF + COMMAND testlibrss ${TEST_DATA}/rdf.xml +) + +add_test( NAME TestLibRSS_AtomSpec + COMMAND testlibrss ${TEST_DATA}/atom_spec.xml +) diff --git a/akregator/src/librss/article.cpp b/akregator/src/librss/article.cpp index 18522fe3..88d42a7d 100644 --- a/akregator/src/librss/article.cpp +++ b/akregator/src/librss/article.cpp @@ -92,13 +92,16 @@ Article::Article(const TQDomNode &node, Format format, Version version) : d(new d->link = elemText; } + // prefer content/content:encoded over summary/description for feeds that provide it + if (format == AtomFeed) + { + d->description = extractNode(node, TQString::fromLatin1("content"), false); + } + else + { + d->description = extractElementTextNS(node, ContentNamespace, TQString::fromLatin1("encoded"), false); + } - // prefer content/content:encoded over summary/description for feeds that provide it - TQString tagName=(format==AtomFeed)? 
TQString::fromLatin1("content"): TQString::fromLatin1("content:encoded"); - - if (!(elemText = extractNode(node, tagName, false)).isNull()) - d->description = elemText; - if (d->description.isEmpty()) { if (!(elemText = extractNode(node, TQString::fromLatin1("body"), false)).isNull()) @@ -130,7 +133,7 @@ Article::Article(const TQDomNode &node, Format format, Version version) : d(new time = KRFCDate::parseDate(elemText); } - if (!(elemText = extractNode(node, TQString::fromLatin1("dc:date"))).isNull()) + if (!(elemText = extractElementTextNS(node, DublinCoreNamespace, TQString::fromLatin1("date"))).isNull()) { time = parseISO8601Date(elemText); } @@ -139,27 +142,22 @@ Article::Article(const TQDomNode &node, Format format, Version version) : d(new if (time != 0) d->pubDate.setTime_t(time); - if (!(elemText = extractNode(node, TQString::fromLatin1("wfw:comment"))).isNull()) { - d->commentsLink = elemText; - } - - if (!(elemText = extractNode(node, TQString::fromLatin1("slash:comments"))).isNull()) { - d->numComments = elemText.toInt(); - } + d->commentsLink = extractElementTextNS(node, CommentAPINamespace, TQString::fromLatin1("comment")); + d->numComments = extractElementTextNS(node, SlashNamespace, TQString::fromLatin1("comments")).toInt(); TQDomElement element = TQDomNode(node).toElement(); // in RSS 1.0, we use <item about> attribute as ID // FIXME: pass format version instead of checking for attribute - if (!element.isNull() && element.hasAttribute(TQString::fromLatin1("rdf:about"))) + if (!element.isNull() && element.hasAttributeNS(RDFNamespace, TQString::fromLatin1("about"))) { - d->guid = element.attribute(TQString::fromLatin1("rdf:about")); // HACK: using ns properly did not work + d->guid = element.attributeNS(RDFNamespace, TQString::fromLatin1("about"), TQString::null); d->guidIsPermaLink = false; } else { - tagName=(format==AtomFeed)? TQString::fromLatin1("id"): TQString::fromLatin1("guid"); + TQString tagName=(format==AtomFeed)? TQString::fromLatin1("id"): TQString::fromLatin1("guid"); TQDomNode n = node.namedItem(tagName); if (!n.isNull()) { diff --git a/akregator/src/librss/article.h b/akregator/src/librss/article.h index c27fdfe7..574f4840 100644 --- a/akregator/src/librss/article.h +++ b/akregator/src/librss/article.h @@ -34,7 +34,7 @@ namespace RSS * is via Document::articles(). 
* @see Document::articles() */ - class KDE_EXPORT Article + class TDE_EXPORT Article { public: /** diff --git a/akregator/src/librss/category.h b/akregator/src/librss/category.h index 0c40e418..c38a1c4e 100644 --- a/akregator/src/librss/category.h +++ b/akregator/src/librss/category.h @@ -33,7 +33,7 @@ class TQString; namespace RSS { - class KDE_EXPORT Category + class TDE_EXPORT Category { public: diff --git a/akregator/src/librss/document.cpp b/akregator/src/librss/document.cpp index 3bc64d00..7d94a252 100644 --- a/akregator/src/librss/document.cpp +++ b/akregator/src/librss/document.cpp @@ -224,7 +224,7 @@ Document::Document(const TQDomDocument &doc) : d(new Private) d->copyright = elemText; if (d->format == AtomFeed) - elemText = rootNode.toElement().attribute(TQString::fromLatin1("xml:lang"), TQString()); + elemText = rootNode.toElement().attributeNS(XMLNamespace, "lang", TQString::null); else elemText = extractNode(channelNode, TQString::fromLatin1("language")); @@ -441,7 +441,7 @@ Document::Document(const TQDomDocument &doc) : d(new Private) d->pubDate.setTime_t(_time); } - if (!(elemText = extractNode(channelNode, TQString::fromLatin1("dc:date"))).isNull()) { + if (!(elemText = extractElementTextNS(channelNode, DublinCoreNamespace, "date")).isNull()) { time_t _time = parseISO8601Date(elemText); /* \bug This isn't really the right way since it will set the date to * Jan 1 1970, 1:00:00 if the passed date was invalid; this means that diff --git a/akregator/src/librss/document.h b/akregator/src/librss/document.h index 8d098e7d..b876caf2 100644 --- a/akregator/src/librss/document.h +++ b/akregator/src/librss/document.h @@ -28,7 +28,7 @@ namespace RSS * but rather use Loader::loadFrom() to produce a Document object. * @see Loader::loadForm() */ - class KDE_EXPORT Document + class TDE_EXPORT Document { public: /** diff --git a/akregator/src/librss/enclosure.h b/akregator/src/librss/enclosure.h index 154f8bc0..54ee2059 100644 --- a/akregator/src/librss/enclosure.h +++ b/akregator/src/librss/enclosure.h @@ -33,7 +33,7 @@ class TQString; namespace RSS { - class KDE_EXPORT Enclosure + class TDE_EXPORT Enclosure { public: diff --git a/akregator/src/librss/global.h b/akregator/src/librss/global.h index 966521cc..8ff4e406 100644 --- a/akregator/src/librss/global.h +++ b/akregator/src/librss/global.h @@ -11,13 +11,28 @@ #ifndef LIBRSS_GLOBAL_H #define LIBRSS_GLOBAL_H -#include <kdemacros.h> +#include <tdemacros.h> template <class> class TQValueList; namespace RSS { + /// The Atom 1.0 XML namespace. + constexpr const char *AtomNamespace = "http://www.w3.org/2005/Atom"; + /// The CommentAPI XML namespace. + constexpr const char *CommentAPINamespace = "http://wellformedweb.org/CommentAPI/"; + /// The Content XML namespace. + constexpr const char *ContentNamespace = "http://purl.org/rss/1.0/modules/content/"; + /// The Dublin Core XML namespace. + constexpr const char *DublinCoreNamespace = "http://purl.org/dc/elements/1.1/"; + /// The RDF Concepts Vocabulary (RDF) namespace. + constexpr const char *RDFNamespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + /// The Slash XML namespace. + constexpr const char *SlashNamespace = "http://purl.org/rss/1.0/modules/slash/"; + /// The XML namespace. + constexpr const char *XMLNamespace = "http://www.w3.org/XML/1998/namespace"; + /** * Versions currently supported by this library. 
This enumeration is * subject to be extended in the future and used by Document::version() to diff --git a/akregator/src/librss/image.cpp b/akregator/src/librss/image.cpp index 174a105b..65aaf539 100644 --- a/akregator/src/librss/image.cpp +++ b/akregator/src/librss/image.cpp @@ -112,9 +112,9 @@ void Image::getPixmap() d->pixmapBuffer->open(IO_WriteOnly); d->job = TDEIO::get(d->url, false, false); - connect(d->job, TQT_SIGNAL(data(TDEIO::Job *, const TQByteArray &)), - this, TQT_SLOT(slotData(TDEIO::Job *, const TQByteArray &))); - connect(d->job, TQT_SIGNAL(result(TDEIO::Job *)), this, TQT_SLOT(slotResult(TDEIO::Job *))); + connect(d->job, TQ_SIGNAL(data(TDEIO::Job *, const TQByteArray &)), + this, TQ_SLOT(slotData(TDEIO::Job *, const TQByteArray &))); + connect(d->job, TQ_SIGNAL(result(TDEIO::Job *)), this, TQ_SLOT(slotResult(TDEIO::Job *))); } void Image::slotData(TDEIO::Job *, const TQByteArray &data) diff --git a/akregator/src/librss/image.h b/akregator/src/librss/image.h index 299b4292..d98a59fc 100644 --- a/akregator/src/librss/image.h +++ b/akregator/src/librss/image.h @@ -31,9 +31,9 @@ namespace RSS * is via Document::image(). * @see Document::image() */ - class KDE_EXPORT Image : public TQObject + class TDE_EXPORT Image : public TQObject { - Q_OBJECT + TQ_OBJECT public: /** diff --git a/akregator/src/librss/loader.cpp b/akregator/src/librss/loader.cpp index 8e2967b7..8674dfb7 100644 --- a/akregator/src/librss/loader.cpp +++ b/akregator/src/librss/loader.cpp @@ -13,7 +13,7 @@ #include "feeddetector.h" #include <tdeio/job.h> -#include <kprocess.h> +#include <tdeprocess.h> #include <kstaticdeleter.h> #include <kurl.h> #include <kdebug.h> @@ -111,13 +111,13 @@ void FileRetriever::retrieveData(const KURL &url) d->job->addMetaData("UserAgent", ua); - TQTimer::singleShot(1000*90, this, TQT_SLOT(slotTimeout())); + TQTimer::singleShot(1000*90, this, TQ_SLOT(slotTimeout())); - connect(d->job, TQT_SIGNAL(data(TDEIO::Job *, const TQByteArray &)), - TQT_SLOT(slotData(TDEIO::Job *, const TQByteArray &))); - connect(d->job, TQT_SIGNAL(result(TDEIO::Job *)), TQT_SLOT(slotResult(TDEIO::Job *))); - connect(d->job, TQT_SIGNAL(permanentRedirection(TDEIO::Job *, const KURL &, const KURL &)), - TQT_SLOT(slotPermanentRedirection(TDEIO::Job *, const KURL &, const KURL &))); + connect(d->job, TQ_SIGNAL(data(TDEIO::Job *, const TQByteArray &)), + TQ_SLOT(slotData(TDEIO::Job *, const TQByteArray &))); + connect(d->job, TQ_SIGNAL(result(TDEIO::Job *)), TQ_SLOT(slotResult(TDEIO::Job *))); + connect(d->job, TQ_SIGNAL(permanentRedirection(TDEIO::Job *, const KURL &, const KURL &)), + TQ_SLOT(slotPermanentRedirection(TDEIO::Job *, const KURL &, const KURL &))); } void FileRetriever::slotTimeout() @@ -207,10 +207,10 @@ void OutputRetriever::retrieveData(const KURL &url) d->buffer->open(IO_WriteOnly); d->process = new KShellProcess(); - connect(d->process, TQT_SIGNAL(processExited(TDEProcess *)), - TQT_SLOT(slotExited(TDEProcess *))); - connect(d->process, TQT_SIGNAL(receivedStdout(TDEProcess *, char *, int)), - TQT_SLOT(slotOutput(TDEProcess *, char *, int))); + connect(d->process, TQ_SIGNAL(processExited(TDEProcess *)), + TQ_SLOT(slotExited(TDEProcess *))); + connect(d->process, TQ_SIGNAL(receivedStdout(TDEProcess *, char *, int)), + TQ_SLOT(slotOutput(TDEProcess *, char *, int))); *d->process << url.path(); d->process->start(TDEProcess::NotifyOnExit, TDEProcess::Stdout); } @@ -268,7 +268,7 @@ Loader *Loader::create() Loader *Loader::create(TQObject *object, const char *slot) { Loader *loader = create(); - 
connect(loader, TQT_SIGNAL(loadingComplete(Loader *, Document, Status)), + connect(loader, TQ_SIGNAL(loadingComplete(Loader *, Document, Status)), object, slot); return loader; } @@ -290,8 +290,8 @@ void Loader::loadFrom(const KURL &url, DataRetriever *retriever) d->url=url; d->retriever = retriever; - connect(d->retriever, TQT_SIGNAL(dataRetrieved(const TQByteArray &, bool)), - this, TQT_SLOT(slotRetrieverDone(const TQByteArray &, bool))); + connect(d->retriever, TQ_SIGNAL(dataRetrieved(const TQByteArray &, bool)), + this, TQ_SLOT(slotRetrieverDone(const TQByteArray &, bool))); d->retriever->retrieveData(url); } @@ -351,7 +351,7 @@ void Loader::slotRetrieverDone(const TQByteArray &data, bool success) TQByteArray tmpData; tmpData.setRawData(charData, len); - if (doc.setContent(tmpData)) + if (doc.setContent(tmpData, /* namespaceProcessing */ true)) { rssDoc = Document(doc); if (!rssDoc.isValid()) diff --git a/akregator/src/librss/loader.h b/akregator/src/librss/loader.h index ed22da22..ce93fc25 100644 --- a/akregator/src/librss/loader.h +++ b/akregator/src/librss/loader.h @@ -32,9 +32,9 @@ namespace RSS * a new retrieval algorithm which can then be plugged into the RSS loader. * @see Loader, FileRetriever, OutputRetriever */ - class KDE_EXPORT DataRetriever : public TQObject + class TDE_EXPORT DataRetriever : public TQObject { - Q_OBJECT + TQ_OBJECT public: /** @@ -87,9 +87,9 @@ namespace RSS * Implements a file retriever, to be used with Loader::loadFrom(). * @see DataRetriever, Loader::loadFrom() */ - class KDE_EXPORT FileRetriever : public DataRetriever + class TDE_EXPORT FileRetriever : public DataRetriever { - Q_OBJECT + TQ_OBJECT public: /** @@ -161,7 +161,7 @@ namespace RSS */ class OutputRetriever : public DataRetriever { - Q_OBJECT + TQ_OBJECT public: /** @@ -211,8 +211,8 @@ namespace RSS * * \code * Loader *loader = Loader::create(); - * connect(loader, TQT_SIGNAL(loadingComplete(Loader *, Document, Status)), - * this, TQT_SLOT(slotLoadingComplete(Loader *, Document, Status))); + * connect(loader, TQ_SIGNAL(loadingComplete(Loader *, Document, Status)), + * this, TQ_SLOT(slotLoadingComplete(Loader *, Document, Status))); * loader->loadFrom("http://www.blah.org/foobar.rdf", new FileRetriever); * \endcode * @@ -254,9 +254,9 @@ namespace RSS * loadingComplete signal goes out of scope. This is e.g. the case if you * intend to call getPixmap() on Document::image()! 
*/ - class KDE_EXPORT Loader : public TQObject + class TDE_EXPORT Loader : public TQObject { - Q_OBJECT + TQ_OBJECT friend class someClassWhichDoesNotExist; public: diff --git a/akregator/src/librss/test_data/atom_spec.xml b/akregator/src/librss/test_data/atom_spec.xml new file mode 100644 index 00000000..b8e3dff4 --- /dev/null +++ b/akregator/src/librss/test_data/atom_spec.xml @@ -0,0 +1,42 @@ +<?xml version="1.0" encoding="utf-8"?> +<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us"> + <title type="text">dive into mark</title> + <subtitle type="html"> + A <em>lot</em> of effort + went into making this effortless + </subtitle> + <updated>2005-07-31T12:29:29Z</updated> + <id>tag:example.org,2003:3</id> + <link rel="alternate" type="text/html" hreflang="en" href="http://example.org/"/> + <link rel="self" type="application/atom+xml" href="http://example.org/feed.atom"/> + <rights>Copyright (c) 2003, Mark Pilgrim</rights> + <generator uri="http://www.example.com/" version="1.0"> + Example Toolkit + </generator> + <entry> + <title>Atom draft-07 snapshot</title> + <link rel="alternate" type="text/html" href="http://example.org/2005/04/02/atom"/> + <link rel="enclosure" type="audio/mpeg" length="1337" href="http://example.org/audio/ph34r_my_podcast.mp3"/> + <id>tag:example.org,2003:3.2397</id> + <updated>2005-07-31T12:29:29Z</updated> + <published>2003-12-13T08:29:29-04:00</published> + <author> + <name>Mark Pilgrim</name> + <uri>http://example.org/</uri> + <email>f8dy@example.com</email> + </author> + <contributor> + <name>Sam Ruby</name> + </contributor> + <contributor> + <name>Joe Gregorio</name> + </contributor> + <content type="xhtml" xml:lang="en" xml:base="http://diveintomark.org/"> + <div xmlns="http://www.w3.org/1999/xhtml"> + <p> + <i>[Update: The Atom draft is finished.]</i> + </p> + </div> + </content> + </entry> +</feed> diff --git a/akregator/src/librss/test_data/comment_api.xml b/akregator/src/librss/test_data/comment_api.xml new file mode 100644 index 00000000..7bbb29ae --- /dev/null +++ b/akregator/src/librss/test_data/comment_api.xml @@ -0,0 +1,411 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw='http://wellformedweb.org/CommentAPI/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rl='http://www.purl.org/RESTLog/'> + <channel> + <title>The Well-Formed Web</title> + <link>http://wellformedweb.org/news/</link> + <description>Exploring the limits of XML and HTTP</description> + <dc:creator>BitWorking, Inc</dc:creator> + <item> + <title>Should you use Content Negotiation in your Web Services?</title> + <link>http://bitworking.org/news/WebServicesAndContentNegotiation</link> + <description> + <p>Should you use Content Negotiation when building your web service? +The short answer is no. There are definite problems with <abbrev title="Content Negotiation">conneg</abbrev> +and I can give some examples of problems I have run into and also point to problems +other have run into.</p> + + +<p>First let's back up and explain Content Negotiation. Your browser is + a generic display program and can take in various kinds of media, such + as HTML, JPEGs, CSS, Flash, etc. and display it for you. The first thing to + note is that each of those kinds of media have different mime types. 
+ Each format has it's own registered mime type and when a client + does a GET on a URL it gets back not only the content but the response + also includes a <code>Content-Type:</code> header which lists + the mime-type of what is in the body. +</p> + +<p>One of the interesting things about HTTP is that it allows + the same URI to have multiple representations. For example I + could have a URL that had both <code>plain/text</code> and <code>text/html</code> + representations. Now that leads to two obvious questions.</p> + +<ol> + <li>How does the server know which represenation to serve?</li> + <li>How can the browser influence the servers choice to get something it can handle?</li> +</ol> + +<p>Let's start by answering question two first. The browser uses the <code>Accept:</code> + header to list out the mime-types that it is willing to accept. There is also a weighting + scheme that allows the client to specify a preference for one media type + over another. For example, here is the capture of some of the headers, including the <code>Accept:</code> header, + sent by Mozilla when it does a GET on a URI:</p> + +<pre class="example"><code>Accept: text/xml,application/xml,application/xhtml+xml,\ + text/html;q=0.9,text/plain;q=0.8,video/x-mng,\ + image/png,image/jpeg,image/gif;q=0.2,*/*;q=0.1 +Accept-Language: en-us,en;q=0.5 +Accept-Encoding: gzip,deflate,compress;q=0.9 +Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7 +</code></pre> + +<p>The <code>Accept:</code> header list the mime-types that the browser can + handle along with weights of the form <code>q=</code> where the argument + is a floating point number between 0 and 1. The weights indicate a preference + for that media type, with a higher number inidicating a higher preference. Note that + there are several bits of complexity I am going to ignore for now. The first is the last + type the Mozilla browser says in can accept, */*;q=0.1. This is a wild card + match, which will match any mime-type that the server could want to serve up. The second + is that there are multiple Accept headers, one for language, one for encoding, another + for charset. How these over-lap and influence the response sent won't be covered here. +</p> + +<p>Now to answer the first question. The server looks at the available representations + is has and servers up the one with the highest preference to the client. + Based on the <code>Accept:</code> + header it sends an appropriate representation back and indicates the type it + chose using the <code>Content-Type:</code> header.</p> + +<p>This seems like a really cool and vastly under utilized feature of HTTP. It also + seems particularly intriguing for web services. You could return + JPEGs from that mapping service for the older client platforms, but also + serve up SVG for the newer clients so they can scale and rotate their maps. + What could possibly go wrong?</p> + +<p>The first thing that could go wrong is a bug or mis-configuration on the client or the server. + This has happened to me in the + past. The W3C does conneg on some of their recommendations, returning either HTML or plain + text based on the clients capabilities. This is fine, but one day their server was + either confused or mis-configured because it would only serve the recommendation in <code>plain/text</code>. + I really needed the HTML form, but after trying multiple browsers from multipe locations I could only retrieve the text + format. 
I ended up pulling the HTML version out of the Google cache.</p> + +<p>The second problem that I ran across highlights the real core problem with conneg. I was + trying to use the W3C XSLT service to do some transformations on my web pages. Now the server side + software I use to run Well-Formed Web does conneg and can return either HTML or an RSS item + fragment for each URI. At the time I was serving up XHTML 1.0, which is valid XML and + thus good input into an XSLT service. So the way the XSLT service works is that you enter two URIs, one + for the source content and the other for the XSLT sheet to apply to the source content. + My transformation kept failing and it was because of the + Accept headers that the XSLT service sent when it went to retrieve the source content. + My server kept returning the RSS item fragment and not + the XHTML. Now this would have been fine if I wanted to apply an XSLT sheet to my RSS item fragment, but in this + case I wanted it to apply to the XHTML. Note that the problem could have been completely reversed, I could have + been trying to apply the XSLT to the RSS item and not to the XHTML and my server could have returned + the XHTML all the time. The crux of the problem is that when I gave the URI to the XSLT transformation + service I have no way of specifying what mime-type to request. I get no chance to tweak the + services <code>Accept:</code> header. +</p> + +<p>Let's cover that again to clarify. If I hand you a URI only, and that URI supports conneg, + then I get no control over which representation you retrieve. In the cases where you are + passing a URI into a service that is later going to retrieve a represenation from that URI, you + really have no idea which representation it's going to get. That could mean that you end up + passing your RSS feed to the W3C HTML validator, or you end up passing XHTML instead of RSS into + an XSLT translator service, or you end up passing a 12MB PNG to a handheld instead of + that 20KB SVG file. You end up with a problem that is hard to debug and + one that wouldn't exist if each URI had only one mime-type.</p> + +<h3>Further Reading</h3> +<p><a href="http://norman.walsh.name/2003/07/02/conneg">Norman Walsh has also run into problems</a> with Content Negotiation.</p> +<p>The issue of using fragment identifiers with conneg has not only come up but was important enough to + merit mention in the W3C document <a href="http://www.w3.org/TR/webarch/#frag-conneg">Architecture of the World Wide Web</a>.</p> + + + </description> + + <dc:date>2003-09-06T21:54:43-05:00</dc:date> + <wfw:comment>http://bitworking.org/news/comments/WebServicesAndContentNegotiation</wfw:comment> + <wfw:commentRss>http://bitworking.org/news/WebServicesAndContentNegotiation?crss</wfw:commentRss> + </item> + <item> + <title>Google2Atom</title> + <link>http://wellformedweb.org/news/Google2Atom</link> + <description> +<p>Welcome to the Google2Atom web service. Just enter your + search and your <a href="http://www.google.com/apis/">Google key</a> + below. Once you press "Search" you will get an <a href="http://www.mnot.net/drafts/draft-nottingham-atom-format-00.html"> + Atom</a> feed of the search results. 
+</p> + +<form method="get" action="http://wellformedweb.org/cgi-bin/google2atom.cgi"> +<p><input size="50" name="q"/></p> +<p>Google Key: <input size="20" name="license_key"/></p> +<p><input type="submit" value=" Search "/></p> +</form> + +<hr /> + +<p><strong>Note:</strong> The Google Key is no longer mandatory, if it's not + supplied it will use my own key. In light of that please feel free to + use my key for experimentation, but if you start making heavy use + of this service please get your own Google API Key to avoid + limiting others use of this service.</p> + +<p>This is a REST based reformulation of the Google API. As such it uses + query parameters in a GET based HTTP request to do the search. That is, it works + just like the regular google web page, but this form returns + a well-formed XML document instead of a web page. Why is this better?</p> + +<dl> + <dt>Simplicity</dt> + <dd> + It works just like the google web page, so it is + conceptually easier to understand. + </dd> + + <dt>Composability</dt> + <dd>Since the request is just a simple GET the results of a query can be composed + with other web services. For example, the results could be transformed using + XSLT or fed into a validator. + </dd> +</dl> + +<h3>Bonus Features</h3> + +<p>One feature found in this interface that is not found + in the original Google API is the well-formedness of the + results content. + <a href="http://bitworking.org/news/Announcing_pyTidy">PyTidy</a> + is used to transform the HTML + snippets from the Google API into well-formed XML and place + those into 'content' elements with type='text/html' and + mode='xml'. +</p> + +<h3>Colophon</h3> + +<p>Google2Atom is written in <a href="http://www.python.org">Python</a> and uses + both the <a href="http://bitworking.org/news/Announcing_pyTidy"> + pyTidy</a> and <a href="http://www.diveintomark.org/projects/pygoogle/"> + pyGoogle</a> libraries.</p> + + </description> + + <dc:date>2003-11-22T01:18:42-05:00</dc:date> + <wfw:comment>http://wellformedweb.org/news/comments/Google2Atom</wfw:comment> + <wfw:commentRss>http://wellformedweb.org/news/Google2Atom?crss</wfw:commentRss> + </item> + <item> + <title>wfw namespace elements</title> + <link>http://wellformedweb.org/news/wfw_namespace_elements</link> + <description> + <p>The <code>wfw</code> namespace, http://wellformedweb.org/CommentAPI/ +contains multiple elements. As more are added in various places I will +endeavor to keep the list here updated.</p> + +<dl> + <dt>wfw:comment</dt> + <dd>The first element to appear in this namespace is <code>comment</code>. This element appears + in RSS feeds and contains the URI that comment entries are to be POSTed to. The details + of this are outlined in the <a href="http://wellformedweb.org/story/9">CommentAPI Specification</a>.<dd> + + <dt>wfw:commentRss</dt> + <dd>The second element to appear in the wfw namespace is <code>commentRss</code>. This element + also appears in RSS feeds and contains the URI of the RSS feed for comments on that Item. + This is documented in <a href="http://www.sellsbrothers.com/spout/default.aspx?content=archive.htm#exposingRssComments">Chris Sells' Specification</a>. Note that for quite a while this page has had a typo and erroneously referred to + this element as 'commentRSS' as opposed to the correct 'commentRss'. Feed consumers should be aware + that they may run into both spellings in the wild. Please see + <a href="http://www.intertwingly.net/blog/2006/04/16/commentRss">this page</a> for + more information. 
+ </dd> +</dl> + </description> + + <dc:date>2003-10-10T13:11:46-05:00</dc:date> + <wfw:comment>http://wellformedweb.org/news/comments/wfw_namespace_elements</wfw:comment> + <wfw:commentRss>http://wellformedweb.org/news/wfw_namespace_elements?crss</wfw:commentRss> + </item> + <item> + <title>The HTTP verb PUT under Apache: Safe or Dangerous?</title> + <link>http://wellformedweb.org/news/PUT_SaferOrDangerous</link> + <description> + <p>"Is the HTTP verb PUT under Apache safe or dangerous?" This is a question I come across often, and have now + run into it twice in the work on Atom. So is it safe? The answer is maybe.</p> +<p>Here are two such examples:</p> + +<blockquote><p> + Using DELETE and PUT may be the "right thing to do" + in an ideal world, but the fact of the matter is that a + lot -- if not the vast majority -- of webservers do not allow these + operations. </p></blockquote> + +<blockquote><p>If anyone knows of a newer article describing + HTTP PUT with apache, I would be very interested in seeing it. Because, + due to my experience with PUT, you have to define a single PUTScript in + httpd.conf, and if you PUT something to an apache server at the URI + www.example.com/blog/entries/1 or something similar, apache passes all + of the information to the PUTScript, not to anything else.</p></blockquote> + +<p>Both of the above quotes are from the <a href="http://www.intertwingly.net/wiki/pie/RestEchoApiPutAndDelete">Atom Wiki discussion + of the use of PUT</a>. A little digging reveals that the ApacheWeek article + <a href="http://www.apacheweek.com/features/put">Publishing Pages with PUT</a> + is referenced most often when the danger of PUT is raised. <p> + +<p>That ApacheWeek article does talk about the dangers of PUT and + the cautions you need to follow when writing a script that + does content publishing via PUT. That key part of that phrase + is <strong>content publishing</strong>. That means that PUT is being + used to upload arbitrary content to the server and the client + is determining via the URI where the content should be stored. + Now you can imagine how this might be dangerous, for example + not correctly checking URI paths that include <code>../..</code> could + let a malicious agent re-write your <code>.bashrc</code>.</p> + +<p>Implementing a PUT script can be difficult and a security hazard + in the context of content publishing, but that's the case because + the client is choosing the target URI and the client could upload + any content type. In the case of Web Services in general, and + the AtomAPI in particular, PUT is used in a much narrower manner + and avoids those potential security problems.</p> + +<p>In the case of the AtomAPI PUT is only allowed on URIs that point + to a pre-existing resource. The + AtomAPI follows a general idiom for editing resources of doing + a GET to retrieve the original XML, then a PUT on the same URI + to upate that resource with the edited XML. No URIs are created + by doing a PUT. PUT is not accepted on arbitrary URIs. This makes + the use of PUT in the context of the AtomAPI just as safe as POST.</p> + +<p>There are quite a few ways to configure Apache to process + incoming requests. In particular it is possible to have a single + script that handles all PUT requests below a chosen directory. 
This + strategy, and all of the associated security concerns associated with + it, are covered fully in the <a href="http://www.apacheweek.com/features/put">Publishing Pages with PUT</a>.</p> + +<p>When processing request with a CGI script all the PUT requests + will come through. The verb is passed to the CGI program via the REQUEST_METHOD environment + variable, and the program decides what to do with the content.</p> + +<p>Using PUT propoerly has advantages in Web Service development. First, + Apache lets you control security based on the verb using the + <a href="http://httpd.apache.org/docs-2.0/mod/core.html#limit">Limit</a> + and <a href="http://httpd.apache.org/docs-2.0/mod/core.html#limitexcept">LimitExcept</a> + directives, which + let you restrict access controls based on the verb. Here is a sample + of one of my <code>.htaccess</code> files that restricts the use of + all verbs except GET to the CGI program <code>Bulu.cgi.</code></p> + +<pre class="example"><code>&lt;Files Bulu.cgi> +AuthType Basic +AuthName myrealm +AuthUserFile /path/to/my/password/file + &lt;LimitExcept GET> + Require valid-user + &lt;/LimitExcept> +&lt;/Files> +</code></pre> + +<p>In addition, the <a href="http://httpd.apache.org/docs-2.0/mod/mod_actions.html#script">Script</a> + directive can be used to dispatch to a CGI program based on the verb used:</p> + +<pre class="example"><code>Script PUT /cgi-bin/put.cgi</code></pre> + +<p>The second advantage using PUT brings is clarity. Given the idiom + of using GET/PUT in tandem on a URI to edit resources PUT + clearly signals what the interface is doing.</p> + +<h4>Resources</h4> + +<p><a href="http://www.apacheweek.com">ApacheWeek</a>: <a href="http://www.apacheweek.com/features/put">Publishing Pages with PUT</a></p> +<p><a href="http://www.intertwingly.net/wiki/pie/RestEchoApiPutAndDelete">RestEchoApiPutAndDelete</a>: Discussion on the use of PUT + and DELETE in the AtomAPI.</p> +<p><a href="http://httpd.apache.org/docs-2.0/mod/mod_actions.html">mod_actions</a>: An Apache module for + controlling dispatching based on verb or content-type.</p> +<p><a href="http://www.w3.org/Amaya/User/Put.html">Configuring your WWW server to understand the PUT method</a>, from the W3Cs Amaya project documentation.</p> +<p><a href="http://www.webdav.org/">WebDAV</a> is also something you may be interested in if you + are looking for ways to publish your content using HTTP. WebDAV stands for + "Web-based Distributed Authoring and Versioning". It is a set of extensions to the HTTP + protocol which allows users to collaboratively edit and manage files on remote web servers. + <a href="http://httpd.apache.org/docs-2.0/mod/mod_dav.html"> + Mod_dav</a> in an Apache module that implements WebDAV.</p> + + + + </description> + + <dc:date>2003-08-23T00:45:25-05:00</dc:date> + <wfw:comment>http://wellformedweb.org/news/comments/PUT_SaferOrDangerous</wfw:comment> + <wfw:commentRss>http://wellformedweb.org/news/PUT_SaferOrDangerous?crss</wfw:commentRss> + </item> + <item> + <title>Six Plus One</title> + <link>http://wellformedweb.org/news/SixPlusOne</link> + <description> + <p>Previously I talked about the <a href="http://bitworking.org/news/Six_Places">six different places</a> there are to + store information in an HTTP transaction. This is slightly misleading. 
+</p> + +<p> To review, the six places are:</p> +<ol> + <li>Request URI</li> + <li>Request Headers</li> + <li>Request Content</li> + <li>Response Status Code</li> + <li>Response Headers</li> + <li>Response Content</li> +</ol> + +<p>This is slightly misleading because the URI is listed as a single + storage location. This isn't the best characterization, as it really + contains two different sets of information: the path, and the query parameters.</p> + +<p>Now the path part of a URI usually corresponds to the directory structure on the server. + But remember that the path structure of a server is completely controlled + by that server and it need not corresponse to any file or directory strucure. + While it is at times convenient to map it to a directory structure, this isn't required, + and it is possible to pass path information to a + CGI program. For example, if you do a GET on the following URL:</p> + +<pre class="example"><code>http://example.org/cgi-bin/test.py/fred/12 +</code></pre> + +<p>and there exists a program named <code>test.py</code> in the <code>cgi-bin</code> directory + then that program will be executed. The remaining path after the program is passed + to the CGI program in the PATH_INFO environment variable. In contrast, if query + parameters are passed in, they are passed to the CGI program + via the QUERY_STRING environment variable.</p> + +<p>For example, if this is the script <code>test.py</code>:</p> + +<pre class="example"><code>import os +print "Content-type: text/plain\n\n" +print "PATH_INFO = %s" % os.environ['PATH_INFO'] +print "QUERY_STRING = %s" % os.environ['QUERY_STRING']</code></pre> + +<p>And it handles the GET for this URI:</p> + +<pre class="example"><code>http://localhost/cgi-bin/test.py/reilly/12?id=234454</code></pre> + +<p>It will display:</p> + +<pre class="example"><code>PATH_INFO = /reilly/12 +QUERY_STRING = id=234454 +</code></pre> + +<p>Note how the piece of the path below test.py has been stripped off and made + available via <code>PATH_INFO</code>, while the query parameters are + stored in the QUERY_STRING environment variable. +</p> + +<p>So HTTP, via the structure of a URI, gives you two distinct places + to store information, one in the path and the second in the query parameters. + This isn't even the full story, because if you are running Apache and have + the ability to use .htaccess files you can use + <a href="http://httpd.apache.org/docs/mod/mod_rewrite.html">mod_rewrite</a> and map URIs so that they appear + as paths but show up in the CGI as query parameters, but we won't cover that + now. 
+</p> + + + </description> + + <dc:date>2003-08-03T01:34:49-05:00</dc:date> + <wfw:comment>http://wellformedweb.org/news/comments/SixPlusOne</wfw:comment> + <wfw:commentRss>http://wellformedweb.org/news/SixPlusOne?crss</wfw:commentRss> + </item> + </channel> +</rss> + + + diff --git a/akregator/src/librss/test_data/dublincore.xml b/akregator/src/librss/test_data/dublincore.xml new file mode 100644 index 00000000..075822bb --- /dev/null +++ b/akregator/src/librss/test_data/dublincore.xml @@ -0,0 +1,42 @@ +<?xml version="1.0" encoding="utf-8"?> +<rss xmlns:media="http://search.yahoo.com/mrss/" xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0"> + <channel> + <title>The Guardian</title> + <link>https://www.theguardian.com/us</link> + <description>Latest US news, world news, sports, business, opinion, analysis and reviews from the Guardian, the world's leading liberal voice</description> + <copyright>Guardian News and Media Limited or its affiliated companies. All rights reserved. 2025</copyright> + <language>en-gb</language> + <!-- pubDate has been modified so we can assure dc:date is chosen. --> + <pubDate>Thu, 13 Mar 2020 10:38:39 GMT</pubDate> + <dc:date>2025-03-13T07:28:39Z</dc:date> + <dc:language>en-gb</dc:language> + <dc:rights>Guardian News and Media Limited or its affiliated companies. All rights reserved. 2025</dc:rights> + <image> + <title>The Guardian</title> + <url>https://assets.guim.co.uk/images/guardian-logo-rss.c45beb1bafa34b347ac333af2e6fe23f.png</url> + <link>https://www.theguardian.com</link> + </image> + <item> + <title>Judge orders Elon Musk and Doge to produce records about cost-cutting operations</title> + <link>https://www.theguardian.com/us-news/2025/mar/13/elon-musk-doge-court-ruling-records</link> + <description><p>The documents would ultimately inform whether Musk has been operating unconstitutionally to the extent Doge’s activities should be halted</p><p>Elon Musk and his so-called “department of government efficiency”, or Doge, have been ordered by <a href="https://www.theguardian.com/us-news/2025/mar/12/judge-blocks-trump-order-perkins-coie">a federal judge</a> to turn over a wide array of records that would reveal the identities of staffers and internal records related to efforts to aggressively cut federal government spending and programs.</p><p>US district judge Tanya Chutkan’s order forces Musk to produce documents related to Doge’s activities <a href="https://storage.courtlistener.com/recap/gov.uscourts.dcd.277463/gov.uscourts.dcd.277463.61.0_1.pdf">as part of a lawsuit</a> brought by 14 Democratic state attorneys general that alleges Musk violated the constitution by wielding powers that only Senate-confirmed officials should possess.</p> <a href="https://www.theguardian.com/us-news/2025/mar/13/elon-musk-doge-court-ruling-records">Continue reading...</a></description> + <category domain="https://www.theguardian.com/us-news/trump-administration">Trump administration</category> + <category domain="https://www.theguardian.com/us-news/donaldtrump">Donald Trump</category> + <category domain="https://www.theguardian.com/law/law-us">Law (US)</category> + <category domain="https://www.theguardian.com/us-news/us-politics">US politics</category> + <category domain="https://www.theguardian.com/us-news/us-news">US news</category> + <!-- pubDate has been modified so we can assure dc:date is chosen. 
--> + <pubDate>Thu, 13 Mar 2020 10:38:39 GMT</pubDate> + <guid>https://www.theguardian.com/us-news/2025/mar/13/elon-musk-doge-court-ruling-records</guid> + <media:content width="140" url="https://i.guim.co.uk/img/media/8570e3094dfad7e268555097158aa8085221d48f/0_72_5221_3132/master/5221.jpg?width=140&quality=85&auto=format&fit=max&s=8677942b4c15d93e73d6bd18a13a12c0"> + <media:credit scheme="urn:ebu">Photograph: Alex Brandon/AP</media:credit> + </media:content> + <media:content width="460" url="https://i.guim.co.uk/img/media/8570e3094dfad7e268555097158aa8085221d48f/0_72_5221_3132/master/5221.jpg?width=460&quality=85&auto=format&fit=max&s=bcd596d741b8c9e4be82c602e5105122"> + <media:credit scheme="urn:ebu">Photograph: Alex Brandon/AP</media:credit> + </media:content> + <dc:creator>Hugo Lowell in Washington</dc:creator> + <dc:date>2025-03-13T05:22:00Z</dc:date> + </item> + </channel> +</rss> + diff --git a/akregator/src/librss/test_data/rdf.xml b/akregator/src/librss/test_data/rdf.xml new file mode 100644 index 00000000..dadcde6d --- /dev/null +++ b/akregator/src/librss/test_data/rdf.xml @@ -0,0 +1,64 @@ +<?xml version="1.0"?> +<rdf:RDF + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns="http://purl.org/rss/1.0/" +> + + <channel rdf:about="http://www.xml.com/xml/news.rss"> + <title>XML.com</title> + <link>http://xml.com/pub</link> + + <description> + XML.com features a rich mix of information and services + for the XML community. + </description> + + <image rdf:resource="http://xml.com/universal/images/xml_tiny.gif" /> + + <items> + <rdf:Seq> + + <rdf:li resource="http://xml.com/pub/2000/08/09/xslt/xslt.html" /> + <rdf:li resource="http://xml.com/pub/2000/08/09/rdfdb/index.html" /> + </rdf:Seq> + </items> + + <textinput rdf:resource="http://search.xml.com" /> + + </channel> + <image rdf:about="http://xml.com/universal/images/xml_tiny.gif"> + <title>XML.com</title> + <link>http://www.xml.com</link> + <url>http://xml.com/universal/images/xml_tiny.gif</url> + + </image> + <item rdf:about="http://xml.com/pub/2000/08/09/xslt/xslt.html"> + <title>Processing Inclusions with XSLT</title> + <link>http://xml.com/pub/2000/08/09/xslt/xslt.html</link> + <description> + + Processing document inclusions with general XML tools can be + problematic. This article proposes a way of preserving inclusion + information through SAX-based processing. + </description> + </item> + <item rdf:about="http://xml.com/pub/2000/08/09/rdfdb/index.html"> + <title>Putting RDF to Work</title> + <link>http://xml.com/pub/2000/08/09/rdfdb/index.html</link> + + <description> + Tool and API support for the Resource Description Framework + is slowly coming of age. Edd Dumbill takes a look at RDFDB, + one of the most exciting new RDF toolkits. 
+ </description> + </item> + + <textinput rdf:about="http://search.xml.com"> + <title>Search XML.com</title> + + <description>Search XML.com's XML collection</description> + <name>s</name> + <link>http://search.xml.com</link> + </textinput> + +</rdf:RDF> diff --git a/akregator/src/librss/test_data/rss091.xml b/akregator/src/librss/test_data/rss091.xml new file mode 100644 index 00000000..65788c3d --- /dev/null +++ b/akregator/src/librss/test_data/rss091.xml @@ -0,0 +1,50 @@ +<?xml version="1.0" encoding="ISO-8859-1" ?> +<rss version="0.91"> + <channel> + <title>WriteTheWeb</title> + <link>http://writetheweb.com</link> + <description>News for web users that write back</description> + <language>en-us</language> + <copyright>Copyright 2000, WriteTheWeb team.</copyright> + <managingEditor>editor@writetheweb.com</managingEditor> + <webMaster>webmaster@writetheweb.com</webMaster> + <image> + <title>WriteTheWeb</title> + <url>http://writetheweb.com/images/mynetscape88.gif</url> + <link>http://writetheweb.com</link> + <width>88</width> + <height>31</height> + <description>News for web users that write back</description> + </image> + <item> + <title>Giving the world a pluggable Gnutella</title> + <link>http://writetheweb.com/read.php?item=24</link> + <description>WorldOS is a framework on which to build programs that work like Freenet or Gnutella -allowing distributed applications using peer-to-peer routing.</description> + </item> + <item> + <title>Syndication discussions hot up</title> + <link>http://writetheweb.com/read.php?item=23</link> + <description>After a period of dormancy, the Syndication mailing list has become active again, with contributions from leaders in traditional media and Web syndication.</description> + </item> + <item> + <title>Personal web server integrates file sharing and messaging</title> + <link>http://writetheweb.com/read.php?item=22</link> + <description>The Magi Project is an innovative project to create a combined personal web server and messaging system that enables the sharing and synchronization of information across desktop, laptop and palmtop devices.</description> + </item> + <item> + <title>Syndication and Metadata</title> + <link>http://writetheweb.com/read.php?item=21</link> + <description>RSS is probably the best known metadata format around. RDF is probably one of the least understood. In this essay, published on my O'Reilly Network weblog, I argue that the next generation of RSS should be based on RDF.</description> + </item> + <item> + <title>UK bloggers get organised</title> + <link>http://writetheweb.com/read.php?item=20</link> + <description>Looks like the weblogs scene is gathering pace beyond the shores of the US. 
There's now a UK-specific page on weblogs.com, and a mailing list at egroups.</description> + </item> + <item> + <title>Yournamehere.com more important than anything</title> + <link>http://writetheweb.com/read.php?item=19</link> + <description>Whatever you're publishing on the web, your site name is the most valuable asset you have, according to Carl Steadman.</description> + </item> + </channel> + </rss> diff --git a/akregator/src/librss/testlibrss.cpp b/akregator/src/librss/testlibrss.cpp index bef989c6..5e298ea3 100644 --- a/akregator/src/librss/testlibrss.cpp +++ b/akregator/src/librss/testlibrss.cpp @@ -1,67 +1,268 @@ #include "testlibrss.h" -#include "image.h" +#include <tqdatetime.h> +#include <tqfile.h> #include <tdeaboutdata.h> #include <tdecmdlineargs.h> #include <tdeapplication.h> #include <kdebug.h> +#include <krfcdate.h> + +#include "image.h" +#include "enclosure.h" + +#include <cstdlib> using namespace RSS; + +TestRetriever::TestRetriever() + : m_errorCode(0) +{ +} + +TestRetriever::~TestRetriever() +{ +} + +void TestRetriever::retrieveData(const KURL &url) +{ + // Test files are local paths + TQFile file(url.path()); + if (file.open(IO_ReadOnly)) + { + TQStringList lines; + TQTextStream stream(&file); + + while (!stream.atEnd()) + { + lines += stream.readLine(); + } + file.close(); + + TQCString content = lines.join("\n").local8Bit(); + TQByteArray data; + data.duplicate(content, content.length()); + emit dataRetrieved(data, true); + } + else + { + kdError() << "Failed to retrieveData: " << file.errorString() << endl; + m_errorCode = file.status(); + emit dataRetrieved(TQByteArray{}, false); + } +} + static const TDECmdLineOptions options[] = { { "+url", I18N_NOOP("URL of feed"), 0 }, TDECmdLineLastOption }; +template<typename ActualType, typename ExpectedType> +static void assertEquals(const ActualType& actual, const ExpectedType& expected) +{ + if (actual != expected) + { + kdError() << "Assertion failed: actual == expected\n" + << " actual: " << actual << "\n" + << " expected: " << expected << endl; + tdeApp->exit(1); + } +} + +static void checkRSS091(const Document& document) +{ + assertEquals(document.title(), "WriteTheWeb"); + assertEquals(document.link().url(), "http://writetheweb.com"); + assertEquals(document.description(), "News for web users that write back"); + assertEquals(document.language(), Language::en_us); + assertEquals(document.copyright(), "Copyright 2000, WriteTheWeb team."); + assertEquals(document.managingEditor(), "editor@writetheweb.com"); + assertEquals(document.webMaster(), "webmaster@writetheweb.com"); + + const Image* image = document.image(); + if (!image) + { + kdError() << "Expected an <image> element to be found" << endl; + tdeApp->exit(1); + } + + assertEquals(image->title(), "WriteTheWeb"); + assertEquals(image->url().url(), "http://writetheweb.com/images/mynetscape88.gif"); + assertEquals(image->link().url(), "http://writetheweb.com"); + assertEquals(image->description(), "News for web users that write back"); + assertEquals(image->width(), 88); + assertEquals(image->height(), 31); + + assertEquals(document.articles().count(), 6); + + Article article = document.articles().first(); + assertEquals(article.title(), "Giving the world a pluggable Gnutella"); + assertEquals(article.link().url(), "http://writetheweb.com/read.php?item=24"); + assertEquals(article.description(), "WorldOS is a framework on which to build programs that work like Freenet or Gnutella -allowing distributed applications using peer-to-peer routing."); +} + +static void 
checkWFW(const Document& document) +{ + assertEquals(document.link().url(), "http://wellformedweb.org/news/"); + assertEquals(document.description(), "Exploring the limits of XML and HTTP"); + + assertEquals(document.articles().count(), 5); + + Article article = document.articles().front(); + assertEquals(article.title(), "Should you use Content Negotiation in your Web Services?"); + assertEquals(article.commentsLink().url(), "http://bitworking.org/news/comments/WebServicesAndContentNegotiation"); +} + +static void checkDC(const Document& document) +{ + // librss will use dc:date if it is provided, otherwise it will use pubDate + assertEquals(document.link().url(), "https://www.theguardian.com/us"); + + TQDateTime expectedTime; + time_t time = KRFCDate::parseDateISO8601("2025-03-13T07:28:39Z"); + expectedTime.setTime_t(time); + assertEquals(document.pubDate(), expectedTime); + + assertEquals(document.articles().count(), 1); + + Article article = document.articles().first(); + time = KRFCDate::parseDateISO8601("2025-03-13T05:22:00Z"); + expectedTime.setTime_t(time); + assertEquals(article.pubDate(), expectedTime); + + assertEquals(article.author(), "Hugo Lowell in Washington"); +} + +static void checkRDF(const Document& document) +{ + assertEquals(document.title(), "XML.com"); + assertEquals(document.link().url(), "http://xml.com/pub"); + + assertEquals(document.articles().count(), 2); + + Article article = document.articles().first(); + + assertEquals(article.title(), "Processing Inclusions with XSLT"); + assertEquals(article.link().url(), "http://xml.com/pub/2000/08/09/xslt/xslt.html"); + assertEquals(article.guid(), "http://xml.com/pub/2000/08/09/xslt/xslt.html"); + assertEquals(article.guidIsPermaLink(), false); +} + +static void checkAtom10(const Document& document) +{ + assertEquals(document.title(), "dive into mark"); + assertEquals(document.description(), "A <em>lot</em> of effort went into making this effortless"); + kdWarning() << "Skipping check for Atom \"rights\" (Document::copyright) -- not implemented." << endl; + // assertEquals(document.copyright(), "Copyright (c) 2003, Mark Pilgrim"); + assertEquals(document.language(), Language::en_us); + + // 2005-07-31T12:29:29Z + // TQDateTime compTime; + // time_t time = KRFCDate::parseDateISO8601("2005-07-31T12:29:29Z"); + // compTime.setTime_t(time); + // assertEquals(document.pubDate(), compTime); + kdWarning() << "Skipping check for Atom \"updated\" (Document::pubDate/lastBuildDate) -- not implemented." << endl; + + assertEquals(document.link().url(), "http://example.org/"); + + assertEquals(document.articles().count(), 1); + + Article article = document.articles().first(); + + assertEquals(article.title(), "Atom draft-07 snapshot"); + assertEquals(article.link().url(), "http://example.org/2005/04/02/atom"); + + if (article.description().isNull()) + { + kdError() << "Empty Atom article description." << endl; + tdeApp->exit(1); + } + + // Enclosure enclosure = article.enclosure(); + // assertEquals(enclosure.url(), "http://example.org/audio/ph34r_my_podcast.mp3"); + // assertEquals(enclosure.length(), 1337); + // assertEquals(enclosure.type(), "audio/mpeg"); + kdWarning() << "Skipping check for Atom \"enclosure\" links -- not implemented." << endl; + + assertEquals(article.guid(), "tag:example.org,2003:3.2397"); + + // 2005-07-31T12:29:29Z + // Need a little workaround since librss converts the timestamp to local time. + // NOTE: Atom provides both 'published' and 'updated'; librss uses 'updated'. 
+ TQDateTime articlePublishedDate; + time_t publishedTime = KRFCDate::parseDateISO8601("2005-07-31T12:29:29Z"); + articlePublishedDate.setTime_t(publishedTime); + assertEquals(article.pubDate(), articlePublishedDate); + + assertEquals(article.author(), "<a href=\"mailto:f8dy@example.com\">Mark Pilgrim</a>"); +} void Tester::test( const TQString &url ) { Loader *loader = Loader::create(); - connect( loader, TQT_SIGNAL( loadingComplete( Loader *, Document, Status ) ), - this, TQT_SLOT( slotLoadingComplete( Loader *, Document, Status ) ) ); - loader->loadFrom( url, new FileRetriever ); + connect( loader, TQ_SIGNAL( loadingComplete( Loader *, Document, Status ) ), + this, TQ_SLOT( slotLoadingComplete( Loader *, Document, Status ) ) ); + loader->loadFrom( url, new TestRetriever ); } void Tester::slotLoadingComplete( Loader *loader, Document doc, Status status ) { - if ( status == Success ) + if (status != Success) { - kdDebug() << "Successfully retrieved '" << doc.title() << "'" << endl; - kdDebug() << doc.description() << endl; - - if ( doc.image() ) { - kdDebug() << "Image: "; - kdDebug() << " Title: " << doc.image()->title() << endl; - kdDebug() << " URL: " << doc.image()->url() << endl; - kdDebug() << " Link: " << doc.image()->link() << endl; - } - - kdDebug() << "Articles:" << endl; - - Article::List list = doc.articles(); - Article::List::ConstIterator it; - Article::List::ConstIterator en=list.end(); - for (it = list.begin(); it != en; ++it) + kdError() << "Failed to load Document: ec=" << loader->errorCode() << " status=" << status << endl; + exit(1); + } + + switch (doc.version()) + { + case RSS::v0_91: { - kdDebug() << "\tTitle: " << (*it).title() << endl; - kdDebug() << "\tText: " << (*it).description() << endl; + checkRSS091(doc); + break; + } + case RSS::v2_0: + { + if (doc.title() == "The Well-Formed Web") + { + checkWFW(doc); + break; + } + else if (doc.title() == "The Guardian") + { + checkDC(doc); + break; + } + else if (doc.title() == "XML.com") + { + checkRDF(doc); + break; + } + kdError() << "Unknown RSS 2.0 document '" << doc.title() << "'" << endl; + exit(1); + } + case RSS::vAtom_1_0: + { + checkAtom10(doc); + break; + } + default: + { + break; } } - if ( status != Success ) - kdDebug() << "ERROR " << loader->errorCode() << endl; - - kapp->quit(); + exit(0); } int main( int argc, char **argv ) { TDEAboutData aboutData( "testlibrss", "testlibrss", "0.1" ); TDECmdLineArgs::init( argc, argv, &aboutData ); - TDECmdLineArgs::addCmdLineOptions( options ); - TDEApplication app; + TDECmdLineArgs::addCmdLineOptions( options ); + TDEApplication app(false, false); TDECmdLineArgs *args = TDECmdLineArgs::parsedArgs(); if ( args->count() != 1 ) args->usage(); diff --git a/akregator/src/librss/testlibrss.h b/akregator/src/librss/testlibrss.h index 222943a6..fdbccbd1 100644 --- a/akregator/src/librss/testlibrss.h +++ b/akregator/src/librss/testlibrss.h @@ -14,8 +14,8 @@ using RSS::Status; class Tester : public TQObject { - Q_OBJECT - + TQ_OBJECT + public: void test( const TQString &url ); @@ -23,4 +23,22 @@ class Tester : public TQObject void slotLoadingComplete( Loader *loader, Document doc, Status status ); }; +class TestRetriever : public RSS::DataRetriever +{ + TQ_OBJECT + +public: + TestRetriever(); + ~TestRetriever() override; + + void retrieveData(const KURL &url) override; + + int errorCode() const override { return m_errorCode; } + + void abort() override { /* no-op */ } + +private: + int m_errorCode; +}; + #endif diff --git a/akregator/src/librss/tools_p.cpp 
b/akregator/src/librss/tools_p.cpp index 9303bdf5..04dc570b 100644 --- a/akregator/src/librss/tools_p.cpp +++ b/akregator/src/librss/tools_p.cpp @@ -117,6 +117,59 @@ static TQString extractAtomContent(const TQDomElement& e) return TQString(); } +TQDomElement extractElementNS(const TQDomNode &parent, const TQString &nameSpace, const TQString &localName) +{ + TQDomElement element; + + if (parent.isNull()) + { + return element; + } + + TQDomNodeList children = parent.childNodes(); + for (size_t i = 0; i < children.count(); ++i) + { + TQDomNode node = children.item(i); + if (node.isElement() && node.namespaceURI() == nameSpace && node.localName() == localName) + { + element = node.toElement(); + break; + } + } + + return element; +} + +TQString extractElementTextNS(const TQDomNode &parent, const TQString &namespaceURI, const TQString &localName, bool isInlined) +{ + TQDomElement element = extractElementNS(parent, namespaceURI, localName); + + if (element.isNull()) + { + return TQString::null; + } + + TQString result = element.text().stripWhiteSpace(); + if (localName == "content") + { + // Atom content + result = extractAtomContent(element); + } + else + { + // Check for HTML; not necessary for atom:content + // Taken from extractNode below + bool hasPre = result.contains("<pre>", false) || result.contains("<pre ", false); + bool hasHtml = hasPre || result.contains("<"); + if (!isInlined && !hasHtml) + result = result = result.replace(TQChar('\n'), "<br />"); + if (!hasPre) + result = result.simplifyWhiteSpace(); + } + + return result.isEmpty() ? TQString::null : result; +} + TQString extractNode(const TQDomNode &parent, const TQString &elemName, bool isInlined) { TQDomNode node = parent.namedItem(elemName); @@ -208,25 +261,25 @@ TQString parseItemAuthor(const TQDomElement& element, Format format, Version ver TQString name; TQString email; - TQDomElement dcCreator = element.namedItem("dc:creator").toElement(); - + TQDomElement dcCreator = extractElementNS(element, DublinCoreNamespace, "creator"); + if (!dcCreator.isNull()) authorFromString(dcCreator.text(), name, email); else if (format == AtomFeed) { TQDomElement atomAuthor = element.namedItem("author").toElement(); if (atomAuthor.isNull()) - atomAuthor = element.namedItem("atom:author").toElement(); + atomAuthor = extractElementNS(element, AtomNamespace, "author"); if (!atomAuthor.isNull()) { TQDomElement atomName = atomAuthor.namedItem("name").toElement(); if (atomName.isNull()) - atomName = atomAuthor.namedItem("atom:name").toElement(); + atomName = extractElementNS(atomAuthor, AtomNamespace, "name"); name = atomName.text().stripWhiteSpace(); TQDomElement atomEmail = atomAuthor.namedItem("email").toElement(); if (atomEmail.isNull()) - atomEmail = atomAuthor.namedItem("atom:email").toElement(); + atomEmail = extractElementNS(atomAuthor, AtomNamespace, "email"); email = atomEmail.text().stripWhiteSpace(); } } diff --git a/akregator/src/librss/tools_p.h b/akregator/src/librss/tools_p.h index 0ec9013f..1b89fc85 100644 --- a/akregator/src/librss/tools_p.h +++ b/akregator/src/librss/tools_p.h @@ -29,6 +29,8 @@ namespace RSS unsigned int count; }; + TQDomElement extractElementNS(const TQDomNode& parent, const TQString& namespaceURI, const TQString& localName); + TQString extractElementTextNS(const TQDomNode& parent, const TQString& namespaceURI, const TQString& localName, bool isInlined = true); TQString extractNode(const TQDomNode &parent, const TQString &elemName, bool isInlined=true); TQString extractTitle(const TQDomNode &parent); TQString 
childNodesAsXML(const TQDomNode& parent);