diff options
author | mio <stigma@disroot.org> | 2025-03-13 18:46:53 +1000 |
---|---|---|
committer | mio <stigma@disroot.org> | 2025-03-13 19:57:45 +1000 |
commit | b69050d6e8956d0f38c526b9fca93d76fccffeac (patch) | |
tree | 8acba10a8911d483d59a7bc6265dea5266223349 | |
parent | 3971704ec5b8fc81d1a94a4e664c153fc0f2cdaa (diff) | |
download | tdepim-b69050d6e8956d0f38c526b9fca93d76fccffeac.tar.gz tdepim-b69050d6e8956d0f38c526b9fca93d76fccffeac.zip |
akregator: Add automated tests for librss
Signed-off-by: mio <stigma@disroot.org>
-rw-r--r-- | akregator/src/librss/CMakeLists.txt | 28 | ||||
-rw-r--r-- | akregator/src/librss/test_data/atom_spec.xml | 42 | ||||
-rw-r--r-- | akregator/src/librss/test_data/comment_api.xml | 411 | ||||
-rw-r--r-- | akregator/src/librss/test_data/dublincore.xml | 42 | ||||
-rw-r--r-- | akregator/src/librss/test_data/rdf.xml | 64 | ||||
-rw-r--r-- | akregator/src/librss/test_data/rss091.xml | 50 | ||||
-rw-r--r-- | akregator/src/librss/testlibrss.cpp | 210 |
7 files changed, 821 insertions, 26 deletions
diff --git a/akregator/src/librss/CMakeLists.txt b/akregator/src/librss/CMakeLists.txt index c2e7a001..23dc39a2 100644 --- a/akregator/src/librss/CMakeLists.txt +++ b/akregator/src/librss/CMakeLists.txt @@ -16,7 +16,6 @@ include_directories( ${TQT_INCLUDE_DIRS} ) - ##### rsslocal (static) ######################### tde_add_library( rsslocal STATIC_PIC AUTOMOC @@ -25,3 +24,30 @@ tde_add_library( rsslocal STATIC_PIC AUTOMOC tools_p.cpp loader.cpp enclosure.cpp category.cpp feeddetector.cpp ) + +tde_add_check_executable( testlibrss AUTOMOC + SOURCES testlibrss.cpp + LINK rsslocal-static ${TQT_LIBRARIES} tdeio-shared +) + +set( TEST_DATA "${CMAKE_CURRENT_SOURCE_DIR}/test_data") + +add_test( NAME TestLibRSS_0.91 + COMMAND testlibrss ${TEST_DATA}/rss091.xml +) + +add_test( NAME TestLibRSS_CommentAPI + COMMAND testlibrss ${TEST_DATA}/comment_api.xml +) + +add_test( NAME TestLibRSS_DublinCore + COMMAND testlibrss ${TEST_DATA}/dublincore.xml +) + +add_test( NAME TestLibRSS_RDF + COMMAND testlibrss ${TEST_DATA}/rdf.xml +) + +add_test( NAME TestLibRSS_AtomSpec + COMMAND testlibrss ${TEST_DATA}/atom_spec.xml +) diff --git a/akregator/src/librss/test_data/atom_spec.xml b/akregator/src/librss/test_data/atom_spec.xml new file mode 100644 index 00000000..b8e3dff4 --- /dev/null +++ b/akregator/src/librss/test_data/atom_spec.xml @@ -0,0 +1,42 @@ +<?xml version="1.0" encoding="utf-8"?> +<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us"> + <title type="text">dive into mark</title> + <subtitle type="html"> + A <em>lot</em> of effort + went into making this effortless + </subtitle> + <updated>2005-07-31T12:29:29Z</updated> + <id>tag:example.org,2003:3</id> + <link rel="alternate" type="text/html" hreflang="en" href="http://example.org/"/> + <link rel="self" type="application/atom+xml" href="http://example.org/feed.atom"/> + <rights>Copyright (c) 2003, Mark Pilgrim</rights> + <generator uri="http://www.example.com/" version="1.0"> + Example Toolkit + </generator> + <entry> + <title>Atom draft-07 snapshot</title> + <link rel="alternate" type="text/html" href="http://example.org/2005/04/02/atom"/> + <link rel="enclosure" type="audio/mpeg" length="1337" href="http://example.org/audio/ph34r_my_podcast.mp3"/> + <id>tag:example.org,2003:3.2397</id> + <updated>2005-07-31T12:29:29Z</updated> + <published>2003-12-13T08:29:29-04:00</published> + <author> + <name>Mark Pilgrim</name> + <uri>http://example.org/</uri> + <email>f8dy@example.com</email> + </author> + <contributor> + <name>Sam Ruby</name> + </contributor> + <contributor> + <name>Joe Gregorio</name> + </contributor> + <content type="xhtml" xml:lang="en" xml:base="http://diveintomark.org/"> + <div xmlns="http://www.w3.org/1999/xhtml"> + <p> + <i>[Update: The Atom draft is finished.]</i> + </p> + </div> + </content> + </entry> +</feed> diff --git a/akregator/src/librss/test_data/comment_api.xml b/akregator/src/librss/test_data/comment_api.xml new file mode 100644 index 00000000..7bbb29ae --- /dev/null +++ b/akregator/src/librss/test_data/comment_api.xml @@ -0,0 +1,411 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:wfw='http://wellformedweb.org/CommentAPI/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rl='http://www.purl.org/RESTLog/'> + <channel> + <title>The Well-Formed Web</title> + <link>http://wellformedweb.org/news/</link> + <description>Exploring the limits of XML and HTTP</description> + <dc:creator>BitWorking, Inc</dc:creator> + <item> + <title>Should you use Content Negotiation in your Web Services?</title> + <link>http://bitworking.org/news/WebServicesAndContentNegotiation</link> + <description> + <p>Should you use Content Negotiation when building your web service? +The short answer is no. There are definite problems with <abbrev title="Content Negotiation">conneg</abbrev> +and I can give some examples of problems I have run into and also point to problems +other have run into.</p> + + +<p>First let's back up and explain Content Negotiation. Your browser is + a generic display program and can take in various kinds of media, such + as HTML, JPEGs, CSS, Flash, etc. and display it for you. The first thing to + note is that each of those kinds of media have different mime types. + Each format has it's own registered mime type and when a client + does a GET on a URL it gets back not only the content but the response + also includes a <code>Content-Type:</code> header which lists + the mime-type of what is in the body. +</p> + +<p>One of the interesting things about HTTP is that it allows + the same URI to have multiple representations. For example I + could have a URL that had both <code>plain/text</code> and <code>text/html</code> + representations. Now that leads to two obvious questions.</p> + +<ol> + <li>How does the server know which represenation to serve?</li> + <li>How can the browser influence the servers choice to get something it can handle?</li> +</ol> + +<p>Let's start by answering question two first. The browser uses the <code>Accept:</code> + header to list out the mime-types that it is willing to accept. There is also a weighting + scheme that allows the client to specify a preference for one media type + over another. For example, here is the capture of some of the headers, including the <code>Accept:</code> header, + sent by Mozilla when it does a GET on a URI:</p> + +<pre class="example"><code>Accept: text/xml,application/xml,application/xhtml+xml,\ + text/html;q=0.9,text/plain;q=0.8,video/x-mng,\ + image/png,image/jpeg,image/gif;q=0.2,*/*;q=0.1 +Accept-Language: en-us,en;q=0.5 +Accept-Encoding: gzip,deflate,compress;q=0.9 +Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7 +</code></pre> + +<p>The <code>Accept:</code> header list the mime-types that the browser can + handle along with weights of the form <code>q=</code> where the argument + is a floating point number between 0 and 1. The weights indicate a preference + for that media type, with a higher number inidicating a higher preference. Note that + there are several bits of complexity I am going to ignore for now. The first is the last + type the Mozilla browser says in can accept, */*;q=0.1. This is a wild card + match, which will match any mime-type that the server could want to serve up. The second + is that there are multiple Accept headers, one for language, one for encoding, another + for charset. How these over-lap and influence the response sent won't be covered here. +</p> + +<p>Now to answer the first question. The server looks at the available representations + is has and servers up the one with the highest preference to the client. + Based on the <code>Accept:</code> + header it sends an appropriate representation back and indicates the type it + chose using the <code>Content-Type:</code> header.</p> + +<p>This seems like a really cool and vastly under utilized feature of HTTP. It also + seems particularly intriguing for web services. You could return + JPEGs from that mapping service for the older client platforms, but also + serve up SVG for the newer clients so they can scale and rotate their maps. + What could possibly go wrong?</p> + +<p>The first thing that could go wrong is a bug or mis-configuration on the client or the server. + This has happened to me in the + past. The W3C does conneg on some of their recommendations, returning either HTML or plain + text based on the clients capabilities. This is fine, but one day their server was + either confused or mis-configured because it would only serve the recommendation in <code>plain/text</code>. + I really needed the HTML form, but after trying multiple browsers from multipe locations I could only retrieve the text + format. I ended up pulling the HTML version out of the Google cache.</p> + +<p>The second problem that I ran across highlights the real core problem with conneg. I was + trying to use the W3C XSLT service to do some transformations on my web pages. Now the server side + software I use to run Well-Formed Web does conneg and can return either HTML or an RSS item + fragment for each URI. At the time I was serving up XHTML 1.0, which is valid XML and + thus good input into an XSLT service. So the way the XSLT service works is that you enter two URIs, one + for the source content and the other for the XSLT sheet to apply to the source content. + My transformation kept failing and it was because of the + Accept headers that the XSLT service sent when it went to retrieve the source content. + My server kept returning the RSS item fragment and not + the XHTML. Now this would have been fine if I wanted to apply an XSLT sheet to my RSS item fragment, but in this + case I wanted it to apply to the XHTML. Note that the problem could have been completely reversed, I could have + been trying to apply the XSLT to the RSS item and not to the XHTML and my server could have returned + the XHTML all the time. The crux of the problem is that when I gave the URI to the XSLT transformation + service I have no way of specifying what mime-type to request. I get no chance to tweak the + services <code>Accept:</code> header. +</p> + +<p>Let's cover that again to clarify. If I hand you a URI only, and that URI supports conneg, + then I get no control over which representation you retrieve. In the cases where you are + passing a URI into a service that is later going to retrieve a represenation from that URI, you + really have no idea which representation it's going to get. That could mean that you end up + passing your RSS feed to the W3C HTML validator, or you end up passing XHTML instead of RSS into + an XSLT translator service, or you end up passing a 12MB PNG to a handheld instead of + that 20KB SVG file. You end up with a problem that is hard to debug and + one that wouldn't exist if each URI had only one mime-type.</p> + +<h3>Further Reading</h3> +<p><a href="http://norman.walsh.name/2003/07/02/conneg">Norman Walsh has also run into problems</a> with Content Negotiation.</p> +<p>The issue of using fragment identifiers with conneg has not only come up but was important enough to + merit mention in the W3C document <a href="http://www.w3.org/TR/webarch/#frag-conneg">Architecture of the World Wide Web</a>.</p> + + + </description> + + <dc:date>2003-09-06T21:54:43-05:00</dc:date> + <wfw:comment>http://bitworking.org/news/comments/WebServicesAndContentNegotiation</wfw:comment> + <wfw:commentRss>http://bitworking.org/news/WebServicesAndContentNegotiation?crss</wfw:commentRss> + </item> + <item> + <title>Google2Atom</title> + <link>http://wellformedweb.org/news/Google2Atom</link> + <description> +<p>Welcome to the Google2Atom web service. Just enter your + search and your <a href="http://www.google.com/apis/">Google key</a> + below. Once you press "Search" you will get an <a href="http://www.mnot.net/drafts/draft-nottingham-atom-format-00.html"> + Atom</a> feed of the search results. +</p> + +<form method="get" action="http://wellformedweb.org/cgi-bin/google2atom.cgi"> +<p><input size="50" name="q"/></p> +<p>Google Key: <input size="20" name="license_key"/></p> +<p><input type="submit" value=" Search "/></p> +</form> + +<hr /> + +<p><strong>Note:</strong> The Google Key is no longer mandatory, if it's not + supplied it will use my own key. In light of that please feel free to + use my key for experimentation, but if you start making heavy use + of this service please get your own Google API Key to avoid + limiting others use of this service.</p> + +<p>This is a REST based reformulation of the Google API. As such it uses + query parameters in a GET based HTTP request to do the search. That is, it works + just like the regular google web page, but this form returns + a well-formed XML document instead of a web page. Why is this better?</p> + +<dl> + <dt>Simplicity</dt> + <dd> + It works just like the google web page, so it is + conceptually easier to understand. + </dd> + + <dt>Composability</dt> + <dd>Since the request is just a simple GET the results of a query can be composed + with other web services. For example, the results could be transformed using + XSLT or fed into a validator. + </dd> +</dl> + +<h3>Bonus Features</h3> + +<p>One feature found in this interface that is not found + in the original Google API is the well-formedness of the + results content. + <a href="http://bitworking.org/news/Announcing_pyTidy">PyTidy</a> + is used to transform the HTML + snippets from the Google API into well-formed XML and place + those into 'content' elements with type='text/html' and + mode='xml'. +</p> + +<h3>Colophon</h3> + +<p>Google2Atom is written in <a href="http://www.python.org">Python</a> and uses + both the <a href="http://bitworking.org/news/Announcing_pyTidy"> + pyTidy</a> and <a href="http://www.diveintomark.org/projects/pygoogle/"> + pyGoogle</a> libraries.</p> + + </description> + + <dc:date>2003-11-22T01:18:42-05:00</dc:date> + <wfw:comment>http://wellformedweb.org/news/comments/Google2Atom</wfw:comment> + <wfw:commentRss>http://wellformedweb.org/news/Google2Atom?crss</wfw:commentRss> + </item> + <item> + <title>wfw namespace elements</title> + <link>http://wellformedweb.org/news/wfw_namespace_elements</link> + <description> + <p>The <code>wfw</code> namespace, http://wellformedweb.org/CommentAPI/ +contains multiple elements. As more are added in various places I will +endeavor to keep the list here updated.</p> + +<dl> + <dt>wfw:comment</dt> + <dd>The first element to appear in this namespace is <code>comment</code>. This element appears + in RSS feeds and contains the URI that comment entries are to be POSTed to. The details + of this are outlined in the <a href="http://wellformedweb.org/story/9">CommentAPI Specification</a>.<dd> + + <dt>wfw:commentRss</dt> + <dd>The second element to appear in the wfw namespace is <code>commentRss</code>. This element + also appears in RSS feeds and contains the URI of the RSS feed for comments on that Item. + This is documented in <a href="http://www.sellsbrothers.com/spout/default.aspx?content=archive.htm#exposingRssComments">Chris Sells' Specification</a>. Note that for quite a while this page has had a typo and erroneously referred to + this element as 'commentRSS' as opposed to the correct 'commentRss'. Feed consumers should be aware + that they may run into both spellings in the wild. Please see + <a href="http://www.intertwingly.net/blog/2006/04/16/commentRss">this page</a> for + more information. + </dd> +</dl> + </description> + + <dc:date>2003-10-10T13:11:46-05:00</dc:date> + <wfw:comment>http://wellformedweb.org/news/comments/wfw_namespace_elements</wfw:comment> + <wfw:commentRss>http://wellformedweb.org/news/wfw_namespace_elements?crss</wfw:commentRss> + </item> + <item> + <title>The HTTP verb PUT under Apache: Safe or Dangerous?</title> + <link>http://wellformedweb.org/news/PUT_SaferOrDangerous</link> + <description> + <p>"Is the HTTP verb PUT under Apache safe or dangerous?" This is a question I come across often, and have now + run into it twice in the work on Atom. So is it safe? The answer is maybe.</p> +<p>Here are two such examples:</p> + +<blockquote><p> + Using DELETE and PUT may be the "right thing to do" + in an ideal world, but the fact of the matter is that a + lot -- if not the vast majority -- of webservers do not allow these + operations. </p></blockquote> + +<blockquote><p>If anyone knows of a newer article describing + HTTP PUT with apache, I would be very interested in seeing it. Because, + due to my experience with PUT, you have to define a single PUTScript in + httpd.conf, and if you PUT something to an apache server at the URI + www.example.com/blog/entries/1 or something similar, apache passes all + of the information to the PUTScript, not to anything else.</p></blockquote> + +<p>Both of the above quotes are from the <a href="http://www.intertwingly.net/wiki/pie/RestEchoApiPutAndDelete">Atom Wiki discussion + of the use of PUT</a>. A little digging reveals that the ApacheWeek article + <a href="http://www.apacheweek.com/features/put">Publishing Pages with PUT</a> + is referenced most often when the danger of PUT is raised. <p> + +<p>That ApacheWeek article does talk about the dangers of PUT and + the cautions you need to follow when writing a script that + does content publishing via PUT. That key part of that phrase + is <strong>content publishing</strong>. That means that PUT is being + used to upload arbitrary content to the server and the client + is determining via the URI where the content should be stored. + Now you can imagine how this might be dangerous, for example + not correctly checking URI paths that include <code>../..</code> could + let a malicious agent re-write your <code>.bashrc</code>.</p> + +<p>Implementing a PUT script can be difficult and a security hazard + in the context of content publishing, but that's the case because + the client is choosing the target URI and the client could upload + any content type. In the case of Web Services in general, and + the AtomAPI in particular, PUT is used in a much narrower manner + and avoids those potential security problems.</p> + +<p>In the case of the AtomAPI PUT is only allowed on URIs that point + to a pre-existing resource. The + AtomAPI follows a general idiom for editing resources of doing + a GET to retrieve the original XML, then a PUT on the same URI + to upate that resource with the edited XML. No URIs are created + by doing a PUT. PUT is not accepted on arbitrary URIs. This makes + the use of PUT in the context of the AtomAPI just as safe as POST.</p> + +<p>There are quite a few ways to configure Apache to process + incoming requests. In particular it is possible to have a single + script that handles all PUT requests below a chosen directory. This + strategy, and all of the associated security concerns associated with + it, are covered fully in the <a href="http://www.apacheweek.com/features/put">Publishing Pages with PUT</a>.</p> + +<p>When processing request with a CGI script all the PUT requests + will come through. The verb is passed to the CGI program via the REQUEST_METHOD environment + variable, and the program decides what to do with the content.</p> + +<p>Using PUT propoerly has advantages in Web Service development. First, + Apache lets you control security based on the verb using the + <a href="http://httpd.apache.org/docs-2.0/mod/core.html#limit">Limit</a> + and <a href="http://httpd.apache.org/docs-2.0/mod/core.html#limitexcept">LimitExcept</a> + directives, which + let you restrict access controls based on the verb. Here is a sample + of one of my <code>.htaccess</code> files that restricts the use of + all verbs except GET to the CGI program <code>Bulu.cgi.</code></p> + +<pre class="example"><code>&lt;Files Bulu.cgi> +AuthType Basic +AuthName myrealm +AuthUserFile /path/to/my/password/file + &lt;LimitExcept GET> + Require valid-user + &lt;/LimitExcept> +&lt;/Files> +</code></pre> + +<p>In addition, the <a href="http://httpd.apache.org/docs-2.0/mod/mod_actions.html#script">Script</a> + directive can be used to dispatch to a CGI program based on the verb used:</p> + +<pre class="example"><code>Script PUT /cgi-bin/put.cgi</code></pre> + +<p>The second advantage using PUT brings is clarity. Given the idiom + of using GET/PUT in tandem on a URI to edit resources PUT + clearly signals what the interface is doing.</p> + +<h4>Resources</h4> + +<p><a href="http://www.apacheweek.com">ApacheWeek</a>: <a href="http://www.apacheweek.com/features/put">Publishing Pages with PUT</a></p> +<p><a href="http://www.intertwingly.net/wiki/pie/RestEchoApiPutAndDelete">RestEchoApiPutAndDelete</a>: Discussion on the use of PUT + and DELETE in the AtomAPI.</p> +<p><a href="http://httpd.apache.org/docs-2.0/mod/mod_actions.html">mod_actions</a>: An Apache module for + controlling dispatching based on verb or content-type.</p> +<p><a href="http://www.w3.org/Amaya/User/Put.html">Configuring your WWW server to understand the PUT method</a>, from the W3Cs Amaya project documentation.</p> +<p><a href="http://www.webdav.org/">WebDAV</a> is also something you may be interested in if you + are looking for ways to publish your content using HTTP. WebDAV stands for + "Web-based Distributed Authoring and Versioning". It is a set of extensions to the HTTP + protocol which allows users to collaboratively edit and manage files on remote web servers. + <a href="http://httpd.apache.org/docs-2.0/mod/mod_dav.html"> + Mod_dav</a> in an Apache module that implements WebDAV.</p> + + + + </description> + + <dc:date>2003-08-23T00:45:25-05:00</dc:date> + <wfw:comment>http://wellformedweb.org/news/comments/PUT_SaferOrDangerous</wfw:comment> + <wfw:commentRss>http://wellformedweb.org/news/PUT_SaferOrDangerous?crss</wfw:commentRss> + </item> + <item> + <title>Six Plus One</title> + <link>http://wellformedweb.org/news/SixPlusOne</link> + <description> + <p>Previously I talked about the <a href="http://bitworking.org/news/Six_Places">six different places</a> there are to + store information in an HTTP transaction. This is slightly misleading. +</p> + +<p> To review, the six places are:</p> +<ol> + <li>Request URI</li> + <li>Request Headers</li> + <li>Request Content</li> + <li>Response Status Code</li> + <li>Response Headers</li> + <li>Response Content</li> +</ol> + +<p>This is slightly misleading because the URI is listed as a single + storage location. This isn't the best characterization, as it really + contains two different sets of information: the path, and the query parameters.</p> + +<p>Now the path part of a URI usually corresponds to the directory structure on the server. + But remember that the path structure of a server is completely controlled + by that server and it need not corresponse to any file or directory strucure. + While it is at times convenient to map it to a directory structure, this isn't required, + and it is possible to pass path information to a + CGI program. For example, if you do a GET on the following URL:</p> + +<pre class="example"><code>http://example.org/cgi-bin/test.py/fred/12 +</code></pre> + +<p>and there exists a program named <code>test.py</code> in the <code>cgi-bin</code> directory + then that program will be executed. The remaining path after the program is passed + to the CGI program in the PATH_INFO environment variable. In contrast, if query + parameters are passed in, they are passed to the CGI program + via the QUERY_STRING environment variable.</p> + +<p>For example, if this is the script <code>test.py</code>:</p> + +<pre class="example"><code>import os +print "Content-type: text/plain\n\n" +print "PATH_INFO = %s" % os.environ['PATH_INFO'] +print "QUERY_STRING = %s" % os.environ['QUERY_STRING']</code></pre> + +<p>And it handles the GET for this URI:</p> + +<pre class="example"><code>http://localhost/cgi-bin/test.py/reilly/12?id=234454</code></pre> + +<p>It will display:</p> + +<pre class="example"><code>PATH_INFO = /reilly/12 +QUERY_STRING = id=234454 +</code></pre> + +<p>Note how the piece of the path below test.py has been stripped off and made + available via <code>PATH_INFO</code>, while the query parameters are + stored in the QUERY_STRING environment variable. +</p> + +<p>So HTTP, via the structure of a URI, gives you two distinct places + to store information, one in the path and the second in the query parameters. + This isn't even the full story, because if you are running Apache and have + the ability to use .htaccess files you can use + <a href="http://httpd.apache.org/docs/mod/mod_rewrite.html">mod_rewrite</a> and map URIs so that they appear + as paths but show up in the CGI as query parameters, but we won't cover that + now. +</p> + + + </description> + + <dc:date>2003-08-03T01:34:49-05:00</dc:date> + <wfw:comment>http://wellformedweb.org/news/comments/SixPlusOne</wfw:comment> + <wfw:commentRss>http://wellformedweb.org/news/SixPlusOne?crss</wfw:commentRss> + </item> + </channel> +</rss> + + + diff --git a/akregator/src/librss/test_data/dublincore.xml b/akregator/src/librss/test_data/dublincore.xml new file mode 100644 index 00000000..075822bb --- /dev/null +++ b/akregator/src/librss/test_data/dublincore.xml @@ -0,0 +1,42 @@ +<?xml version="1.0" encoding="utf-8"?> +<rss xmlns:media="http://search.yahoo.com/mrss/" xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0"> + <channel> + <title>The Guardian</title> + <link>https://www.theguardian.com/us</link> + <description>Latest US news, world news, sports, business, opinion, analysis and reviews from the Guardian, the world's leading liberal voice</description> + <copyright>Guardian News and Media Limited or its affiliated companies. All rights reserved. 2025</copyright> + <language>en-gb</language> + <!-- pubDate has been modified so we can assure dc:date is chosen. --> + <pubDate>Thu, 13 Mar 2020 10:38:39 GMT</pubDate> + <dc:date>2025-03-13T07:28:39Z</dc:date> + <dc:language>en-gb</dc:language> + <dc:rights>Guardian News and Media Limited or its affiliated companies. All rights reserved. 2025</dc:rights> + <image> + <title>The Guardian</title> + <url>https://assets.guim.co.uk/images/guardian-logo-rss.c45beb1bafa34b347ac333af2e6fe23f.png</url> + <link>https://www.theguardian.com</link> + </image> + <item> + <title>Judge orders Elon Musk and Doge to produce records about cost-cutting operations</title> + <link>https://www.theguardian.com/us-news/2025/mar/13/elon-musk-doge-court-ruling-records</link> + <description><p>The documents would ultimately inform whether Musk has been operating unconstitutionally to the extent Doge’s activities should be halted</p><p>Elon Musk and his so-called “department of government efficiency”, or Doge, have been ordered by <a href="https://www.theguardian.com/us-news/2025/mar/12/judge-blocks-trump-order-perkins-coie">a federal judge</a> to turn over a wide array of records that would reveal the identities of staffers and internal records related to efforts to aggressively cut federal government spending and programs.</p><p>US district judge Tanya Chutkan’s order forces Musk to produce documents related to Doge’s activities <a href="https://storage.courtlistener.com/recap/gov.uscourts.dcd.277463/gov.uscourts.dcd.277463.61.0_1.pdf">as part of a lawsuit</a> brought by 14 Democratic state attorneys general that alleges Musk violated the constitution by wielding powers that only Senate-confirmed officials should possess.</p> <a href="https://www.theguardian.com/us-news/2025/mar/13/elon-musk-doge-court-ruling-records">Continue reading...</a></description> + <category domain="https://www.theguardian.com/us-news/trump-administration">Trump administration</category> + <category domain="https://www.theguardian.com/us-news/donaldtrump">Donald Trump</category> + <category domain="https://www.theguardian.com/law/law-us">Law (US)</category> + <category domain="https://www.theguardian.com/us-news/us-politics">US politics</category> + <category domain="https://www.theguardian.com/us-news/us-news">US news</category> + <!-- pubDate has been modified so we can assure dc:date is chosen. --> + <pubDate>Thu, 13 Mar 2020 10:38:39 GMT</pubDate> + <guid>https://www.theguardian.com/us-news/2025/mar/13/elon-musk-doge-court-ruling-records</guid> + <media:content width="140" url="https://i.guim.co.uk/img/media/8570e3094dfad7e268555097158aa8085221d48f/0_72_5221_3132/master/5221.jpg?width=140&quality=85&auto=format&fit=max&s=8677942b4c15d93e73d6bd18a13a12c0"> + <media:credit scheme="urn:ebu">Photograph: Alex Brandon/AP</media:credit> + </media:content> + <media:content width="460" url="https://i.guim.co.uk/img/media/8570e3094dfad7e268555097158aa8085221d48f/0_72_5221_3132/master/5221.jpg?width=460&quality=85&auto=format&fit=max&s=bcd596d741b8c9e4be82c602e5105122"> + <media:credit scheme="urn:ebu">Photograph: Alex Brandon/AP</media:credit> + </media:content> + <dc:creator>Hugo Lowell in Washington</dc:creator> + <dc:date>2025-03-13T05:22:00Z</dc:date> + </item> + </channel> +</rss> + diff --git a/akregator/src/librss/test_data/rdf.xml b/akregator/src/librss/test_data/rdf.xml new file mode 100644 index 00000000..dadcde6d --- /dev/null +++ b/akregator/src/librss/test_data/rdf.xml @@ -0,0 +1,64 @@ +<?xml version="1.0"?> +<rdf:RDF + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns="http://purl.org/rss/1.0/" +> + + <channel rdf:about="http://www.xml.com/xml/news.rss"> + <title>XML.com</title> + <link>http://xml.com/pub</link> + + <description> + XML.com features a rich mix of information and services + for the XML community. + </description> + + <image rdf:resource="http://xml.com/universal/images/xml_tiny.gif" /> + + <items> + <rdf:Seq> + + <rdf:li resource="http://xml.com/pub/2000/08/09/xslt/xslt.html" /> + <rdf:li resource="http://xml.com/pub/2000/08/09/rdfdb/index.html" /> + </rdf:Seq> + </items> + + <textinput rdf:resource="http://search.xml.com" /> + + </channel> + <image rdf:about="http://xml.com/universal/images/xml_tiny.gif"> + <title>XML.com</title> + <link>http://www.xml.com</link> + <url>http://xml.com/universal/images/xml_tiny.gif</url> + + </image> + <item rdf:about="http://xml.com/pub/2000/08/09/xslt/xslt.html"> + <title>Processing Inclusions with XSLT</title> + <link>http://xml.com/pub/2000/08/09/xslt/xslt.html</link> + <description> + + Processing document inclusions with general XML tools can be + problematic. This article proposes a way of preserving inclusion + information through SAX-based processing. + </description> + </item> + <item rdf:about="http://xml.com/pub/2000/08/09/rdfdb/index.html"> + <title>Putting RDF to Work</title> + <link>http://xml.com/pub/2000/08/09/rdfdb/index.html</link> + + <description> + Tool and API support for the Resource Description Framework + is slowly coming of age. Edd Dumbill takes a look at RDFDB, + one of the most exciting new RDF toolkits. + </description> + </item> + + <textinput rdf:about="http://search.xml.com"> + <title>Search XML.com</title> + + <description>Search XML.com's XML collection</description> + <name>s</name> + <link>http://search.xml.com</link> + </textinput> + +</rdf:RDF> diff --git a/akregator/src/librss/test_data/rss091.xml b/akregator/src/librss/test_data/rss091.xml new file mode 100644 index 00000000..65788c3d --- /dev/null +++ b/akregator/src/librss/test_data/rss091.xml @@ -0,0 +1,50 @@ +<?xml version="1.0" encoding="ISO-8859-1" ?> +<rss version="0.91"> + <channel> + <title>WriteTheWeb</title> + <link>http://writetheweb.com</link> + <description>News for web users that write back</description> + <language>en-us</language> + <copyright>Copyright 2000, WriteTheWeb team.</copyright> + <managingEditor>editor@writetheweb.com</managingEditor> + <webMaster>webmaster@writetheweb.com</webMaster> + <image> + <title>WriteTheWeb</title> + <url>http://writetheweb.com/images/mynetscape88.gif</url> + <link>http://writetheweb.com</link> + <width>88</width> + <height>31</height> + <description>News for web users that write back</description> + </image> + <item> + <title>Giving the world a pluggable Gnutella</title> + <link>http://writetheweb.com/read.php?item=24</link> + <description>WorldOS is a framework on which to build programs that work like Freenet or Gnutella -allowing distributed applications using peer-to-peer routing.</description> + </item> + <item> + <title>Syndication discussions hot up</title> + <link>http://writetheweb.com/read.php?item=23</link> + <description>After a period of dormancy, the Syndication mailing list has become active again, with contributions from leaders in traditional media and Web syndication.</description> + </item> + <item> + <title>Personal web server integrates file sharing and messaging</title> + <link>http://writetheweb.com/read.php?item=22</link> + <description>The Magi Project is an innovative project to create a combined personal web server and messaging system that enables the sharing and synchronization of information across desktop, laptop and palmtop devices.</description> + </item> + <item> + <title>Syndication and Metadata</title> + <link>http://writetheweb.com/read.php?item=21</link> + <description>RSS is probably the best known metadata format around. RDF is probably one of the least understood. In this essay, published on my O'Reilly Network weblog, I argue that the next generation of RSS should be based on RDF.</description> + </item> + <item> + <title>UK bloggers get organised</title> + <link>http://writetheweb.com/read.php?item=20</link> + <description>Looks like the weblogs scene is gathering pace beyond the shores of the US. There's now a UK-specific page on weblogs.com, and a mailing list at egroups.</description> + </item> + <item> + <title>Yournamehere.com more important than anything</title> + <link>http://writetheweb.com/read.php?item=19</link> + <description>Whatever you're publishing on the web, your site name is the most valuable asset you have, according to Carl Steadman.</description> + </item> + </channel> + </rss> diff --git a/akregator/src/librss/testlibrss.cpp b/akregator/src/librss/testlibrss.cpp index a6e970e2..ed75069d 100644 --- a/akregator/src/librss/testlibrss.cpp +++ b/akregator/src/librss/testlibrss.cpp @@ -1,11 +1,15 @@ #include "testlibrss.h" -#include "image.h" +#include <tqdatetime.h> #include <tdeaboutdata.h> #include <tdecmdlineargs.h> #include <tdeapplication.h> #include <kdebug.h> +#include <krfcdate.h> + +#include "image.h" +#include "enclosure.h" using namespace RSS; @@ -15,6 +19,145 @@ static const TDECmdLineOptions options[] = TDECmdLineLastOption }; +template<typename ActualType, typename ExpectedType> +static void assertEquals(const ActualType& actual, const ExpectedType& expected) +{ + if (actual != expected) + { + kdError() << "Assertion failed: actual == expected\n" + << " actual: " << actual << "\n" + << " expected: " << expected << endl; + tdeApp->exit(1); + } +} + +static void checkRSS091(const Document& document) +{ + assertEquals(document.title(), "WriteTheWeb"); + assertEquals(document.link().url(), "http://writetheweb.com"); + assertEquals(document.description(), "News for web users that write back"); + assertEquals(document.language(), Language::en_us); + assertEquals(document.copyright(), "Copyright 2000, WriteTheWeb team."); + assertEquals(document.managingEditor(), "editor@writetheweb.com"); + assertEquals(document.webMaster(), "webmaster@writetheweb.com"); + + const Image* image = document.image(); + if (!image) + { + kdError() << "Expected an <image> element to be found" << endl; + tdeApp->exit(1); + } + + assertEquals(image->title(), "WriteTheWeb"); + assertEquals(image->url().url(), "http://writetheweb.com/images/mynetscape88.gif"); + assertEquals(image->link().url(), "http://writetheweb.com"); + assertEquals(image->description(), "News for web users that write back"); + assertEquals(image->width(), 88); + assertEquals(image->height(), 31); + + assertEquals(document.articles().count(), 6); + + Article article = document.articles().first(); + assertEquals(article.title(), "Giving the world a pluggable Gnutella"); + assertEquals(article.link().url(), "http://writetheweb.com/read.php?item=24"); + assertEquals(article.description(), "WorldOS is a framework on which to build programs that work like Freenet or Gnutella -allowing distributed applications using peer-to-peer routing."); +} + +static void checkWFW(const Document& document) +{ + assertEquals(document.link().url(), "http://wellformedweb.org/news/"); + assertEquals(document.description(), "Exploring the limits of XML and HTTP"); + + assertEquals(document.articles().count(), 5); + + Article article = document.articles().front(); + assertEquals(article.title(), "Should you use Content Negotiation in your Web Services?"); + assertEquals(article.commentsLink().url(), "http://bitworking.org/news/comments/WebServicesAndContentNegotiation"); +} + +static void checkDC(const Document& document) +{ + // librss will use dc:date if it is provided, otherwise it will use pubDate + assertEquals(document.link().url(), "https://www.theguardian.com/us"); + + TQDateTime expectedTime; + time_t time = KRFCDate::parseDateISO8601("2025-03-13T07:28:39Z"); + expectedTime.setTime_t(time); + assertEquals(document.pubDate(), expectedTime); + + assertEquals(document.articles().count(), 1); + + Article article = document.articles().first(); + time = KRFCDate::parseDateISO8601("2025-03-13T05:22:00Z"); + expectedTime.setTime_t(time); + assertEquals(article.pubDate(), expectedTime); + + assertEquals(article.author(), "Hugo Lowell in Washington"); +} + +static void checkRDF(const Document& document) +{ + assertEquals(document.title(), "XML.com"); + assertEquals(document.link().url(), "http://xml.com/pub"); + + assertEquals(document.articles().count(), 2); + + Article article = document.articles().first(); + + assertEquals(article.title(), "Processing Inclusions with XSLT"); + assertEquals(article.link().url(), "http://xml.com/pub/2000/08/09/xslt/xslt.html"); + assertEquals(article.guid(), "http://xml.com/pub/2000/08/09/xslt/xslt.html"); + assertEquals(article.guidIsPermaLink(), false); +} + +static void checkAtom10(const Document& document) +{ + assertEquals(document.title(), "dive into mark"); + assertEquals(document.description(), "A <em>lot</em> of effort went into making this effortless"); + kdWarning() << "Skipping check for Atom \"rights\" (Document::copyright) -- not implemented." << endl; + // assertEquals(document.copyright(), "Copyright (c) 2003, Mark Pilgrim"); + assertEquals(document.language(), Language::en_us); + + // 2005-07-31T12:29:29Z + // TQDateTime compTime; + // time_t time = KRFCDate::parseDateISO8601("2005-07-31T12:29:29Z"); + // compTime.setTime_t(time); + // assertEquals(document.pubDate(), compTime); + kdWarning() << "Skipping check for Atom \"updated\" (Document::pubDate/lastBuildDate) -- not implemented." << endl; + + assertEquals(document.link().url(), "http://example.org/"); + + assertEquals(document.articles().count(), 1); + + Article article = document.articles().first(); + + assertEquals(article.title(), "Atom draft-07 snapshot"); + assertEquals(article.link().url(), "http://example.org/2005/04/02/atom"); + + if (article.description().isNull()) + { + kdError() << "Empty Atom article description." << endl; + tdeApp->exit(1); + } + + // Enclosure enclosure = article.enclosure(); + // assertEquals(enclosure.url(), "http://example.org/audio/ph34r_my_podcast.mp3"); + // assertEquals(enclosure.length(), 1337); + // assertEquals(enclosure.type(), "audio/mpeg"); + kdWarning() << "Skipping check for Atom \"enclosure\" links -- not implemented." << endl; + + assertEquals(article.guid(), "tag:example.org,2003:3.2397"); + + // 2005-07-31T12:29:29Z + // Need a little workaround since librss converts the timestamp to local time. + // NOTE: Atom provides both 'published' and 'updated'; librss uses 'updated'. + TQDateTime articlePublishedDate; + time_t publishedTime = KRFCDate::parseDateISO8601("2005-07-31T12:29:29Z"); + articlePublishedDate.setTime_t(publishedTime); + assertEquals(article.pubDate(), articlePublishedDate); + + assertEquals(article.author(), "<a href=\"mailto:f8dy@example.com\">Mark Pilgrim</a>"); +} void Tester::test( const TQString &url ) { @@ -26,33 +169,50 @@ void Tester::test( const TQString &url ) void Tester::slotLoadingComplete( Loader *loader, Document doc, Status status ) { - if ( status == Success ) + if (status != Success) + { + kdError() << "Failed to load Document " << loader->errorCode() << endl; + tdeApp->exit(1); + } + + switch (doc.version()) { - kdDebug() << "Successfully retrieved '" << doc.title() << "'" << endl; - kdDebug() << doc.description() << endl; - - if ( doc.image() ) { - kdDebug() << "Image: "; - kdDebug() << " Title: " << doc.image()->title() << endl; - kdDebug() << " URL: " << doc.image()->url() << endl; - kdDebug() << " Link: " << doc.image()->link() << endl; - } - - kdDebug() << "Articles:" << endl; - - Article::List list = doc.articles(); - Article::List::ConstIterator it; - Article::List::ConstIterator en=list.end(); - for (it = list.begin(); it != en; ++it) + case RSS::v0_91: { - kdDebug() << "\tTitle: " << (*it).title() << endl; - kdDebug() << "\tText: " << (*it).description() << endl; + checkRSS091(doc); + break; + } + case RSS::v2_0: + { + if (doc.title() == "The Well-Formed Web") + { + checkWFW(doc); + break; + } + else if (doc.title() == "The Guardian") + { + checkDC(doc); + break; + } + else if (doc.title() == "XML.com") + { + checkRDF(doc); + break; + } + kdError() << "Unknown RSS 2.0 document '" << doc.title() << "'" << endl; + tqApp->exit(1); + } + case RSS::vAtom_1_0: + { + checkAtom10(doc); + break; + } + default: + { + break; } } - if ( status != Success ) - kdDebug() << "ERROR " << loader->errorCode() << endl; - tdeApp->quit(); } @@ -60,8 +220,8 @@ int main( int argc, char **argv ) { TDEAboutData aboutData( "testlibrss", "testlibrss", "0.1" ); TDECmdLineArgs::init( argc, argv, &aboutData ); - TDECmdLineArgs::addCmdLineOptions( options ); - TDEApplication app; + TDECmdLineArgs::addCmdLineOptions( options ); + TDEApplication app(false, false); TDECmdLineArgs *args = TDECmdLineArgs::parsedArgs(); if ( args->count() != 1 ) args->usage(); |