Monday, July 20, 2009

PHP RSS Reader, Atom Reader

PHP RSS ATOM Reader - PHP class

What is RSS - RSS (Rich Site Summary) is a format for delivering regularly changing web content. Many news-related sites, weblogs and other online publishers syndicate their content as an RSS Feed to whoever wants it. RSS solves a problem for people who regularly use the web. It allows you to easily stay informed by retrieving the latest content from the sites you are interested in.

ATOM FEED - The Atom Syndication Format is an XML language used for web feeds.
All Atom feeds must be well-formed XML documents, and are identified with the application/atom+xml media type.


Here you can find a class written in PHP to read RSS/Atom feeds from a URL.
This will help you easily publish news content on your website.


CLASS FILE


/**
 * Downloads an RSS or Atom feed with cURL and converts it into a plain array.
 *
 * The result array has two top-level keys:
 *  - "headInfo": feed-level fields plus "feedType" ("RSS" or "ATOM") and
 *                "countRecords" (number of items parsed);
 *  - "items":    a zero-based list of per-item field arrays.
 *
 * Typical use: construct with the feed URL and call parseFeed().
 */
class FeedParser
{
    /** @var string|null Raw XML read from the feed URL (null until fetched). */
    private $xmlData;

    /** @var mixed cURL handle used while the feed is being downloaded. */
    private $curlHandler;

    /** @var string Feed URL the XML is downloaded from. */
    private $feedUrl;

    /** @var SimpleXMLElement|null Parsed XML document (null until parsed). */
    private $parserData;

    /** @var array Final results in array format. */
    private $feedResults;

    /**
     * Store the feed URL and reset all internal state.
     *
     * @param string $url Feed URL to read from.
     */
    public function __construct($url)
    {
        $this->feedUrl = $url;
        $this->parserData = null;
        $this->xmlData = null;
        $this->feedResults = array();
    }

    /**
     * Fetch the raw feed content from the remote URL using cURL.
     *
     * @return bool True when the transfer succeeded and the response body was
     *              stored; false when the URL is missing/blank, cURL is not
     *              available, or the transfer failed.
     */
    public function readXml()
    {
        if (!isset($this->feedUrl) || trim((string) $this->feedUrl) === '') {
            return false;
        }
        if (!function_exists('curl_init')) {
            return false;
        }

        $this->curlHandler = curl_init($this->feedUrl);
        if ($this->curlHandler === false) {
            return false;
        }

        curl_setopt($this->curlHandler, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($this->curlHandler, CURLOPT_HEADER, 0);
        $response = curl_exec($this->curlHandler);

        // Only resource-type handles (PHP < 8) need an explicit close; on
        // PHP 8+ the handle is an object released when the reference is dropped.
        if (is_resource($this->curlHandler)) {
            curl_close($this->curlHandler);
        }
        $this->curlHandler = null;

        // cURL signals failure by returning false, not by throwing.
        if ($response === false) {
            $this->xmlData = null;
            return false;
        }

        $this->xmlData = $response;
        return true;
    }

    /**
     * Parse the previously fetched XML and build the result array.
     *
     * A document with a <channel> child is treated as RSS; a document with
     * <entry> children, or whose root element is <feed>, is treated as Atom.
     * Any other document yields an empty array.
     *
     * @return array The feed results.
     * @throws Exception When the XML cannot be parsed by SimpleXMLElement.
     */
    public function parseXmlData()
    {
        // Start clean so a second parse never mixes in earlier results.
        $this->feedResults = array();

        $document = new SimpleXMLElement($this->xmlData, LIBXML_NOCDATA);
        $this->parserData = $document;

        if (isset($document->channel)) {
            $this->parseFromRSS(); // RSS feed
        } elseif (isset($document->entry) || $document->getName() === 'feed') {
            $this->parseFromATOM(); // Atom feed (possibly with no entries)
        }

        return $this->feedResults;
    }

    /**
     * Build the results from an RSS document.
     * Only the common channel and item fields are copied.
     *
     * @return void
     */
    public function parseFromRSS()
    {
        $channel = $this->parserData->channel;

        // Header information: common channel-level fields.
        $head = array("feedType" => "RSS");
        $channelFields = array(
            "title", "description", "link", "category",
            "docs", "copyright", "pubDate", "webMaster",
        );
        foreach ($channelFields as $field) {
            $head[$field] = (string) $channel->{$field};
        }

        // Channel image: result key => child element of <image>.
        $imageFields = array(
            "imageUrl"    => "url",
            "imageWidth"  => "width",
            "imageHeight" => "height",
            "imageLink"   => "link",
            "imageTitle"  => "title",
        );
        foreach ($imageFields as $key => $field) {
            $head[$key] = (string) $channel->image->{$field};
        }
        $this->feedResults["headInfo"] = $head;

        // Items: common fields only.
        $this->feedResults["items"] = array();
        $itemFields = array(
            "title", "description", "link",
            "comments", "category", "pubDate",
        );
        $count = 0;
        foreach ($channel->item as $item) {
            $row = array();
            foreach ($itemFields as $field) {
                $row[$field] = (string) $item->{$field};
            }
            $this->feedResults["items"][$count] = $row;
            $count++;
        }
        $this->feedResults["headInfo"]["countRecords"] = $count;
    }

    /**
     * Build the results from an Atom document.
     *
     * Atom 0.3 element names (copyright, modified, issued) are read first;
     * when they are absent the Atom 1.0 equivalents (rights, updated,
     * published) are used. For "copyright" and "modified" the value nested
     * under <author> is kept as a last fallback for backward compatibility.
     *
     * @return void
     */
    public function parseFromATOM()
    {
        $feed = $this->parserData;

        // Header information: common feed-level fields.
        $this->feedResults["headInfo"]["feedType"] = "ATOM";
        $this->feedResults["headInfo"]["authorName"] = (string) $feed->author->name;
        $this->feedResults["headInfo"]["authorEmail"] = (string) $feed->author->email;
        $this->feedResults["headInfo"]["copyright"] = $this->firstText(
            array($feed->copyright, $feed->rights, $feed->author->copyright)
        );
        $this->feedResults["headInfo"]["modified"] = $this->firstText(
            array($feed->modified, $feed->updated, $feed->author->modified)
        );

        // Entries: common fields only.
        $this->feedResults["items"] = array();
        $count = 0;
        foreach ($feed->entry as $entry) {
            $this->feedResults["items"][$count] = array(
                "title"    => (string) $entry->title,
                // href and type are attributes of <link>, so they are read
                // with array syntax rather than as child elements.
                "linkUrl"  => (string) $entry->link["href"],
                "linkType" => (string) $entry->link["type"],
                "issued"   => $this->firstText(array($entry->issued, $entry->published)),
                "id"       => (string) $entry->id,
                "modified" => $this->firstText(array($entry->modified, $entry->updated)),
                "content"  => (string) $entry->content,
            );
            $count++;
        }
        $this->feedResults["headInfo"]["countRecords"] = $count;
    }

    /**
     * Entry point: download the feed and return the parsed results.
     *
     * Terminates the script with a message (die) when the feed cannot be
     * read, when the response is empty, or when SimpleXMLElement is missing.
     *
     * @return array The feed results.
     */
    public function parseFeed()
    {
        if (!$this->readXml()) {
            die("Sorry , Cannot read xml data from source");
        }
        if (empty($this->xmlData)) {
            die("Nothing to parse this time");
        }
        if (!class_exists("SimpleXMLElement")) {
            die("LIB XML Not installed");
        }

        return $this->parseXmlData();
    }

    /**
     * Return the first non-empty string among the candidate values.
     *
     * @param array $candidates Values to cast to string, in priority order.
     * @return string The first non-empty text, or "" when all are empty.
     */
    private function firstText($candidates)
    {
        foreach ($candidates as $candidate) {
            $text = (string) $candidate;
            if ($text !== '') {
                return $text;
            }
        }
        return '';
    }
}



Note: Requires cURL and libxml to be installed on the server (PHP 5 is needed).



How to use the code

// Load the FeedParser class definition.
require_once "FeedParser.php";

// Feed to fetch.
$feedUrl = "http://news.google.com/news?ned=us&topic=h&output=atom";

// Download and parse the feed in one call.
$parser = new FeedParser($feedUrl);
$feedData = $parser->parseFeed();

// $feedData holds the feed elements in array format; dump it for inspection.
print_r($feedData);


?>

0 comments: