#!/usr/bin/perl
use warnings;
use strict;
use DBI;
use XML::LibXML;
use LWP::Simple;
use HTML::TagParser;
our $VERSION = '0.0.1';
# Poll the Twitter public timeline every $interval seconds and keep a
# per-client usage tally (name, url, count) in the MySQL `source` table.
my $interval   = 30;
my $twitter_pt = "http://twitter.com/statuses/public_timeline.xml";

# Scratch file the timeline is downloaded into before parsing.
# NOTE(review): a fixed name under /tmp is predictable; consider File::Temp.
my $timeline_file = '/tmp/tpt.xml';

# Fail right away with the driver's message instead of crashing later on an
# undefined handle.
my $mysql = DBI->connect("DBI:mysql:tc:localhost", "twitter")
    or die "cannot connect to database: $DBI::errstr\n";

# The statement text never changes, so prepare it once instead of once per
# status.
my $query = $mysql->prepare(
    "INSERT INTO source (name,url,count) VALUES (?,?,1) ON DUPLICATE KEY UPDATE count = count + 1"
) or die "cannot prepare statement: " . $mysql->errstr . "\n";

my $parser = XML::LibXML->new();

# Highest status id counted so far; anything at or below it has already been
# tallied by an earlier poll.
# NOTE(review): assumes status ids grow over time -- confirm against the API.
my $topid = 0;

# Return (name, url) of the client described by a status <source> value.
# The value is either markup containing an <a href="...">name</a> link or
# plain text; anything without a usable link is reported as the web client.
sub _client_from_source {
    my ($source_string) = @_;

    # Parse explicitly as markup: handing feed-supplied text to
    # HTML::TagParser->new() would let it be taken for a URL or a file name.
    # parse() may croak on text without any tags, hence the eval.
    my $anchor = eval {
        my $html = HTML::TagParser->new();
        $html->parse($source_string);
        scalar $html->getElementsByTagName("a");    # first <a>, or undef
    };

    return ('web', 'http://twitter.com') if !defined $anchor;
    return ($anchor->innerText, $anchor->getAttribute('href'));
}

# sleep comes first, so the first download happens $interval seconds after
# start-up.
while (sleep $interval) {
    if (!is_success(getstore($twitter_pt, $timeline_file))) {
        print "error downloading twitter public timeline xml: " . time . "\n";
        next;
    }
    print "file is ok, lets parse it: " . time . "\n";

    # parse_file throws on malformed XML; a single bad download must not
    # terminate the poller or leave the scratch file behind.
    my $data        = eval { $parser->parse_file($timeline_file) };
    my $parse_error = $@;
    unlink $timeline_file;
    if (!defined $data) {
        print "error parsing twitter public timeline xml: " . time . ": $parse_error\n";
        next;
    }

    # Walk the statuses in reverse document order, counting only those newer
    # than the previous poll's high-water mark.  The mark itself is advanced
    # after the batch so ordering inside one batch cannot hide a status.
    my $batch_top = $topid;
    foreach my $twit (reverse($data->findnodes('/statuses/status'))) {
        my $statusid = int($twit->findvalue('./id'));
        next if $statusid <= $topid;    # already counted
        $batch_top = $statusid if $statusid > $batch_top;

        my ($clname, $clurl) = _client_from_source($twit->findvalue('./source'));
        $query->execute($clname, $clurl);
    }
    $topid = $batch_top;

    print('everything is done, lets go to the next:' . time . "\n");
}