Local copypaste @ orl.sumy.ua
File: stream_twitterv1.perl (perl syntax), download it
  1. #!/usr/bin/perl
  2. use warnings;
  3. use strict;
  4. use DBI;
  5. use XML::LibXML;
  6. use LWP::Simple;
  7. use HTML::TagParser;
  8. our $VERSION = '0.0.1';
  9.  
  10. my $interval = 30;
  11. my $twitter_pt = "http://twitter.com/statuses/public_timeline.xml";
  12. my $mysql = DBI->connect("DBI:mysql:tc:localhost","twitter");
  13. my $parser = XML::LibXML->new();
  14. my $topid = 0;
  15.  
  16. while (sleep $interval) {
  17.         if (is_success(getstore($twitter_pt,"/tmp/tpt.xml"))) {
  18.                 print "file is ok, lets parse it: ".time."\n";
  19.                 my $data = $parser->parse_file('/tmp/tpt.xml');
  20.                 foreach my $twit (reverse($data->findnodes('/statuses/status'))) {
  21.                         my $statusid = $twit->findnodes('./id');
  22.                         $statusid = int($statusid->to_literal);
  23.                         if ($topid != $statusid) {
  24.                                 $topid = $statusid;
  25.                                 my ($clname,$clurl) = '';
  26.                                 my $source_string = $twit->findnodes('./source')->to_literal;
  27.                                 my $html = HTML::TagParser->new($source_string);
  28.                                 my $list = $html->getElementsByTagName("a");
  29.                                 if (!defined $list) {$clurl = 'http://twitter.com'; $clname='web'}
  30.                                 else {$clurl = $list->getAttribute('href'); $clname = $list->innerText;}
  31.                                 my $query = $mysql->prepare("INSERT INTO source (name,url,count) VALUES (?,?,1) ON DUPLICATE KEY UPDATE count = count + 1");
  32.                                 $query->bind_param(1,$clname);
  33.                                 $query->bind_param(2,$clurl);
  34.                                 $query->execute;
  35.                         } else {last;}
  36.                 } unlink ("/tmp/tpt.xml"); print('everything is done, lets go to the next:'.time."\n");
  37.         } else { print "error downloading twitter public timeline xml: ".time."\n"; }
  38. }
  39.  
Filesize: 1475 bytes; Generation time: 0.0701 sec; Memory usage: 1.95MB; GeSHi version: 1.0.8.1;