This is a quick, simple example of scraping an HTML page's content and turning it into something else (an RSS feed) using Perl regular expressions and the XML::RSS module.
#!/usr/bin/perl
#
# Scrape an events calendar page and print it as an RSS 2.0 feed on STDOUT.
#
# The page is fetched with LWP::UserAgent and cut into chunks at every
# <div ... class="event"> tag. Each chunk that carries both a
# DATE=YYYY-MM-DD marker and a link becomes one feed item; chunks missing
# either piece are skipped. The script dies with the HTTP status line if the
# page cannot be fetched.

use strict;
use warnings;

use LWP::UserAgent;
# NOTE(review): HTML::Entities is loaded but nothing below calls it; titles
# and links are passed to XML::RSS exactly as scraped. Confirm whether
# entity decoding was intended.
use HTML::Entities;
use XML::RSS;
use DateTime;    # called directly below; do not rely on Format::Mail loading it
use DateTime::Format::Mail;

my $url          = "http://www.somesiteorother.com/upcoming-events/month-calendar";
my $tz           = 'Europe/Amsterdam';
my $agent_string = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)";

my $ua = LWP::UserAgent->new;
$ua->agent($agent_string);

# Timestamp for the channel itself (RFC 2822 style, as produced by
# DateTime::Format::Mail).
my $feed_built_at = DateTime::Format::Mail->format_datetime(
    DateTime->now(time_zone => $tz)
);

my $response = $ua->get($url);
die $response->status_line unless $response->is_success;

my $rss = XML::RSS->new(version => '2.0');
$rss->channel(
    title    => 'Some site or other',
    link     => $url,
    language => 'en',
    pubDate  => $feed_built_at,
);

# NOTE(review): as written, this pattern needs two spaces (normally with an
# attribute in between) before class=, so a bare <div class="event"> is not a
# split point. Confirm this matches the site's markup.
my @chunks = split /<div [^>]* class="event">/, $response->content;

for my $chunk (@chunks) {

    # Copy captures straight into lexicals. A failed match leaves $1/$2
    # holding values from the previous successful match, so they must never
    # be read without checking the match result first.
    my ($year, $month, $day) = $chunk =~ /DATE=(\d{4})-(\d{2})-(\d{2})/
        or next;

    # [^"]+ stops the href at its own closing quote; a greedy .+ could run
    # on into later attributes or a later anchor on the same line.
    my ($link, $title) = $chunk =~ /<a href="([^"]+)"\s[^>]+>([^<]+)<\/a>/
        or next;

    # DateTime->new throws on out-of-range values (e.g. month 00); drop that
    # one event instead of losing the whole feed.
    my $event_day = eval {
        DateTime->new(
            year      => $year,
            month     => $month,
            day       => $day,
            time_zone => $tz,
        );
    };
    if (!$event_day) {
        warn "Skipping event with invalid date $year-$month-$day\n";
        next;
    }

    $rss->add_item(
        title   => $title,
        link    => $link,
        pubDate => DateTime::Format::Mail->format_datetime($event_day),
    );
}

print $rss->as_string;
#$rss->save("file.rss");