cvs commit: www/test/lfs

jeroen at jeroen at
Tue Aug 5 09:39:25 PDT 2003

jeroen      03/08/05 10:39:25

  Added:       test
               test/images rss.png
  Removed:     test/blfs
  Script changes, now for real.
  Revision  Changes    Path
  1.1                  www/test/
  #!/usr/bin/perl -w
  # Author: Rob Park <rbpark at>
  # License: GNU General Public License
  use XML::RSS;
  use Getopt::Long;
  use Pod::Usage;
  use strict;
  # make a new XML::RSS object
  my $RSS = new XML::RSS (version => '2.0');
  # default arguments for commandline switches
  # Default value for every command-line switch. A reference to this hash is
  # handed to GetOptions below, which stores the parsed options in it.
  my %args = (
  	"help" => undef,
  	"man" => undef,
  	"news-file" => "news.html",
  	"rss-file" => "feed.rss",
  );
  # parse commandline options, display help if needed
  GetOptions(\%args, 'help|?', 'man', 'news-file=s', 'rss-file=s');
  pod2usage(1) if $args{help};
  pod2usage(-exitstatus => 0, -verbose => 2) if ($args{man});
  my @items;
  my %channel = 
  	link => "",
  	language => "en-us"
  # Escape the characters that are special in XML so that text taken from the
  # HTML page can be embedded in the RSS feed.
  #
  # Takes one string and returns the escaped copy (undef is passed through
  # unchanged). An ampersand that already begins an entity reference, named
  # ("&amp;") or numeric ("&#169;"), is left alone so that existing entities
  # are not escaped a second time.
  sub sanitize
  {
  	my $string = shift;
  	return $string unless defined $string;
  	# ampersands first, otherwise the "&" of "&lt;"/"&gt;" added below
  	# would depend on the lookahead to survive
  	$string =~ s/&(?!#?\w+;)/&amp;/g;
  	$string =~ s/</&lt;/g;
  	$string =~ s/>/&gt;/g;
  	return $string;
  }
  # open the HTML file and tell the user what's going on.
  print "Opening $args{'news-file'} for parsing...\n";
  # Three-argument open keeps a file name such as ">x" or "cmd |" from being
  # interpreted as an open mode. Stop right away when the file cannot be read
  # instead of silently producing an empty feed.
  open(HTML, '<', $args{'news-file'})
  	or die "Cannot open $args{'news-file'} for reading: $!\n";
  # parse the HTML file
  while (<HTML>)
  	# figure out what page we're working on
  	m/<body id="body" class="(.+)">/
  		&& ($channel{link} .= "$1/$args{'news-file'}");
  	# extract the title of the page
  		&& ($channel{title} = sanitize($1));
  	# extract the description from the meta tags
  	m/<meta name="description" content="(.+)"\s?\/>/ 
  		&& ($channel{description} = sanitize($1));
  	# extract the news items
  	if (m/<h3 id="(.+)">(.+)<\/h3>/)
  		my %item;
  		$item{link} = "$channel{link}#$1";
  		$item{title} = sanitize($2);
  		# ugly way of getting $channel{link} without $args{'news-file'} on the end of it
  		my $cwdir = $channel{link};
  		{ local $/ = $args{'news-file'}; chomp $cwdir; }
  		# skip two lines down, to the first <p> tag
  		$item{description} = <HTML>;
  		$item{description} = <HTML>; 
  		$item{description} =~ s/^.*<p>(.+)<\/p>.*$/sanitize($1)/e;
  		$item{description} =~ s/(href|src)="((?!http|ftp|mailto).+)"/$1="$cwdir$2"/g;
  		chomp $item{description};
  		# add the newly parsed news item to the list of news items
  		push @items, \%item;
  # insert the channel information into the RSS object
  # insert the news items into the RSS object
  for my $item (@items)
  # save the RSS to a file and tell the user what's going on.
  print "Saving RSS feed to $args{'rss-file'}...\n";
  =head1 NAME - parse the LFS website and convert it into an RSS feed
  =head1 SYNOPSIS [options]
  =head1 OPTIONS
  =over 8
  =item B<-h, --help>
  Print this help message.
  =item B<-m, --man>
  Output more verbose help in the form of a man page.
  =item B<-n, --news-file>
  Define the location of the HTML file to parse. Defaults to C<./news.html>.
  =item B<-r, --rss-file>
  Define the location of the RSS file to output. Defaults to C<./feed.rss>.
  =head1 VERSION
  $Id:,v 1.1 2003/08/05 16:39:25 jeroen Exp $
  Report bugs to <rbpark at>.
  1.1                  www/test/images/rss.png
  	<<Binary file>>

More information about the website mailing list