r447 - scripts/trunk

lizardo at linuxfromscratch.org lizardo at linuxfromscratch.org
Wed Jul 13 10:08:29 PDT 2005


Author: lizardo
Date: 2005-07-13 11:08:29 -0600 (Wed, 13 Jul 2005)
New Revision: 447

Modified:
   scripts/trunk/lfs2rss.pl
Log:
lfs2rss.pl: Added new informational options: -d (debug) and -v (verbose).


Modified: scripts/trunk/lfs2rss.pl
===================================================================
--- scripts/trunk/lfs2rss.pl	2005-07-13 06:50:36 UTC (rev 446)
+++ scripts/trunk/lfs2rss.pl	2005-07-13 17:08:29 UTC (rev 447)
@@ -2,7 +2,9 @@
 # Author: Rob Park <rbpark at ualberta.ca>
 # License: GNU General Public License
 
-# Modified by Anderson Lizardo <lizardo at linuxfromscratch.org>
+# Changes by Anderson Lizardo <lizardo at linuxfromscratch.org>:
+# 
+# 2005-07-13: added new informational options: -d (debug) and -v (verbose).
 # 2005-07-12: updated for www2 and implemented handling of wrapped <p> tags.
 # 2004-12-29: changed encoding to ISO-8859-1
 # 2004-12-12: added BUGS section
@@ -22,11 +24,14 @@
 	"help" => undef,
 	"man" => undef,
 	"news-file" => "news.html",
-	"rss-file" => "feed.rss"
+	"rss-file" => "feed.rss",
+	"verbose" => "",
+	"debug" => "",
 );
 
 # parse commandline options, display help if needed
-GetOptions(\%args, 'help|?', 'man', 'news-file=s', 'rss-file=s');
+GetOptions(\%args, 'help|?', 'man', 'news-file=s', 'rss-file=s',
+	'verbose', 'debug');
 pod2usage(1) if $args{help};
 pod2usage(-exitstatus => 0, -verbose => 2) if ($args{man});
 
@@ -48,7 +53,7 @@
 }
 
 # open the HTML file
-#print "Opening $args{'news-file'} for parsing...\n";
+print "Opening $args{'news-file'} for parsing...\n" if ($args{verbose});
 open HTML, "<$args{'news-file'}";
 
 my %item;
@@ -60,26 +65,27 @@
 {
 	# extract the news items
 	if (m/<!--#set\s+var="pageTitle"\s+value="([^"]+)"/) {
+		print "  pageTitle\n" if ($args{debug});
 		$channel{title} = sanitize($1);
 		# insert the channel information into the RSS object
 		$RSS->channel(%channel);
 	}
 	elsif (m/<dt>(.+)<\/dt>/) {
-		print "dt\n";
+		print "  dt\n" if ($args{debug});
 		$item{title} = sanitize($1);
 	}
 	elsif (m/<p>(.+)<\/p>/) {
-		print "one-line p\n";
+		print "  one-line p\n" if ($args{debug});
 		$push_item = 1 if ($item{title});
 		$item{description} = $1;
 	}
 	elsif (m/<p>(.*)/) {
-		print "open p\n";
+		print "  open p\n" if ($args{debug});
 		$open_para = 1;
 		$item{description} = "$1\n";
 	}
 	elsif (m/(.*)<\/p>/) {
-		print "close p\n";
+		print "  close p\n" if ($args{debug});
 		$push_item = 1 if ($item{title});
 		$open_para = 0;
 		$item{description} .= $1;
@@ -103,7 +109,7 @@
 }
 
 # save the RSS to a file
-#print "Saving RSS feed to $args{'rss-file'}...\n";
+print "Saving RSS feed to $args{'rss-file'}...\n" if ($args{verbose});
 $RSS->save($args{'rss-file'});
 
 __END__
@@ -136,15 +142,14 @@
 
 Define the location of the RSS file to output. Defaults to C<./feed.rss>.
 
-=back
+=item B<-v, --verbose>
 
-=head1 BUGS
+Show informational messages on each step.
 
-Due to a inherent limitation of the lfs2rss.pl script, news items should not
-have newlines inside <p>...</p> tags (at least not in the first paragraph).
-Such limitation can be avoided by using a XML/HTML parser instead of reading
-the HTML file line by line.
+=item B<-d, --debug>
 
+Show debugging information (e.g. HTML parsing traces).
+
 =head1 REPORTING BUGS
 
 Report bugs to <rbpark at ualberta.ca>.




More information about the website mailing list