r446 - scripts/trunk

lizardo at linuxfromscratch.org lizardo at linuxfromscratch.org
Tue Jul 12 23:50:37 PDT 2005


Author: lizardo
Date: 2005-07-13 00:50:36 -0600 (Wed, 13 Jul 2005)
New Revision: 446

Modified:
   scripts/trunk/lfs2rss.pl
   scripts/trunk/update-website.mk
Log:
Committed RSS autogeneration support; needs testing.


Modified: scripts/trunk/lfs2rss.pl
===================================================================
--- scripts/trunk/lfs2rss.pl	2005-07-13 06:29:14 UTC (rev 445)
+++ scripts/trunk/lfs2rss.pl	2005-07-13 06:50:36 UTC (rev 446)
@@ -3,6 +3,7 @@
 # License: GNU General Public License
 
 # Modified by Anderson Lizardo <lizardo at linuxfromscratch.org>
+# 2005-07-12: updated for www2 and implemented handling of wrapped <p> tags.
 # 2004-12-29: changed encoding to ISO-8859-1
 # 2004-12-12: added BUGS section
 # 2004-03-09: fixed news item URL creation
@@ -29,10 +30,11 @@
 pod2usage(1) if $args{help};
 pod2usage(-exitstatus => 0, -verbose => 2) if ($args{man});
 
-my @items;
 my %channel = 
 (
 	link => "http://www.linuxfromscratch.org/",
+	title => "LFS General News",
+	description => "Linux From Scratch - build your own Linux distribution from scratch",
 	language => "en-us"
 );
 
@@ -45,58 +47,62 @@
 	return $string;
 }
 
-# open the HTML file and tell the user what's going on.
+# open the HTML file
 #print "Opening $args{'news-file'} for parsing...\n";
 open HTML, "<$args{'news-file'}";
 
+my %item;
+$item{link} = $channel{link} . $args{'news-file'};
+my $open_para = 0;
+my $push_item = 0;
 # parse the HTML file
 while (<HTML>)
 {
-	# figure out what page we're working on
-	m/<body.*class="(.+)">/
-		&& ($channel{link} .= "$1/$args{'news-file'}");
-
-	# extract the title of the page
-	m/<title>(.+)<\/title>/ 
-		&& ($channel{title} = sanitize($1));
-	
-	# extract the description from the meta tags
-	m/<meta name="description" content="(.+)"\s?\/>/ 
-		&& ($channel{description} = sanitize($1));
-	
 	# extract the news items
-	if (m/<h3 id="([^"]+)"><a[^>]+>(.+)<\/a><\/h3>/)
-	{
-		my %item;
-		$item{link} = "$channel{link}#$1";
-		$item{title} = sanitize($2);
-		
-		# ugly way of getting $channel{link} without $args{'news-file'} on the end of it
-		my $cwdir = $channel{link};
-		{ local $/ = $args{'news-file'}; chomp $cwdir; }
-	
-		# skip two lines down, to the first <p> tag
-		$item{description} = <HTML>;
-		$item{description} = <HTML>; 
-		$item{description} =~ s/^.*<p>(.+)<\/p>.*$/sanitize($1)/e;
-		$item{description} =~ s/(href|src)="((?!http|ftp|mailto)[^"]+)"/$1="$cwdir$2"/g;
-		chomp $item{description};
+	if (m/<!--#set\s+var="pageTitle"\s+value="([^"]+)"/) {
+		$channel{title} = sanitize($1);
+		# insert the channel information into the RSS object
+		$RSS->channel(%channel);
+	}
+	elsif (m/<dt>(.+)<\/dt>/) {
+		print "dt\n";
+		$item{title} = sanitize($1);
+	}
+	elsif (m/<p>(.+)<\/p>/) {
+		print "one-line p\n";
+		$push_item = 1 if ($item{title});
+		$item{description} = $1;
+	}
+	elsif (m/<p>(.*)/) {
+		print "open p\n";
+		$open_para = 1;
+		$item{description} = "$1\n";
+	}
+	elsif (m/(.*)<\/p>/) {
+		print "close p\n";
+		$push_item = 1 if ($item{title});
+		$open_para = 0;
+		$item{description} .= $1;
+	}
+	else {
+		$item{description} .= $_ if ($open_para);
+	}
+	if ($push_item) {
+		$push_item = 0;
+		$item{description} =~
+		s/(href|src)="((?!http|ftp|mailto)[^"]+)"/$1="$channel{link}$2"/g;
+		$item{description} = sanitize($item{description});
+		if ($item{title} && $item{description}) {
+			# add the newly parsed news item to the RSS object
+			$RSS->add_item(%item);
+			$item{title} = "";
+			$item{description} = "";
+		}
 
-		# add the newly parsed news item to the list of news items
-		push @items, \%item;
 	}
 }
 
-# insert the channel information into the RSS object
-$RSS->channel(%channel);
-
-# insert the news items into the RSS object
-for my $item (@items)
-{
-	$RSS->add_item(%{$item});
-}
-
-# save the RSS to a file and tell the user what's going on.
+# save the RSS to a file
 #print "Saving RSS feed to $args{'rss-file'}...\n";
 $RSS->save($args{'rss-file'});
 

Modified: scripts/trunk/update-website.mk
===================================================================
--- scripts/trunk/update-website.mk	2005-07-13 06:29:14 UTC (rev 445)
+++ scripts/trunk/update-website.mk	2005-07-13 06:50:36 UTC (rev 446)
@@ -59,6 +59,9 @@
 svnlog2-%:
 	$(SVN2HTML2) --project $* --with-branchname > logs/$*.html
 
+%/feed.rss: %/news.html
+	$(SCRIPTS)/lfs2rss.pl -n $< -r $@
+
 ###########################################
 # Update project-specific website sections
 #
@@ -67,24 +70,24 @@
 # TODO: These rules should be run by each project's post-commit script or by
 # cron jobs
 
-update-alfs: svnlog2-ALFS
+update-alfs: svnlog2-ALFS alfs/feed.rss
 # Temporary sync of alfs-srs until a more permanent location/rendering is set up.
 	rsync -av --delete /home/matthew/public_html/alfs-srs $(TARGETDIR)/alfs/view/
 
-update-blfs: svnlog-BLFS
+update-blfs: svnlog-BLFS blfs/feed.rss
 
-update-hints: svnlog-hints fetch-hints hints-tarball hints-symlinks hints-list
+update-hints: svnlog-hints fetch-hints hints-tarball hints-symlinks hints-list hints/feed.rss
 
-update-hlfs: svnlog-HLFS
+update-hlfs: svnlog-HLFS hlfs/feed.rss
 
-update-lfs: svnlog-LFS
+update-lfs: svnlog-LFS lfs/feed.rss
 
-update-livecd: svnlog-livecd
+update-livecd: svnlog-livecd livecd/feed.rss
 
-update-patches: svnlog-patches
+update-patches: svnlog-patches patches/feed.rss
 	svn -q update patches/downloads -r $(REV)
 
-update-www: svnlog-www2 .stamp.error-pages
+update-www: svnlog-www2 .stamp.error-pages ./feed.rss
 	svn -q update . -r $(REV)
 	svn -q update $(SCRIPTS) -r $(REV)
 




More information about the website mailing list