cvs commit: www manage_news.pl

lizardo at linuxfromscratch.org lizardo at linuxfromscratch.org
Fri Sep 5 16:44:16 PDT 2003


lizardo     03/09/05 17:44:16

  Modified:    .        manage_news.pl
  Log:
  manage_news.pl: Improved code efficiency
  
  Revision  Changes    Path
  1.5       +50 -86    www/manage_news.pl
  
  Index: manage_news.pl
  ===================================================================
  RCS file: /home/cvsroot/www/manage_news.pl,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- manage_news.pl	5 Sep 2003 19:50:03 -0000	1.4
  +++ manage_news.pl	5 Sep 2003 23:44:16 -0000	1.5
  @@ -17,6 +17,8 @@
   # along with this program; if not, write to the Free Software
   # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
   
  +# $Id: manage_news.pl,v 1.5 2003/09/05 23:44:16 lizardo Exp $
  +
   use warnings;
   use strict;
   
  @@ -28,11 +30,8 @@
   use URI;
   use Pod::Usage;
   
  -my $my_version = "0.3";
  -
   my $help = 0;
   my $man = 0;
  -my $version = 0;
   my $infile = "";
   my $archive_under = "";
   my $top = "";
  @@ -41,7 +40,6 @@
   GetOptions(
       "help" => \$help,
       "man" => \$man,
  -    "version" => \$version,
       "infile|i=s" => \$infile,
       "archive-under|a=s" => \$archive_under,
       "top|t=s" => \$top,
  @@ -51,12 +49,6 @@
   pod2usage(1) if $help;
   pod2usage(-exitstatus => 0, -verbose => 2) if $man;
   
  -if ($version) {
  -    print "manage_news.pl $my_version\n" .
  -    "Copyright (C) 2003 Anderson Lizardo\n";
  -    exit 0;
  -}
  -
   sub Get_header {
       my ($header, $name) = @_;
       # Extract the field from the header
  @@ -72,6 +64,35 @@
       return $hdr_content;
   }
   
  +# Translate the MIME data to HTML
  +sub mime2html {
  +    my ($sect, $mime_part) = @_;
  +
  +    my ($year, $month, undef) = split /\//, Get_header($mime_part->head, "date");
  +    my $archive_url;
  +    if ($sect =~ /^general$/) {
  +        $archive_url = "http://linuxfromscratch.org/news/".$year."/".$month.".html";
  +    }
  +    else {
  +        $archive_url = "http://linuxfromscratch.org/news/".$sect."/".$year."/".$month.".html";
  +    }
  +    my $news_id;
  +    if (defined($mime_part->head->get("id"))) {
  +        $news_id = lc(Get_header($mime_part->head, "id"));
  +    }
  +    else {
  +        # News item ID, created from the news title
  +        $news_id = lc(Get_header($mime_part->head, "title"));
  +        $news_id =~ s/\W+//g;
  +    }
  +    # Return the XHTML code
  +    return "\t<h3 id=\"$news_id\"><a href=\"$archive_url#$news_id\">" .
  +    Get_header($mime_part->head, "title") . "</a></h3>\n" .
  +    "\t\t<h4>" . Get_header($mime_part->head, "author") . " - " .
  +    Get_header($mime_part->head, "date") . "</h4>\n" .
  +    $mime_part->bodyhandle->as_string . "\n";
  +}
  +
   my $parser = new MIME::Parser;
   # These options disable the use of temporary files
   $parser->output_to_core(1);
  @@ -90,6 +111,8 @@
   }
   
   my $base_url = "";
  +my $section = lc(Get_header($entity->head, "section"));
  +die "Invalid section name: " . $section . "\n" unless $section =~ /^[\w-]+$/;
   
   if ($archive_under) {
       my %archive_files = ();
  @@ -98,13 +121,12 @@
           # It also avoids possible cross-site scripting
           if (Get_header($part->head, "date") !~ /^\d{4}\/\d{2}\/\d{2}$/) {
               die "Invalid date: " . Get_header($part->head, "date") . "\n" .
  -            "News item header:\n" . $part->head->as_string . "\n";
  +            "News header:\n---\n" . $part->head->as_string . "\n---\n";
           }
           # Split year and month, and store the destination archive location
           # ($archive_under/YYYY/MM.html)
           my ($year, $month, undef) = split /\//, Get_header($part->head, "date");
  -        my $archive_file = $archive_under . "/" . $year . "/" . $month . ".html";
  -        $archive_files{$archive_file} = 1;
  +        $archive_files{$archive_under . "/" . $year . "/" . $month . ".html"} = 1;
       }
       foreach my $archive_file (keys %archive_files) {
           # Create the destination archive location
  @@ -120,43 +142,23 @@
           }
       }
       foreach my $part ($entity->parts) {
  -        my ($year, $month, undef) = split /\//, Get_header($part->head, "date");
  -        my $section = lc(Get_header($entity->head, "section"));
  -        my $archive_file = $archive_under."/".$year."/".$month.".html";
  -        my $archive_url;
           if ($section =~ /^general$/) {
  -            $archive_url = "http://linuxfromscratch.org/news/".$year."/".$month.".html";
               $base_url = "http://linuxfromscratch.org/news/";
           }
           else {
  -            $archive_url = "http://linuxfromscratch.org/news/".$section."/".$year."/".$month.".html";
               $base_url = "http://linuxfromscratch.org/".$section."/";
  -            $archive_url =~ s/\.\.\///g;
  -            $base_url =~ s/\.\.\///g;
           }
  -        open(NEWS, ">>$archive_file") or die "Could not open $archive_file" . ": $!\n";
  -        my $news_id;
  -        if (defined($part->head->get("id"))) {
  -            $news_id = lc(Get_header($part->head, "id"));
  -        }
  -        else {
  -            # News item ID, created from the news title
  -            $news_id = lc(Get_header($part->head, "title"));
  -            $news_id =~ s/\W+//g;
  -        }
  -        # Output the actual XHTML code
  -        print NEWS "\t<h3 id=\"$news_id\"><a href=\"$archive_url#$news_id\">" .
  -        Get_header($part->head, "title") . "</a></h3>\n" .
  -        "\t\t<h4>" . Get_header($part->head, "author") . " - " .
  -        Get_header($part->head, "date") . "</h4>\n";
  -
           # Use the HTML::Parser and URI modules to resolve relative links
           my $html_p = HTML::Parser->new(api_version => 3,
               start_h => [\&handle_StartTag, "tagname, attr"],
               end_h => [\&handle_EndTag, "tagname"],
               text_h => [\&handle_Text, "dtext" ],
           );
  -        $html_p->parse($part->bodyhandle->as_string);
  +        my ($year, $month, undef) = split /\//, Get_header($part->head, "date");
  +        my $archive_file = $archive_under."/".$year."/".$month.".html";
  +        open(NEWS, ">>$archive_file") or die "Could not open $archive_file" . ": $!\n";
  +        $html_p->parse(mime2html($section, $part));
  +        $html_p->eof;
           print NEWS $buffer . "\n\n";
           $buffer = "";
           close NEWS;
  @@ -173,50 +175,15 @@
       }
   }
   else {
  +    print '<p><a href="#header">Back to the top.</a></p>' . "\n" .
  +    '<h2 id="generalnews">General news</h2>' . "\n" if $section eq "general";
       my $count = 0;
  -    if (Get_header($entity->head, "section") =~ /^general$/i) {
  -        print <<EOF;
  -<p><a href="#header">Back to the top.</a></p>
  -<h2 id="generalnews">General news</h2>
  -EOF
  -    }
       foreach my $part ($entity->parts) {
           last if $count++ == 5;
  -        my ($year, $month, undef) = split /\//, Get_header($part->head, "date");
  -        my $section = lc(Get_header($entity->head, "section"));
  -        my $archive_url;
  -        if ($section =~ /^general$/) {
  -            $archive_url = "http://linuxfromscratch.org/news/".$year."/".$month.".html";
  -            $base_url = "http://linuxfromscratch.org/news/";
  -        }
  -        else {
  -            $archive_url = "http://linuxfromscratch.org/news/".$section."/".$year."/".$month.".html";
  -            $base_url = "http://linuxfromscratch.org/".$section."/";
  -            $archive_url =~ s/\.\.\///g;
  -            $base_url =~ s/\.\.\///g;
  -        }
  -        my $news_id;
  -        if (defined($part->head->get("id"))) {
  -            $news_id = lc(Get_header($part->head, "id"));
  -        }
  -        else {
  -            # News item ID, created from the news title
  -            $news_id = lc(Get_header($part->head, "title"));
  -            $news_id =~ s/\W+//g;
  -        }
  -        # Output the actual XHTML code
  -        print "\t<h3 id=\"$news_id\"><a href=\"$archive_url#$news_id\">" .
  -        Get_header($part->head, "title") . "</a></h3>\n" .
  -        "\t\t<h4>" . Get_header($part->head, "author") . " - " .
  -        Get_header($part->head, "date") . "</h4>\n" .
  -        $part->bodyhandle->as_string . "\n";
  -    }
  -    if (Get_header($entity->head, "section") =~ /^general$/i) {
  -        print <<EOF;
  -<p><a href="#header">Back to the top.</a></p>
  -<h2 id="changelog">Latest CVS changes:</h2>
  -EOF
  +        print mime2html($section, $part);
       }
  +    print '<p><a href="#header">Back to the top.</a></p>' . "\n" .
  +    '<h2 id="changelog">Latest CVS changes:</h2>' . "\n" if $section eq "general";
   }
   
   print STDERR "Errors: " . $parser->results->errors . "\n";
  @@ -232,8 +199,10 @@
       my ($tag, $attrs) = @_;
   
       if ($tag eq "a" and $$attrs{"href"}) {
  -        my $uri = URI->new($$attrs{"href"})->abs($base_url);
  -        $$attrs{"href"} = $uri->as_string;
  +        $$attrs{"href"} = URI->new($$attrs{"href"})->abs($base_url)->as_string;
  +    }
  +    elsif ($tag eq "img" and $$attrs{"src"}) {
  +        $$attrs{"src"} = URI->new($$attrs{"src"})->abs($base_url)->as_string;
       }
       $buffer .= "<$tag";
       $buffer .= " $_=\"$$attrs{$_}\"" foreach (keys %$attrs);
  @@ -268,7 +237,7 @@
   
   =head1 SYNOPSIS
   
  -manage_news.pl  [--help|--man|--version]  [-t top.html]  [-b bottom.html]  [-a output_dir] [-i html_file]
  +manage_news.pl  [--help|--man]  [-t top.html]  [-b bottom.html]  [-a output_dir] [-i html_file]
   
       Options:
           --infile|-i         Parse MIME news database from given file
  @@ -277,7 +246,6 @@
           --archive-under|-a  Output news under output_dir/YYYY/MM.html
           --help              Show brief help message
           --man               Full documentation
  -        --version           Show program version
   
   =head1 DESCRIPTION
   
  @@ -312,10 +280,6 @@
   =item B<--man>
   
   Print the manual page and exits.
  -
  -=item B<--version>
  -
  -Show program version and exits.
   
   =back
   
  
  
  



More information about the website mailing list