#!/usr/bin/perl -w
#
# blosxom_to_wp_import -- walk a directory tree of blosxom "*.txt" entries and
# print them to STDOUT as a single RSS 2.0 document (one <item> per entry).
#
# Usage: blosxom_to_wp_import directory
#
# For each entry the script emits:
#   category     first path component below the given directory
#   name         remaining path components joined with "_", minus ".txt"
#   title        first line of the entry file
#   pubdate      the entry file's mtime
#   description  the rest of the entry (plus any "<entry>-N.upd" update
#                files), with link tables expanded, site-relative URLs made
#                absolute, and the result passed through the external
#                "tidy" program.
#
# NOTE(review): the copy of this script that was reviewed had lost its line
# structure and every piece of tag-like text (filehandle reads showed up as
# "my $title = ;", several regexes were missing their markup).  Filehandle
# reads are restored below; regexes whose markup could not be recovered are
# reconstructed from their surviving capture-group usage and are individually
# marked NOTE(review) -- confirm each against a known-good copy before use.

use strict;
use warnings;

use XML::Writer;
use Date::Manip;
use XML::Parser;
use FileHandle;
use IPC::Open2;
use POSIX qw(strftime);

# Prefix added to site-relative links found in entry bodies.
my $url = "http://azure.humbug.org.au";

# NOTE(review): the markers that open and close a link table inside an entry
# were lost (only "^(.*?)(.*)$" survived, yet three captures are consumed).
# These two patterns are PLACEHOLDERS -- replace them with the real markers
# used by the blog's entry format.
my $LINKS_OPEN  = qr{<links>};
my $LINKS_CLOSE = qr{</links>};

my $DIR = $ARGV[0]
    or die( "Usage blosxom_to_wp_import directory\n\n" );
my $author = $ENV{ 'USER' }
    or die( "Couldn't get USER from env" );
-d $DIR or die( "Unrecognized directory $DIR\n" );

my @files = find_entries( $DIR );

my $writer = get_xml_writer( );
$writer->xmlDecl( "UTF-8" );
$writer->startTag( 'rss', 'version' => '2.0' );
$writer->startTag( 'channel' );

foreach my $file ( @files ) {
    my ( $name, $category ) = get_meta( $file );
    my $pubdate = get_pubdate( $file );
    my ( $title, $description ) = read_story( $file );

    $writer->startTag( 'item' );
    $writer->dataElement( 'category', $category );
    $writer->dataElement( 'name', $name );
    $writer->dataElement( 'title', $title );
    $writer->dataElement( 'pubdate', $pubdate );
    $writer->dataElement( 'description', $description );
    $writer->endTag( 'item' );
}

$writer->endTag( 'channel' );
$writer->endTag( 'rss' );
# Let XML::Writer verify that every start tag was closed.
$writer->end( );

# find_entries( $dir )
#
# Returns the paths (newline removed) of every "*.txt" file that find(1)
# reports under $dir.  find is run without a shell so that characters in
# $dir cannot be interpreted as shell syntax.
sub find_entries {
    my ( $dir ) = @_;

    open( my $find, '-|', 'find', $dir, '-name', '*.txt' )
        or die( "Couldn't run find: $!" );
    chomp( my @found = <$find> );
    # A non-zero exit (e.g. an unreadable subdirectory) is reported but not
    # fatal: whatever find did list is still processed.
    close( $find )
        or warn( "find exited abnormally for $dir (status $?)\n" );

    return @found;
}

# get_meta( $file )
#
# Derives ( $name, $category ) from an entry's path: the leading $DIR is
# removed, the first remaining path component becomes the category, and the
# other components are joined with "_" (".txt" dropped) to form the name.
sub get_meta {
    my ( $file ) = @_;

    # \Q..\E: $DIR is a literal path, not a pattern.
    $file =~ s/^\/?\Q$DIR\E\/?//;

    my @parts    = split( /\//, $file );
    my $category = shift( @parts );
    my $name     = join( '_', @parts );
    $name =~ s/\.txt$//;

    return ( $name, $category );
}

# get_pubdate( $file )
#
# Returns the file's modification time as an RFC 822 style date string in
# local time.  Dies if the file cannot be stat'ed.
sub get_pubdate {
    my ( $file ) = @_;

    my $mtime = ( stat( $file ) )[9];
    defined $mtime or die( "Couldn't stat $file: $!" );

    # %z instead of a hard-coded "+1000": the offset must describe the same
    # zone that localtime() used, whatever zone the script runs in.
    return strftime( "%a, %d %b %Y %H:%M:%S %z", localtime( $mtime ) );
}

# get_xml_writer( )
#
# Returns a namespace-aware XML::Writer (default output: STDOUT).
sub get_xml_writer {
    return XML::Writer->new( NAMESPACES => 1 );
}

# read_story( $file )
#
# Reads one blosxom entry and returns ( $title, $description ).
#
# The title is the first line of $file; the description is everything after
# it, followed by the body of each existing update file "<base>-1.upd",
# "<base>-2.upd", ... (consecutive numbering; the first gap stops the scan).
# Link tables are expanded, site-relative URLs are prefixed with $url, and
# the result is run through tidy().  Dies on unreadable files and on an empty
# title or description.  Progress is logged to STDERR.
sub read_story {
    my ( $file ) = @_;

    print STDERR "Got $file\n";

    open( my $story, '<', $file )
        or die( "Couldn't open $file: $!" );
    my $title = <$story>;
    chomp( $title ) if defined $title;
    ( defined $title and length $title )
        or die( "Couldn't get title from $file" );

    my $description = join( '', <$story> );
    close( $story );
    length $description or die( "Couldn't read story" );

    # Append numbered update files, each under a dated heading.
    my $updfn;
    my $base = $file;
    $base =~ s/\.txt$//;
    for ( my $updn = 1; -e ( $updfn = "$base-$updn.upd" ); $updn++ ) {
        my $mtime = ( stat( _ ) )[9];    # reuse the stat done by -e
        my $d = strftime( "%Y/%m/%d", localtime( $mtime ) );

        # NOTE(review): any markup that surrounded this heading may have been
        # lost; the string is kept exactly as it survived.
        $description .= "\n\nUPDATE $d:\n\n\n";

        open( my $upd, '<', $updfn )
            or die( "Couldn't open update: $updfn\n" );
        my $upd_title = <$upd>;          # first line is a title; not imported
        $description .= join( '', <$upd> );
        close( $upd );
    }

    # Expand link tables.  Each table is a list of "#N: target" lines; every
    # <#N>text</#N> in the text preceding the table becomes a hyperlink to
    # the target, and the table itself is dropped.
    my $result = "";
    my $rest   = $description;
    while ( $rest =~ m/^(.*?)$LINKS_OPEN(.*?)$LINKS_CLOSE(.*)$/s ) {
        my ( $change, $table ) = ( $1, $2 );
        $rest = $3;

        for my $entry ( split /\n/, $table ) {
            next unless $entry =~ m/^(#\d+):\s*(\S.*\S)\s*$/;
            my ( $ind, $ref ) = ( $1, $2 );

            # NOTE(review): only "s,<$ind>(.*?),$1,sg" survived; the closing
            # marker and the anchor markup are reconstructed from the
            # otherwise unused $ref.
            $change =~ s,<$ind>(.*?)</$ind>,<a href="$ref">$1</a>,sg;
        }
        $result .= $change;
    }
    $description = $result . $rest;

    # Make site-relative URLs absolute.
    # NOTE(review): both patterns were lost; only the replacement
    # "$1$url$3$2$4>" survived (twice).  The patterns below are a
    # reconstruction that fits that replacement: $1 = tag up to and including
    # the opening quote, $2 = the quote character, $3 = the "/..." path,
    # $4 = remaining attributes.  Which tags/attributes the two original
    # statements targeted (assumed: a/href and img/src) must be confirmed.
    $description =~ s{(<a\s[^>]*?href=(["']?))(/[^"'\s>]*)\2([^>]*)>}{$1$url$3$2$4>}sig;
    $description =~ s{(<img\s[^>]*?src=(["']?))(/[^"'\s>]*)\2([^>]*)>}{$1$url$3$2$4>}sig;

    if ( $description eq "" ) {
        die "accidently got null description?";
    }

    $description = tidy( $description );
    if ( $description eq "" ) {
        die "tidied to null description?";
    }

    return ( $title, $description );
}

# tidy( $xml )
#
# Pipes $xml through the external "tidy" program (XHTML output, body only,
# no wrapping), checks that the result parses with XML::Parser (which dies on
# malformed input), and returns tidy's output.
sub tidy {
    my ( $xml ) = @_;

    my ( $from_tidy, $to_tidy );
    my $pid = open2( $from_tidy, $to_tidy,
        "tidy -q -asxhtml --show-errors 0 --show-body-only auto -wrap 0" );
    print {$to_tidy} "$xml";
    close( $to_tidy );

    my $tidy_xml = join( '', <$from_tidy> );
    close( $from_tidy );
    # open2 does not reap its child; without this every entry leaves a zombie.
    waitpid( $pid, 0 );

    my $parser = XML::Parser->new( );
    # NOTE(review): the original value of $doctype (presumably a DOCTYPE
    # declaration, possibly with wrapper markup) was lost; it is empty here
    # exactly as it survived.
    my $doctype      = '';
    my $validate_xml = $tidy_xml;
    $parser->parse( "$doctype$validate_xml" );

    return $tidy_xml;
}