#! /usr/bin/perl

# number-sections.pl - print a hierarchical section number (e.g. "2.1.3")
# next to the text of every h1..h6 header of an HTML document.  The document
# is read from the files named on the command line, or from standard input.
#
# Provenance: new file notes/number-sections.pl, added by commit
# 31b078cf8ff0fe7156332fa6cef033a351f805f4 (P. J. McDermott,
# Tue, 05 Jun 2012 21:12:29 -0400), "Add test script that numbers section
# headers."

use strict;
use warnings;

use Scalar::Util qw(blessed);

use HTML::TreeBuilder;

# Return 1 if $node is an HTML::Element object (or an object of a subclass,
# such as an HTML::TreeBuilder root), 0 otherwise.  Text nodes in a content
# list are plain strings and therefore yield 0.
sub _is_element
{
    my ($node) = @_;

    return (blessed($node) && $node->isa('HTML::Element')) ? 1 : 0;
}

# find_elements_by_tag_names($elem, @tagnames)
#
# Return the direct child elements of $elem whose tag is one of @tagnames, in
# document order.  Only immediate children are examined; the search does not
# descend into them.
#
# Dies if $elem is not an HTML::Element (or subclass) object.
sub find_elements_by_tag_names
{
    my ($elem, @tagnames) = @_;
    if (!_is_element($elem)) {
        die('Invalid argument to find_elements_by_tag_names');
    }

    # Hash lookup instead of re-scanning @tagnames for every child.
    my %wanted = map { $_ => 1 } @tagnames;

    return grep { _is_element($_) && $wanted{$_->tag()} }
        $elem->content_list();
}

# number_sections($doc)
#
# Print one line per header element found directly under the <body> of $doc,
# in the form
#
#     <section number>:<TAB>"<header text>"
#
# The section number has one component per header level, joined with '.',
# with trailing zero components removed.  Prints nothing when the document
# has no <body> child.
#
# Limitation: headers nested inside other elements (e.g. a <div>) are not
# direct children of <body> and are not numbered.
#
# Dies if $doc is not an HTML::TreeBuilder (or subclass) object.
sub number_sections
{
    my ($doc) = @_;
    if (!(blessed($doc) && $doc->isa('HTML::TreeBuilder'))) {
        die('Invalid argument to number_sections');
    }

    my ($body) = find_elements_by_tag_names($doc, 'body');
    if (!defined($body)) {
        return;
    }

    my @headers = find_elements_by_tag_names($body, map { "h$_" } 1 .. 6);

    # One counter per header level; index 0 counts h1, index 5 counts h6.
    my @secnums = (0) x 6;

    foreach my $header (@headers) {
        my ($level) = $header->tag() =~ /\Ah([1-6])\z/;

        # A header at this level starts a new section, so every deeper
        # counter restarts.  (When the level repeats, the deeper counters are
        # already zero, so resetting unconditionally is harmless.)
        foreach my $counter (@secnums[$level .. $#secnums]) {
            $counter = 0;
        }
        ++$secnums[$level - 1];

        # Drop unused deeper levels: "2.1.0.0.0.0" becomes "2.1".
        my $secstr = join('.', @secnums);
        $secstr =~ s/(?:\.0)*\z//;

        # as_text() flattens inline markup inside the header; printing the
        # raw content list would stringify child elements as
        # "HTML::Element=HASH(0x...)".
        print($secstr, ":\t\"", $header->as_text(), "\"\n");
    }

    return;
}

# Parse the HTML named on the command line (or standard input) and print its
# numbered section headers.
sub main
{
    my $doc = HTML::TreeBuilder->new();
    while (defined(my $line = <>)) {
        $doc->parse($line);
    }
    # parse() only feeds chunks to the parser; eof() must be called before
    # the tree is used so that the parser can finish building it.
    $doc->eof();

    number_sections($doc);

    # Release the tree explicitly, as the HTML::TreeBuilder documentation
    # recommends once a tree is no longer needed.
    $doc->delete();

    return;
}

main();