diff options
author | P. J. McDermott <pjm@nac.net> | 2012-06-05 21:12:29 (EDT) |
---|---|---|
committer | P. J. McDermott <pjm@nac.net> | 2012-06-05 21:12:29 (EDT) |
commit | 31b078cf8ff0fe7156332fa6cef033a351f805f4 (patch) | |
tree | ebf2860189cab6aa2ce908f62ebbbca9b22aadfb | |
parent | a5d1ce13172e8d91d24ede48f28cc269e3d1aa23 (diff) |
Add test script that numbers section headers.
-rwxr-xr-x | notes/number-sections.pl | 73 |
1 files changed, 73 insertions, 0 deletions
#! /usr/bin/perl
#
# number-sections.pl - print a numbered outline of an HTML document's headers.
#
# Reads HTML from the files named on the command line (or from standard input
# when none are given).  For every h1-h6 element that is a direct child of
# <body>, one line is printed in the form
#
#     1.2.3:<TAB>"Header text"
#
# where the dotted number is the header's position in the section hierarchy.

use strict;
use warnings;

use HTML::TreeBuilder;
use Scalar::Util qw(blessed);

# Header tags, shallowest level first.  The number of entries also fixes how
# many components a section number can have.
my @HEADER_TAGS = qw(h1 h2 h3 h4 h5 h6);

# Return the direct children of $elem whose tag is one of @tagnames.
#
#   $elem     - an HTML::Element (or a subclass such as HTML::TreeBuilder)
#   @tagnames - tag names to match; compared with string equality against
#               each child's tag()
#
# Only immediate children are examined; the search does not descend into
# them.  Text children (plain strings in the content list) are skipped.
# Matches are returned in content-list order.  In scalar context the number
# of matches is returned.
#
# Dies if $elem is not an HTML::Element.
sub find_elements_by_tag_names
{
    my ($elem, @tagnames) = @_;

    # isa() instead of an exact ref() comparison, so that subclasses of
    # HTML::Element are accepted as well.
    if (!blessed($elem) || !$elem->isa('HTML::Element')) {
        die('Invalid argument to find_elements_by_tag_names');
    }

    my %is_wanted = map { $_ => 1 } @tagnames;

    return grep {
        blessed($_) && $_->isa('HTML::Element') && $is_wanted{$_->tag()}
    } $elem->content_list();
}

# Print one numbered line per header found directly under <body>.
#
#   $doc - a parsed HTML::TreeBuilder tree
#
# A counter is kept for each header level.  Encountering a header increments
# the counter of its own level and resets the counters of all deeper levels.
# The section number is the dot-joined list of counters with trailing zero
# components removed.  Skipped levels therefore show up as an inner 0, e.g.
# an h3 directly after the first h1 is numbered "1.0.1".
#
# Returns nothing.  A document without a <body> produces no output.
# Dies if $doc is not an HTML::TreeBuilder.
sub number_sections
{
    my ($doc) = @_;

    if (!blessed($doc) || !$doc->isa('HTML::TreeBuilder')) {
        die('Invalid argument to number_sections');
    }

    my ($body) = find_elements_by_tag_names($doc, 'body');
    if (!defined($body)) {
        # Nothing to number; avoid handing undef to the helper, which would
        # die with a message that points at the wrong problem.
        return;
    }

    my @secnums = (0) x @HEADER_TAGS;

    foreach my $header (find_elements_by_tag_names($body, @HEADER_TAGS)) {
        # The tag is one of @HEADER_TAGS, so this always captures 1 .. 6.
        my ($level) = $header->tag() =~ /\Ah([1-6])\z/;

        # Counters deeper than this header start over.  For the deepest
        # level the range is empty and nothing is reset.
        foreach my $idx ($level .. $#secnums) {
            $secnums[$idx] = 0;
        }
        ++$secnums[$level - 1];

        my $secstr = join('.', @secnums);
        $secstr =~ s/(?:\.0)+\z//;

        # as_text() flattens inline markup inside the header to plain text;
        # printing the raw content list would stringify child elements as
        # object references.
        print($secstr, ":\t\"", $header->as_text(), "\"\n");
    }

    return;
}

# Parse the input line by line, then print its section outline.
sub main
{
    my $doc = HTML::TreeBuilder->new();

    while (defined(my $line = <>)) {
        $doc->parse($line);
    }
    # Required after feeding the parser in chunks: eof() flushes pending
    # input and finalises the tree before it is inspected.
    $doc->eof();

    number_sections($doc);

    # Release the tree explicitly; HTML::Element versions without weak
    # references need this to break parent/child reference cycles.
    $doc->delete();

    return;
}

main();