summary refs log tree commit diff stats
diff options
context:
space:
mode:
author P. J. McDermott <pjm@nac.net> 2012-06-05 21:12:29 (EDT)
committer P. J. McDermott <pjm@nac.net> 2012-06-05 21:12:29 (EDT)
commit 31b078cf8ff0fe7156332fa6cef033a351f805f4 (patch)
tree ebf2860189cab6aa2ce908f62ebbbca9b22aadfb
parent a5d1ce13172e8d91d24ede48f28cc269e3d1aa23 (diff)
Add test script that numbers section headers.
-rwxr-xr-x notes/number-sections.pl 73
1 files changed, 73 insertions, 0 deletions
diff --git a/notes/number-sections.pl b/notes/number-sections.pl
new file mode 100755
index 0000000..3cef327
--- /dev/null
+++ b/notes/number-sections.pl
@@ -0,0 +1,73 @@
+#! /usr/bin/perl
+
+use strict;
+use warnings;
+
+use HTML::TreeBuilder;
+
+# Return the direct children of $elem whose tag name is one of @tagnames.
+#
+# Arguments:
+#   $elem     - an HTML::TreeBuilder or HTML::Element object to search in.
+#   @tagnames - tag names to match (compared with string equality).
+# Returns: the matching child elements, in document order.
+# Dies if $elem is not exactly one of the two accepted classes.
+# Only the immediate content of $elem is examined; the search does not
+# descend into nested elements.
+sub find_elements_by_tag_names($@)
+{
+    my ($elem, @tagnames) = @_;
+
+    my $class = ref($elem);
+    unless ($class eq 'HTML::TreeBuilder' || $class eq 'HTML::Element') {
+        die('Invalid argument to find_elements_by_tag_names');
+    }
+
+    my @matches;
+    for my $child ($elem->content_list()) {
+        # Skip anything that is not an element object (such as plain text
+        # segments, which are not references).
+        next unless ref($child) eq 'HTML::Element';
+
+        my $tag = $child->tag();
+        push(@matches, $child) if grep { $_ eq $tag } @tagnames;
+    }
+
+    return @matches;
+}
+
+# Print a hierarchical section number for each header in a parsed document.
+#
+# Arguments:
+#   $doc - an HTML::TreeBuilder object holding the parsed document.
+# Output: one line per header on standard output, formatted as
+#   <number>:<TAB>"<header text>"
+# where <number> is a dotted counter such as "2.1.3" with trailing zero
+# components removed.
+# Only h1..h6 elements found by find_elements_by_tag_names() in <body> are
+# numbered.
+# Dies if $doc is not an HTML::TreeBuilder object or if no <body> element
+# is found.
+sub number_sections($)
+{
+    my ($doc) = @_;
+    if (ref($doc) ne 'HTML::TreeBuilder') {
+        die('Invalid argument to number_sections');
+    }
+
+    my ($body) = find_elements_by_tag_names($doc, 'body');
+    if (!defined($body)) {
+        # Fail here with an accurate message instead of handing undef to
+        # the helper, which would blame the wrong function.
+        die('No body element found by number_sections');
+    }
+
+    my @headers = find_elements_by_tag_names($body,
+        'h1', 'h2', 'h3', 'h4', 'h5', 'h6');
+
+    # One counter per header level; index 0 is h1, index 5 is h6.
+    my @secnums = (0) x 6;
+
+    foreach my $header (@headers) {
+        # The tag is one of h1..h6, so the capture is the numeric level.
+        my ($level) = $header->tag() =~ /^h(\d)$/;
+
+        # A header at this level starts a new numbering scope for every
+        # deeper level.  When the level repeats, the deeper counters are
+        # already zero, so resetting unconditionally is harmless.
+        $_ = 0 foreach @secnums[$level .. $#secnums];
+        ++$secnums[$level - 1];
+
+        my $secstr = join('.', @secnums);
+        $secstr =~ s/(?:\.0)+$//;
+
+        # as_text() flattens inline markup inside the header; printing the
+        # content list directly would emit stringified element references
+        # for headers such as <h2><a>Title</a></h2>.
+        print($secstr, ":\t\"", $header->as_text(), "\"\n");
+    }
+
+    return;
+}
+
+# Parse HTML from the files named on the command line (or from standard
+# input when none are given) and print the numbered section headers.
+sub main()
+{
+    my $doc = HTML::TreeBuilder->new();
+    while (my $line = <>) {
+        $doc->parse($line);
+    }
+    # parse() may buffer incomplete input; eof() signals the end of the
+    # document so the parser flushes pending text and finishes the tree
+    # before it is inspected.
+    $doc->eof();
+    number_sections($doc);
+}
+
+main();