summaryrefslogtreecommitdiffstats
path: root/notes/number-sections.pl
blob: 3cef3271011132e4e4b99881e458e6a8e49cb38b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#! /usr/bin/perl

use strict;
use warnings;

use HTML::TreeBuilder;

# Select the immediate children of an element by tag name.
#
# Arguments:
#   $elem     - the parent node; ref($elem) must be exactly
#               'HTML::TreeBuilder' or 'HTML::Element', otherwise the
#               call dies.
#   @tagnames - the tag names to keep.
#
# Returns the children of $elem (in content order) whose class is exactly
# 'HTML::Element' and whose tag() equals one of @tagnames.  Only the
# entries of $elem->content_list() are examined; entries that are not
# HTML::Element references (such as plain text) are ignored.
sub find_elements_by_tag_names($@)
{
	my ($elem, @tagnames) = @_;

	my $class = ref($elem);
	unless ($class eq 'HTML::TreeBuilder' || $class eq 'HTML::Element') {
		die('Invalid argument to find_elements_by_tag_names');
	}

	# Lookup set so each child needs a single membership test.
	my %wanted = map { $_ => 1 } @tagnames;

	# ref() is tested first so tag() is never called on a non-element.
	return grep {
		ref($_) eq 'HTML::Element' && $wanted{ $_->tag() }
	} $elem->content_list();
}

# Print a dotted section number and the heading text for every h1..h6
# element that is a direct child of the document's <body>.
#
# Arguments:
#   $doc - the parsed document; ref($doc) must be exactly
#          'HTML::TreeBuilder', otherwise the call dies.
#
# Output: one line per heading on the currently selected output handle,
# in the form
#   <number>:<TAB>"<heading text>"
# where <number> is the per-level counters joined with '.', with trailing
# zero components removed (e.g. "2", "2.1", "2.1.3").
#
# Headings are looked up with find_elements_by_tag_names(), so only
# immediate children of <body> are numbered; if $doc has no <body> child
# that helper dies on the undefined argument.
sub number_sections($)
{
	my ($doc) = @_;
	if (ref($doc) ne 'HTML::TreeBuilder') {
		die('Invalid argument to number_sections');
	}

	my @body = find_elements_by_tag_names($doc, ('body'));
	my @headers = find_elements_by_tag_names($body[0],
		('h1', 'h2', 'h3', 'h4', 'h5', 'h6'));

	# One counter per heading level: index 0 counts h1, index 5 counts h6.
	my @secnums = (0) x 6;

	foreach my $header (@headers) {
		# Only h1..h6 are requested above, so the tag always ends in the
		# level digit.
		my ($level) = $header->tag() =~ /^h(\d)$/;

		# A heading at this level starts a new subtree, so every deeper
		# counter restarts.  When the level repeats they are already zero,
		# which makes the unconditional reset safe.
		$_ = 0 foreach @secnums[$level .. $#secnums];
		++$secnums[$level - 1];

		my $secstr = join('.', @secnums);
		# Drop the unused deeper levels: "2.1.0.0.0.0" becomes "2.1".
		$secstr =~ s/(\.0)*$//;

		# as_text() flattens nested markup (links, emphasis, ...) to plain
		# text; printing the raw content list would stringify child
		# elements as "HTML::Element=HASH(0x...)".
		print($secstr, ":\t\"", $header->as_text(), "\"\n");
	}
}

# Entry point: parse HTML from the files named on the command line (or
# from standard input when none are given) and print the section numbers
# of its headings via number_sections().
sub main()
{
	my $doc = HTML::TreeBuilder->new();
	while (my $line = <>) {
		$doc->parse($line);
	}
	# parse() is fed in chunks, so the builder must be told explicitly that
	# the input is complete; eof() finalizes the tree and has to run before
	# the tree is inspected.
	$doc->eof();
	number_sections($doc);
}

main();