1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
# Copyright (C) 2012 Patrick "P. J." McDermott
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
use strict;
use warnings;
use MarkdownBook::Document;
package MarkdownBook::Document::HTML;
our @ISA = qw(MarkdownBook::Document);
# Parse this document's HTML input file into a tree.
#
# The input path is "<dir>/<file>.html.in", where <dir> is the value of
# $self->{'book'}->dir() and <file> is $self->{'file'}.  The freshly created
# HTML::TreeBuilder object is stored in $self->{'tree'}.
#
# Returns whatever parse_file() returned on success.
# Dies if parse_file() returns undef, which the HTML::Parser documentation
# describes as "the file could not be opened" (reason left in $!).
sub parse_html
{
    my ($self) = @_;
    my $file = $self->{'book'}->dir() . '/' . $self->{'file'} . '.html.in';

    # NOTE(review): HTML::TreeBuilder is not loaded by this file; presumably
    # another module has already loaded it -- confirm.
    my $tree = HTML::TreeBuilder->new();
    $self->{'tree'} = $tree;

    # Fail here, with the offending path, instead of leaving an empty tree
    # behind for later processing steps to trip over.
    my $parsed = $tree->parse_file($file);
    defined $parsed
        or die "Cannot parse HTML input file '$file': $!";

    return $parsed;
}
# Number the <h1>/<h2> headers of the parsed document and give them ids.
#
# Looks up the <body> element(s) of $self->{'tree'} and stores them in
# $self->{'tree_body'}, then walks the h1/h2 elements found under the first
# body.  A two-level counter is kept: each h1 bumps the first number and
# clears the second, each h2 bumps the second.  The counter is rendered as
# "N" or "N.M" (trailing ".0" components are dropped).
#
# For every text chunk of a header that has the form "Title [ident]":
#   * the text becomes "<id>.<section number> Title", where <id> is
#     $self->{'id'};
#   * the element's "id" attribute is set to "ident".
# Text without a bracketed identifier is left unchanged.
#
# Returns nothing useful.
sub number_sections
{
    my ($self) = @_;
    my @secnums = (0, 0);

    @{ $self->{'tree_body'} } =
        MarkdownBook::HTMLTree::find_elements_by_tag_names(
            $self->{'tree'}, ('body'));
    my @headers = MarkdownBook::HTMLTree::find_elements_by_tag_names(
        $self->{'tree_body'}[0], ('h1', 'h2'));

    foreach my $header (@headers) {
        # Header level: "h1" -> 1, "h2" -> 2.
        (my $level = $header->tag()) =~ s/^h(\d)$/$1/;

        # Entering a header clears the counters of all deeper levels.  Doing
        # this unconditionally is safe: the deeper counters are already zero
        # when the level did not change.
        $_ = 0 foreach @secnums[$level .. $#secnums];
        ++$secnums[$level - 1];

        my $secstr = join('.', @secnums);
        $secstr =~ s/(\.0)*$//;

        # NOTE(review): each_text() is assumed to invoke the callback with
        # (reference to the element, the text aliased for in-place update,
        # the extra argument passed below) -- that is how the arguments are
        # used here; confirm against MarkdownBook::HTMLTree.
        MarkdownBook::HTMLTree::each_text(sub {
            my ($elem_ref, $text, $prefix) = @_;

            # Prefix section title with section number.  The capture is
            # non-greedy so the blanks between the title and "[ident]" are
            # dropped rather than kept as trailing whitespace.
            (my $sectitle = $text)
                =~ s/^([^\[]+?)[ ]*\[[^\]]+\]$/$prefix.$secstr $1/;

            # Set "id" attribute from the bracketed identifier.
            if ($text =~ m/^[^\[\]]*\[([^\]]+)\][^\[\]]*$/) {
                ${$elem_ref}->attr('id', $1);
            }

            # Set section title (writes through the alias to the caller).
            $_[1] = $sectitle;
        }, $header, $self->{'id'});
    }

    return;
}
# Serialize the contents of the document body as HTML.
#
# Every child of the first element in $self->{'tree_body'} is rendered with
# as_HTML() and followed by a newline; the <body> element itself is not
# emitted.  '<>&' is the set of characters to entity-encode and '' the
# indentation string.
#
# Returns the concatenated HTML, or the empty string when the body has no
# children.
sub output
{
    my ($self) = @_;

    # Don't omit any end tags: as_HTML() omits the end tag of every tag that
    # has a true value in this map, so an empty map keeps them all.
    my %opt_end_tags = ();

    my $out = '';
    foreach my $elem ($self->{'tree_body'}[0]->content_list()) {
        # The original author assumes every child is an HTML::Element object
        # (no bare text directly under <body>).
        $out .= $elem->as_HTML('<>&', '', \%opt_end_tags) . "\n";
    }

    return $out;
}
1;
|