3 # Bacula Systems - Philippe Chauvat
6 # This script is designed to translate Bacula enterprise LaTeX2HTML
7 # documentation files to something more "tunable" / "adaptable" from a CSS
10 # $1 is an HTML file to analyze and translate
11 # The output is automatically sent to $1.out
13 # - add some ids, class
14 # - re-order some piece of HTML code
16 # This script is based on HTML::Parser module
20 # -o: HTML output file
21 # -j: javascript directory
23 # -p: images (pictures) directory
25 # -r: Source directory (ako part of -i arg)
29 use HTML::TreeBuilder ;
30 use HTML::PullParser ;
35 print "translatedoc.pl -i | --input html-source-file
36 [ -o | --output html-destination-file ]
37 [ -j | --javascript javascript-diretory ]
38 [ -c | --css css-directory ]
39 [ -p | --pictures pictures-directory ]
40 [ -n | --name manual_name ]
42 [ -r | --source-directory the_original_root_directory ]
47 # Send message to output in case of debug only
48 # ======================
50 my ($what,$msg) = @_ ;
52 print "\n===============================\nBegin of $msg\n" ;
54 print "\n===============================\nEnd of $msg\n\n" ;
# Package globals used by the rest of the script: the option values filled in
# by GetOptions below, plus the parsed HTML tree ($mytree).
# NOTE(review): no visible option sets $debug or $extractmenu; part of the
# GetOptions call is elided in this listing - confirm against the full file.
59 our($inputfile,$outputfile,$help,$debug,$mytree,$extractmenu,$picturesdir,
60 $cssdir,$javascriptdir,$manualname,$sourcedir) ;
62 # Usage in case of missing arguments
# $#ARGV is -1 when @ARGV is empty, so usage() is called when the script is
# started without any argument.
63 usage() unless($#ARGV > -1) ;
65 # Input file / Output file
# Every option has a long and a one-letter spelling; "=s" means the option
# takes a mandatory string value. usage() is also called when GetOptions
# reports a parsing failure (false return value).
66 GetOptions("input|i=s" => \$inputfile,
67 "output|o=s" => \$outputfile,
68 "pictures|p=s" => \$picturesdir,
69 "css|c=s" => \$cssdir,
70 "source-directory|r=s" => \$sourcedir,
71 "javascript|j=s" => \$javascriptdir,
72 "name|n=s" => \$manualname,
74 "help|?" => \$help) or usage() ;
# The input file is the only option that has no default.
76 usage() unless (defined $inputfile) ;
# Stop early when the input path does not exist on disk.
78 die "$inputfile does not exists.\n" unless -e $inputfile ;
# Fall back to a default for every optional argument that was not supplied.
# Output file: the input file's basename plus ".out", in the current directory.
80 if (! defined $outputfile ) {
81 $outputfile = "./" . basename($inputfile) . ".out" ;
# Pictures directory, used below as the prefix for image sources.
84 if (! defined $picturesdir ) {
85 $picturesdir = "../images" ;
# CSS directory, used below as the prefix for stylesheet links.
# NOTE(review): the assignment of the default value is not visible in this
# listing - confirm it against the full file.
88 if (! defined $cssdir ) {
# Javascript directory.
92 if (! defined $javascriptdir ) {
93 $javascriptdir = "../js" ;
# Manual name, written as the first column of the list-of-anchors file.
95 if (! defined $manualname) {
96 $manualname = "main" ;
# Build the HTML tree of the existing page.
# parse_file() returns undef when the file cannot be opened; fail right here
# with the real cause instead of carrying on with an empty tree and ending up
# with the misleading "no <H1> / <H2>" error below.
$mytree = HTML::TreeBuilder->new ;
defined $mytree->parse_file($inputfile)
    or die "Unable to parse $inputfile: $!\n" ;
debugdump($mytree,"E1: ") ;

# Find the beginning of the content: the first <h1> of the page or, when
# there is none, the first <h2>. (Per the original note, this element is also
# meant to serve as an insertion point.)
$beginning_of_content = $mytree->look_down('_tag','h1')
    || $mytree->look_down('_tag','h2') ;
die "The only thing we could test is a <H1> / <H2> tags, which does not exist there...:$!\n" unless($beginning_of_content) ;
111 # Remove every 'dirty' lines
112 # between <body> and <h1> tag
113 # What is "before" the <h1> tag (between <body> and <h1>) is just dropped
114 # my @lefts = $beginning_of_content->left() ;
115 # foreach my $l (@lefts) {
116 # $l->detach_content() ;
117 # $l->delete_content() ;
122 # # Remove Bacula community logo
123 # if ($childlinks = $beginning_of_content->look_down('_tag','img','alt','\\includegraphics{bacula-logo.eps}')) {
124 # $childlinks->detach() ;
125 # $childlinks->delete() ;
127 # # End remove Bacula logo
129 # # Remove 'address' tag
130 # if ($childlinks = $mytree->look_down('_tag','address')) {
131 # $childlinks->detach() ;
132 # $childlinks->delete() ;
134 # # End remove address
136 # End remove dirty lines
# Clean up images according to their alt text:
#   - images whose alt text contains "registe" (the pictures LaTeX2HTML
#     generates for \textregistered) are replaced by a
#     <span class="expochar"> holding the registered sign;
#   - images whose alt text matches "bacula...-logo" are simply removed.
my @images = $mytree->look_down('_tag','img') ;
foreach my $image (@images) {
    # An <img> without an alt attribute can match neither rule; skipping it
    # avoids matching against undef.
    my $alttext = $image->attr('alt') ;
    next unless defined $alttext ;

    my $is_registered = $alttext =~ /.*registe.*/ ;
    my $is_logo       = $alttext =~ /.*bacula.*-logo.*/ ;
    next unless $is_registered || $is_logo ;

    # The replacement span has to be inserted while the image is still
    # attached to its parent.
    if ($is_registered) {
        $image->preinsert(HTML::Element->new_from_lol(['span', {'class' => 'expochar' }, '®'])) ;
    }

    # Both rules end by removing the image; do it exactly once so an
    # already-deleted element is never touched again.
    $image->detach() ;
    $image->delete() ;
}
# Second pass over the <img> elements still present in the tree: move
# relative image sources under the pictures directory.
153 @images = $mytree->look_down('_tag','img') ;
154 foreach $childlinks (@images) {
155 my $img = $childlinks->attr('src') ;
# Only sources that start with "./" are rewritten.
156 if ($img =~ /^\.\//) {
# NOTE(review): a line is elided here in this listing. As shown, the leading
# "./" is kept, giving "<picturesdir>/./..." - confirm against the full file.
158 $img = $picturesdir . '/' . $img ;
159 $childlinks->attr('src',$img) ;
160 # print "img: " . $img . "\n" ;
# When the page has a <title> element, add a content-type <meta> declaring
# iso-8859-1 as its next sibling (postinsert places the new element right
# after the element it is called on).
163 if ($childlinks = $mytree->look_down('_tag','title')) {
# Disabled debugging aid that printed the title's content.
164 # foreach my $i ($childlinks->content_refs_list) {
166 # print "contenu: " . $$i . "\n" ;
168 $childlinks->postinsert(HTML::Element->new_from_lol(['meta',{ 'http-equiv' => 'content-type', 'content' => 'text/html; charset=iso-8859-1' } ])) ;
# When the page contains a <div class="author_info">, put a centered <h1>
# carrying the product tagline immediately before it.
$childlinks = $mytree->look_down('_tag', 'div','class','author_info') ;
if ($childlinks) {
    # Innermost part first: the tagline itself, then the wrappers around it.
    my $tagline = [ 'big', { 'class' => 'LARGE' }, "The Leading Open Source Backup Solution" ] ;
    my $heading = HTML::Element->new_from_lol(
        [ 'h1', { 'align' => 'center' },
            [ 'div', { 'align' => 'center' }, $tagline ]
        ]
    ) ;
    $childlinks->preinsert($heading) ;
}
# Replace every <span class="MATH"> with a <span class="expochar"> holding
# the registered sign: the new span is inserted before the old one, which is
# then detached and deleted.
# NOTE(review): this applies to all MATH spans regardless of their content -
# presumably the only ones in these manuals are registered marks; confirm.
181 if (my @regs = $mytree->look_down('_tag', 'span','class','MATH')) {
182 foreach $childlinks (@regs) {
183 $childlinks->preinsert(HTML::Element->new_from_lol(['span', {'class' => 'expochar' }, '®'])) ;
184 $childlinks->detach() ;
185 $childlinks->delete() ;
# Insert a centered <div> holding the logo image (borg-logo.png from the
# pictures directory) right after the first <div class="navigation"> found
# in the tree.
# NOTE(review): the closing lines of the preceding block are elided in this
# listing, so whether this test is nested inside it cannot be told from here.
187 if ($childlinks = $mytree->look_down('_tag', 'div','class','navigation')) {
188 $childlinks->postinsert(
189 HTML::Element->new_from_lol(
190 [ 'div', {'align' => 'center'} ,
191 [ 'img', { 'src' => $picturesdir . '/borg-logo.png', 'id' => 'borg_logo','alt' => 'Bacula Community Logo' }]
# Point stylesheet links at the css directory: every <link> whose href
# starts with a purely alphabetic name followed by ".css" gets $cssdir
# prepended; all other links are left untouched.
my @links = $mytree->look_down('_tag','link') ;
for my $stylesheet (@links) {
    my $href = $stylesheet->attr('href') ;
    next unless $href =~ /^[a-zA-Z]+\.css/ ;
    $stylesheet->attr('href', $cssdir . '/' . $href) ;
}
# Give the navigation button images a CSS class and move them under the
# pictures directory. Each rule pairs a pattern tested against the image's
# original src with the class to assign on a match.
# NOTE(review): "index" and "content" buttons receive the 'navigation-next'
# class, exactly as in the original code - confirm this is intended.
my @navs = $mytree->look_down('_tag','div','class','navigation') ;
my @button_rules = (
    [ qr/^next.+/,    'navigation-next' ],
    [ qr/^index.+/,   'navigation-next' ],
    [ qr/^content.+/, 'navigation-next' ],
    [ qr/^prev.+/,    'navigation-prev' ],
    [ qr/^up.+/,      'navigation-up'   ],
) ;
foreach my $nav (@navs) {
    foreach my $button ($nav->look_down('_tag','img')) {
        # Rules are always tested against the src read here, not against
        # the rewritten attribute.
        my $src = $button->attr('src') ;
        foreach my $rule (@button_rules) {
            my ($pattern, $class) = @$rule ;
            next unless $src =~ $pattern ;
            $button->attr('class', $class) ;
            $button->attr('src', $picturesdir . '/' . $src) ;
        }
    }
}
238 # Locate all <a name="whatever_but_SECTION...">
# Record the named anchors of this page in the "list-of-anchors" file of the
# current working directory. The file is opened in append mode, so entries
# accumulate across runs of the script.
# NOTE(review): lines are elided in this listing; no filtering on "SECTION"
# and no close of AFH is visible here - confirm against the full file.
239 my @atags = $mytree->look_down('_tag','a') ;
241 open AFH, ">> list-of-anchors" or die "Unable to append to list-of-anchors file\n";
242 foreach $childlinks (@atags) {
# Anchors without a name attribute (or with a false one such as "0") are
# skipped, since the assignment itself is used as the truth test.
244 if ($atagname = $childlinks->attr('name')) {
# One tab-separated line per anchor: manual name, input file basename,
# anchor name.
245 print AFH $manualname . "\t" . basename($inputfile) . "\t" . $atagname . "\n" ;
250 # Send the tree to an HTML file
# The output file is opened for writing (truncating any existing content);
# failure to open it is fatal.
253 open FH, ">" . $outputfile or die "Unable to create $outputfile: $!\n" ;
# as_HTML arguments: the characters to entity-encode, the indent string (a
# tab) and the optional-end-tags map (an empty hash here).
254 print FH $mytree->as_HTML("<>","\t",{}) ;
# The tree is also printed to standard output, with a different (empty)
# entity argument.
# NOTE(review): the lines between these two prints are elided in this
# listing; whether this print is conditional and where FH is closed cannot
# be told from here - confirm against the full file.
258 print $mytree->as_HTML("","\t",{}) ;