3 # Bacula Systems - Philippe Chauvat
6 # This script is designed to translate Bacula enterprise LaTeX2HTML
7 # documentation files to something more "tunable" / "adaptable" from a CSS
10 # $1 is an HTML file to analyze and translate
11 # The output is automatically sent to $1.out
13 # - add some ids, class
14 # - re-order some piece of HTML code
16 # This script is based on HTML::Parser module
20 # -o: HTML output file
21 # -j: javascript directory
23 # -p: images (pictures) directory
25 # -e: Request a menu extraction
26 # -r: Source directory (ako part of -i arg)
30 use HTML::TreeBuilder ;
31 use HTML::PullParser ;
# Print the command-line synopsis of translatedoc.pl.
# NOTE(review): the enclosing sub header and the end of this string literal
# are not visible in this excerpt.
36 print "translatedoc.pl -i | --input html-source-file
37 [ -o | --output html-destination-file ]
38 [ -j | --javascript javascript-diretory ]
39 [ -c | --css css-directory ]
40 [ -p | --pictures pictures-directory ]
41 [ -n | --name manual_name ]
44 [ -r | --source-directory the_original_root_directory ]
49 # Send a message to the output, in debug mode only
50 # ======================
# Arguments: ($what, $msg). $msg is the label interpolated into the
# "Begin of" / "End of" banners printed below.
# NOTE(review): the debug guard and the use made of $what are not visible
# in this excerpt -- confirm against the full file.
52 my ($what,$msg) = @_ ;
54 print "\n===============================\nBegin of $msg\n" ;
56 print "\n===============================\nEnd of $msg\n\n" ;
60 # Build a references list
61 # Needed to link text and tables, images and so on (see figure xxx)
63 # IN: root dir and reference filename
64 # OUT: a reference file (a kind of hash)
# NOTE(review): the root dir is interpolated unquoted into a shell "find"
# command below -- confirm it can never contain spaces or shell
# metacharacters.
65 # =======================
66 sub build_references {
68 my $referencefile = $_[1] ;
69 print "Root dir: $root\n" ;
70 print "References file: $referencefile" ;
# Pass 1: scan each .aux file found under $root for \newlabel entries.
74 for my $i (`find $root -iname "[a-zA-Z0-9]*.aux"`) {
75 print "Building references for $i\n" ;
76 open FH, "< $i" or die "Unable to open file $i\n" ;
80 if ($l =~ /newlabel/) {
81 ### \newlabel{figbs6:fdstorageaddress}{{2.1}{15}{Backup Over WAN\relax \relax }{figure.caption.15}{}}
# Split on every brace: for the sample line above, $elts[1] is the label
# name and $elts[4] is "2.1".
82 my @elts = split('{|}',$l) ;
84 if ($elts[1] ne "" and $elts[4] ne "") {
85 print "Clef: $elts[1]\n" ;
# Index both ways: label -> anchor (the file is filled in by the HTML pass
# below) and anchor -> label.
89 $references{$elts[1]}{anchor} = $elts[4] ;
90 $references{$elts[1]}{file} = "" ;
91 $references{$elts[4]}{latexref} = $elts[1] ;
# Pass 2: scan each .html file found under $root for <A NAME="..."> anchors.
97 for my $i (`find $root -iname "[a-zA-Z0-9]*.html"`) {
98 print "Building anchors for $i\n" ;
99 open FH, "< $i" or die "Unable to open file $i\n" ;
102 foreach $l (@content) {
103 # print "ligne: $l\n" ;
104 if ($l =~ m/<A [^>]*NAME *= *"([^>]*)"/) {
106 print "L matche: $l / $1\n" ;
# The anchor name was recorded during pass 1: remember which HTML file
# contains it, under the matching label.
107 if (exists $references{$1}{latexref}) {
108 print "Bingo: $i --- " . $references{$1}{latexref} . "\n" ;
109 $references{$references{$1}{latexref}}{file} = $i ;
# Write one "key file anchor" record per line, only for entries whose
# file was found.
114 print "Writing references\n" ;
115 open FH,"> $referencefile" or die "Unable to create file $referencefile\n" ;
116 foreach $k (keys %references) {
117 if ($references{$k}{file} ne "") {
119 print FH $k . " " . $references{$k}{file} . " " . $references{$k}{anchor} . "\n" ;
# References reading method
# Allows references to be resolved across HTML files.
# =========================
# IN:  path of a reference file holding one "key file anchor" record per
#      line, the three fields being separated by single spaces.
# OUT: a hash (returned as a list) mapping each key to
#      { file => ..., anchor => ... }.
# Dies if the reference file cannot be opened.
sub read_references {
    my $referencefile = $_[0] ;
    my %references ;

    open(my $fh, '<', $referencefile) or die "Unable to open $referencefile\n" ;
    while (my $line = <$fh>) {
        # Without the chomp the anchor would keep its trailing newline.
        chomp $line ;
        my ($key, $file, $anchor) = split / /, $line ;
        # Skip blank or truncated records instead of storing undef fields.
        next unless defined $anchor ;
        # Records were previously stored in a misspelled hash (%refences),
        # so the references hash stayed empty.
        $references{$key}{file}   = $file ;
        $references{$key}{anchor} = $anchor ;
    }
    close $fh ;

    return %references ;
}
# Script-wide settings; the option variables are filled in by GetOptions below.
144 our($inputfile,$outputfile,$help,$debug,$mytree,$extractmenu,$picturesdir,
145 $cssdir,$javascriptdir,$manualname,$sourcedir) ;
147 # Usage in case of missing arguments
148 usage() unless($#ARGV > -1) ;
150 # Input file / Output file
151 GetOptions("input|i=s" => \$inputfile,
152 "output|o=s" => \$outputfile,
153 "extract|e" => \$extractmenu,
154 "pictures|p=s" => \$picturesdir,
155 "css|c=s" => \$cssdir,
156 "source-directory|r=s" => \$sourcedir,
157 "javascript|j=s" => \$javascriptdir,
158 "name|n=s" => \$manualname,
159 "debug|d" => \$debug,
160 "help|?" => \$help) or usage() ;
# The input file is mandatory and must exist.
162 usage() unless (defined $inputfile) ;
164 die "$inputfile does not exists.\n" unless -e $inputfile ;
# Defaults for the options that were not given on the command line.
# The output file defaults to "<input basename>.out" in the current directory.
166 if (! defined $outputfile ) {
167 $outputfile = "./" . basename($inputfile) . ".out" ;
170 if (! defined $picturesdir ) {
171 $picturesdir = "../images" ;
174 if (! defined $cssdir ) {
178 if (! defined $javascriptdir ) {
179 $javascriptdir = "../js" ;
181 if (! defined $manualname) {
182 $manualname = "main" ;
# One menu file per manual, named after the manual.
184 my $MENUFILE="./wholemenu_" . $manualname . ".html" ;
# Cross-reference support (build_references / read_references) is
# currently commented out.
185 # my $REFERENCEFILE="./references_to_build.txt" ;
187 # if (defined $sourcedir) {
188 # %references = build_references($sourcedir,$REFERENCEFILE) ;
191 # %references = read_references($REFERENCEFILE) ;
194 # Build HTML Tree of existing page
195 $mytree = HTML::TreeBuilder->new ;
196 $mytree->parse_file($inputfile) ;
198 # Find the beginning of the content
199 # Which is also a point where to put
# The first <h1> is used, falling back to the first <h2>; the script dies
# when neither exists.
# NOTE(review): $! in the die message below is not set by look_down, so the
# text after the colon is unrelated to this failure.
201 $beginning_of_content = $mytree->look_down('_tag','h1') ;
202 $beginning_of_content = $mytree->look_down('_tag','h2') unless ($beginning_of_content) ;
203 die "The only thing we could test is a <H1> / <H2> tags, which does not exist there...:$!\n" unless($beginning_of_content) ;
206 # Look for the table of contents
207 # we must translate it at a position before the content itself
# The table of contents is the <ul class="ChildLinks"> element; it is
# detached from the page tree here.
209 if ($thecopy = $mytree->look_down('_tag', 'ul', 'class','ChildLinks')) {
210 $childlinks = $thecopy->detach() ;
211 debugdump($thecopy,"Navigation system copy 1") ;
213 # Clean up the content of table of contents
# Loops for as long as a <br> can still be found below the table of contents.
214 while ($d = $thecopy->look_down('_tag','br')) {
218 debugdump($thecopy,"Navigation system copy 2") ;
# Drop the <a name="CHILD_LINKS"> marker anchor.
220 if ($childlinks = $mytree->look_down('_tag','a','name','CHILD_LINKS')) {
221 $childlinks->detach() ;
222 $childlinks->delete() ;
223 debugdump($thecopy,"Navigation system copy 3") ;
226 # Remove old navigation part.... (next, up, previous, and so on)
227 if ($childlinks = $mytree->look_down('_tag', 'div', 'class', 'navigation')) {
228 $childlinks->detach_content() ;
229 $childlinks->detach() ;
230 $childlinks->delete_content() ;
231 $childlinks->delete ;
232 debugdump($thecopy,"Navigation system copy 4") ;
234 # End removing navigation
237 # Remove every 'dirty' line
238 # between <body> and <h1> tag
239 # What is "before" the <h1> tag (between <body> and <h1>) is just dropped
# left() returns the siblings located before the heading found above.
240 my @lefts = $beginning_of_content->left() ;
241 foreach my $l (@lefts) {
242 $l->detach_content() ;
243 $l->delete_content() ;
247 debugdump($thecopy,"Navigation system copy 5") ;
249 # Remove Bacula community logo
# The logo is looked up by its alt text, below the content heading only.
250 if ($childlinks = $beginning_of_content->look_down('_tag','img','alt','\\includegraphics{bacula-logo.eps}')) {
251 $childlinks->detach() ;
252 $childlinks->delete() ;
254 # End remove Bacula logo
256 # Remove 'address' tag
257 if ($childlinks = $mytree->look_down('_tag','address')) {
258 $childlinks->detach() ;
259 $childlinks->delete() ;
# Hook the menu initialisation on page load, then take the remaining
# content out of <body>; it is kept in @content.
263 my $thebody = $mytree->look_down('_tag','body') ;
264 $thebody->attr('onload','menuonload(this);') ;
265 debugdump($thebody,"The body BEFORE") ;
266 my @content = $thebody->detach_content() ;
267 # End remove dirty lines
269 # What do we do with the menu?
270 # If the menu file exists then just use it
273 # Build the menu file
# NOTE(review): the conditions around this section are not visible in this
# excerpt; the statements below replace $thecopy with the parsed $MENUFILE.
274 $thecopy = HTML::TreeBuilder->new ;
275 $thecopy->parse_file($MENUFILE) ;
277 debugdump($thecopy,"Navigation system copy 6") ;
281 # Create a div to manage the whole page
# Layout, as far as visible here: a top banner with two images, a
# search / breadcrumbs bar, then the page area holding the left navigation
# div and the content div.
282 my $mainpage = HTML::Element->new_from_lol(
283 ['div', { 'class' => "bsys_mainpageclass", 'id' => "bsys_mainpageid" },
284 [ 'div', {'class' => 'bsys_topclass', 'id' => 'bsys_topid'},
285 [ 'img', { 'src' => $picturesdir . '/bsys-logo.png', 'id' => 'bsys_logo','alt' => 'Bacula Systems Logo' }],
286 [ 'img', { 'src' => $picturesdir . '/bsys-doctitle.png', 'id' => 'bsys_doctitle', 'alt' => 'Bacula Enterprise Documentation text image'}]
288 [ 'div', {'id' => 'bsys_breadnsearchid', 'class' => 'bsys_breadnsearchclass'},
289 ['div', { 'class' => 'bsys_searchclass', 'id' => 'bsys_searchid'},
290 ['span','Search' , {'class' => 'bsys_searchtitleclass','id' => 'bsys_searchtitleid'}],
291 [ 'input', { 'class' => 'bsys_searchfieldclass', 'id' => 'bsys_searchfieldid', 'type' => 'text', 'value' => 'Type your text here' }]
293 [ 'div', { 'class' => 'bsys_breadcrumbsclass', 'id' => 'bsys_breadcrumbsid'},
294 [ 'p', { 'class' => 'bsys_breadcrumbscontentclass', 'id' => 'bsys_breadcrumbscontentid' }, 'Main' ],
297 [ 'div', { 'class' => "bsys_pageclass", 'id' => "bsys_pageid"},
298 [ 'div', { 'class' => "bsys_leftnavigationclass", 'id' => "bsys_leftnavigationid" },
# The attribute key must be exactly 'class': with a trailing space it names
# a different attribute than the 'class' used by every sibling div.
301 [ 'div', { 'class' => 'bsys_contentclass', 'id' => 'bsys_contentid' },
302 # foreach (@content) {
303 # ['div', {'class' => 'bsys_truecontent'}, $_ ]
# NOTE(review): the map below appears to yield a single 'div', {...} pair
# followed by all of @content, i.e. one bsys_truecontent div holding the
# whole former body content -- confirm this is the intent.
305 [ map (('div', {'class' => 'bsys_truecontent' }), $_ ), @content ]
310 debugdump($mainpage,"Main page build") ;
# Put the new page skeleton into the (emptied) <body>.
311 $beginning_of_content = $thebody->push_content($mainpage) ;
312 # Remove "Contents" links
# The parent element of the link is deleted, not just the <a> itself.
313 if ($childlinks = $mytree->look_down('_tag','a','href','Contents.html')) {
314 $childlinks = $childlinks->parent() ;
315 $childlinks->delete() ;
317 debugdump($thecopy,"Navigation system copy 7") ;
319 # Now begins the modification for navigation
320 # ==========================================
321 # We must analyze what is below <ul class="ChildLinks">
322 # At first level, we consider each <li> as part of the main menu
323 # At other levels, we consider each <li> as sub(sub | ...)menus
# NOTE(review): look_down returns nothing when no such <ul> exists, in which
# case the attr() call below fails -- confirm one is always present.
324 $childlinks = $mytree->look_down('_tag','ul','class','ChildLinks') ;
325 $childlinks->attr('id','childlinksid') ;
327 # This counter is for generating unique identifiers
329 my $ullevelcounter = 0 ;
330 my $ulpreviousdepth = 0 ;
332 # Browse all the <ul class="ChildLinks"> node
333 # ------------------------------------------
334 foreach my $d ($childlinks->descendants()) {
336 # Which tag are we checking ?
337 my $tag = $d->tag() ;
339 # <a> tags (presumably -- the guarding condition is not visible in this
# excerpt): attach the click handler
341 $d->attr('onclick',"menuonclick(this);") ;
344 # <ul>s represent "openable" menus
345 elsif ($tag =~ /ul/) {
# Track the nesting depth of the <ul> being visited.
346 if ($d->depth() > $ulpreviousdepth) {
352 $ulpreviousdepth=$d->depth() ;
354 # We need to identify uniquely this <ul> start tag to be able to "open" or "close" it
355 my $ullevel= 'level' . $ullevelcounter ;
356 my $idf = 'bsys_ul_' . $ulcounter++ ;
357 $d->attr('class',$ullevel . ' expandingMenu expandingMenuNotSelected') ;
358 $d->attr('id', $idf) ;
359 # $d->attr('style','display: none;') ;
361 # We now know the previous <li> tag is a (sub)menu header too
362 # Adding the "onclick" behavior
363 my $previoustagli = $d->look_up('_tag','li') ; # <li> just above
364 $previoustagli->attr('pct_onmouseover',"over_expandingMenuHeader(this,\'" . $idf . "\')") ;
365 $previoustagli->attr('pct_onmouseout',"out_expandingMenuHeader(this,\'" . $idf . "\')") ;
366 my $previoustaga = $d->left('_tag','a') ; # <a> just above
367 $previoustaga->attr('onclick',"menuonclick(this);") ;
369 # Do not forget what we defined earlier...
# The header class is appended to whatever class the <li> already carries.
370 my $class = $previoustagli->attr('class') ;
371 $class = $class . ' expandingMenuHeader' ;
372 $previoustagli->attr('class', $class) ;
375 # <li>s represent at least menu items
376 # and sometimes menu headers (see <ul> treatment)
377 elsif ($tag =~ /li/) {
379 # At this stage we only know <li> is a menu item.
381 $d->attr('class', 'expandingMenuItem') ;
# Per-manual page metadata, keyed by the manual name given with -n / --name.
# Each entry holds the text for <meta name="description"> and
# <meta name="keywords">. The manual's own stylesheet is "<name>.css".
# This table replaces six copy-pasted branches that differed only in
# these strings.
my %manual_meta = (
    'main' => {
        'description' => 'Bacula Systems Enterprise Main Reference Manual',
        'keywords'    => 'Bacula Systems, Bacula Enterprise 6, Main Reference Manual',
    },
    'developers' => {
        'description' => 'Bacula Systems Enterprise Developer\'s Guide',
        'keywords'    => 'Bacula Systems, Bacula Enterprise 6, Developer\'s Guide',
    },
    'console' => {
        'description' => 'Bacula Enterprise Command Console and Operators Guide',
        'keywords'    => 'Bacula Systems, Bacula Enterprise 6, Command Console and Operators Guide',
    },
    'utility' => {
        'description' => 'Bacula Enterprise Utility Programs',
        'keywords'    => 'Bacula Systems, Bacula Enterprise 6, Utility Programs',
    },
    'problems' => {
        'description' => 'Bacula Enterprise Problem Resolution Guide',
        'keywords'    => 'Bacula Systems, Bacula Enterprise 6, Problem Resolution Guide',
    },
    'misc' => {
        'description' => 'Bacula Enterprise Miscellaneous Guide',
        'keywords'    => 'Bacula Systems, Bacula Enterprise 6, Miscellaneous Guide',
    },
) ;

# Unknown manual names leave the page head untouched.
if (my $meta = $manual_meta{$manualname}) {
    # Rewrite the content of the description and keywords <meta> tags,
    # when the page has them.
    foreach my $metaname ('description', 'keywords') {
        if (my $metatag = $mytree->look_down('_tag','meta','name',$metaname)) {
            $metatag->attr('content', $meta->{$metaname}) ;
        }
    }

    # Point the manual's stylesheet at the CSS directory, then insert the
    # common bsys stylesheet and script right after it.
    my $stylesheet = $manualname . '.css' ;
    if (my $csslink = $mytree->look_down('_tag','link','href',$stylesheet)) {
        $csslink->attr('href', $cssdir . '/' . $stylesheet) ;
        $csslink->postinsert(
            HTML::Element->new_from_lol(
                ['link',{ 'href' => $cssdir . '/bsys.css', 'rel' => 'stylesheet' } ],
                ['script',{ 'type' => 'text/javascript', 'src' => $javascriptdir . '/bsys.js' } ]
            )
        ) ;
    }
}
490 # Replace textregistered images with the HTML special char
# NOTE(review): attr('alt') is undefined for images without an alt
# attribute, so the matches below would then run against undef -- confirm
# whether that can happen.
491 my @images = $mytree->look_down('_tag','img') ;
492 foreach $childlinks (@images) {
493 my $alttext = $childlinks->attr('alt') ;
494 # print "Alt: $alttext\n" ;
# Images whose alt text contains "registe" are replaced by a
# <span class="expochar"> holding the registered-sign character.
495 if ($alttext =~ /.*registe.*/) {
496 $childlinks->preinsert(HTML::Element->new_from_lol(['span', {'class' => 'expochar' }, '®'])) ;
497 $childlinks->detach() ;
498 $childlinks->delete() ;
# Images whose alt text matches "bacula...-logo" are removed.
500 if ($alttext =~ /.*bacula.*-logo.*/) {
501 $childlinks->detach() ;
502 $childlinks->delete() ;
# Second pass over the remaining images: sources starting with "./" are
# re-rooted under the pictures directory.
# NOTE(review): one statement between the test and the assignment below is
# not visible in this excerpt.
505 @images = $mytree->look_down('_tag','img') ;
506 foreach $childlinks (@images) {
507 my $img = $childlinks->attr('src') ;
508 if ($img =~ /^\.\//) {
510 $img = $picturesdir . '/' . $img ;
511 $childlinks->attr('src',$img) ;
512 print "img: " . $img . "\n" ;
515 # This li is at first level
# Write the transformed page to the output file.
518 open FH, ">" . $outputfile or die "Unable to create $outputfile: $!\n" ;
519 print FH $mytree->as_HTML("<>","\t",{}) ;
# Disabled: second pass over the output file that would replace each
# reference key with a link built from its file and anchor.
521 # open FH, "< $outputfile" or die "Unable to open $outputfile\n" ;
523 # while (my $l = <FH>) {
524 # foreach my $k (keys %references) {
525 # # print "==> The Clef: $k\n" ;
526 # my $anchor = sprintf("<a href=\"%s#%s\">%s</a>",
527 # $references{$k}{file},
528 # $references{$k}{anchor},
530 # $l =~ s/$k/$anchor/g ;
535 # open FH, ">" . $outputfile or die "Unable to create $outputfile: $!\n" ;
536 # for $l (@content) {
# The page is also printed to standard output.
# NOTE(review): any condition guarding this print is not visible here, and
# its first as_HTML argument differs from the one used for the file above.
542 print $mytree->as_HTML("","\t",{}) ;
544 debugdump($thecopy,"TOC Copy") ;
# Save the menu tree to the per-manual menu file.
547 open FH, ">" . $MENUFILE or die "Unable to create the menu file: $!\n" ;
548 print FH $thecopy->as_HTML("","\t",{}) ;