--- /dev/null
+#!/usr/bin/perl -w
+#
+# Bacula Systems - Philippe Chauvat
+# 27 jul 2012
+#
+# This script is designed to translate Bacula enterprise LaTeX2HTML
+# documentation files to something more "tunable" / "adaptable" from a CSS
+# point of view.
+#
+# The HTML file to analyze and translate is given with -i
+# Unless -o is given, the output is automatically sent to ./<input file name>.out
+#
+# - add some ids, classes
+# - re-order some pieces of HTML code
+#
+# This script is based on HTML::Parser module
+#
+# args:
+# -i: HTML input file
+# -o: HTML output file
+# -j: javascript directory
+# -c: css directory
+# -p: images (pictures) directory
+# -n: Manual name
+# -e: Request a menu extraction
+# -r: Source directory (ako part of -i arg)
+# -?: help / usage
+# -d: debug requested
+use HTML::Parser ;
+use HTML::TreeBuilder ;
+use HTML::PullParser ;
+use Getopt::Long ;
+use File::Basename ;
+use Data::Dumper ;
+#
+# Print the command line synopsis on STDOUT, then leave the script
+# IN: nothing
+# OUT: never returns (the exit status is 1)
+# ======================
+sub usage {
+    print "translatedoc.pl -i | --input html-source-file
+ [ -o | --output html-destination-file ]
+ [ -j | --javascript javascript-directory ]
+ [ -c | --css css-directory ]
+ [ -p | --pictures pictures-directory ]
+ [ -n | --name manual_name ]
+ [ -e | --extract ]
+ [ -d | --debug ]
+ [ -r | --source-directory the_original_root_directory ]
+ [ --help | -? ]\n" ;
+    exit 1 ;
+}
+#
+# Send a dump to the output, in case of debug only
+# Nothing is printed unless the global $debug flag (-d | --debug) is set
+# IN: $what: the object to dump (anything with a dump() method),
+#            may be undefined when an earlier lookup found nothing
+#     $msg: the label printed in the begin / end banners
+# OUT: nothing
+# ======================
+sub debugdump {
+    my ($what,$msg) = @_ ;
+    return unless ($debug) ;
+    print "\n===============================\nBegin of $msg\n" ;
+    if (defined $what) {
+        $what->dump ;
+    }
+    else {
+        #
+        # Several callers hand over the result of a look_down() which may
+        # have failed: say so rather than dying on an undefined object
+        print "(nothing to dump)\n" ;
+    }
+    print "\n===============================\nEnd of $msg\n\n" ;
+    return ;
+}
+#
+# Build a references list
+# Needed to link text and tables, images and so on (see figure xxx)
+# with an HTML href
+# The *.aux files found below the root dir give, for each \newlabel, the
+# LaTeX label and its anchor; the *.html files are then scanned to find
+# which one holds each anchor.
+# IN: root dir and reference filename
+# OUT: the reference file is written (one "label file anchor" line for each
+#      label whose HTML file has been found) and the whole hash is returned:
+#      $references{label}{anchor}, $references{label}{file} ("" when no
+#      HTML file matched) and $references{anchor}{latexref}
+# Dies when a file can not be opened or written
+# =======================
+sub build_references {
+    my ($root,$referencefile) = @_ ;
+    print "Root dir: $root\n" ;
+    print "References file: $referencefile\n" ;
+    my %references ;
+    #
+    # First pass: labels and anchors from the LaTeX auxiliary files
+    for my $auxfile (_find_files_by_name($root,'[a-zA-Z0-9]*.aux')) {
+        print "Building references for $auxfile\n" ;
+        open my $aux, '<', $auxfile or die "Unable to open file $auxfile: $!\n" ;
+        while (my $line = <$aux>) {
+            next unless ($line =~ /newlabel/) ;
+### \newlabel{figbs6:fdstorageaddress}{{2.1}{15}{Backup Over WAN\relax \relax }{figure.caption.15}{}}
+            #
+            # Cut on every brace: field 1 is the label, field 4 the anchor
+            my @elts = split(/[{}]/,$line) ;
+            next unless ($#elts > 4) ;
+            my ($label,$anchor) = @elts[1,4] ;
+            next if ($label eq "" or $anchor eq "") ;
+            print "Clef: $label\n" ;
+            $references{$label}{anchor} = $anchor ;
+            $references{$label}{file} = "" ;
+            $references{$anchor}{latexref} = $label ;
+        }
+        close $aux ;
+    }
+    #
+    # Second pass: which HTML file holds which anchor
+    for my $htmlfile (_find_files_by_name($root,'[a-zA-Z0-9]*.html')) {
+        print "Building anchors for $htmlfile\n" ;
+        open my $html, '<', $htmlfile or die "Unable to open file $htmlfile: $!\n" ;
+        while (my $line = <$html>) {
+            #
+            # The name stops at its closing quote, whatever attributes follow
+            next unless ($line =~ m/<A [^>]*NAME *= *"([^">]*)"/) ;
+            my $name = $1 ;
+            chomp($line) ;
+            print "L matche: $line / $name\n" ;
+            #
+            # Test the first level alone beforehand: unknown anchors must not
+            # be created in the hash just because we looked at them
+            next unless (exists $references{$name} and exists $references{$name}{latexref}) ;
+            my $label = $references{$name}{latexref} ;
+            print "Bingo: $htmlfile --- $label\n" ;
+            $references{$label}{file} = $htmlfile ;
+        }
+        close $html ;
+    }
+    print "Writing references\n" ;
+    open my $out, '>', $referencefile or die "Unable to create file $referencefile\n" ;
+    foreach my $key (keys %references) {
+        my $file = $references{$key}{file} ;
+        #
+        # Entries keyed by an anchor have no file at all, unresolved labels
+        # have an empty one: none of them goes to the reference file
+        next unless (defined $file and $file ne "") ;
+        print "Key: $key\n" ;
+        print {$out} $key . " " . $file . " " . $references{$key}{anchor} . "\n" ;
+    }
+    close $out or die "Unable to write file $referencefile: $!\n" ;
+    return %references ;
+}
+#
+# Private helper for build_references
+# List the files below a directory with the find command
+# IN: root dir and a pattern for the -iname test of find
+# OUT: the matching paths, without their end of line
+# =======================
+sub _find_files_by_name {
+    my ($root,$pattern) = @_ ;
+    #
+    # List form of the pipe open: find is started without any shell, so
+    # spaces or special characters in the root dir are harmless
+    open my $pipe, '-|', 'find', $root, '-iname', $pattern
+        or die "Unable to run find on $root: $!\n" ;
+    my @files = <$pipe> ;
+    close $pipe ;
+    chomp(@files) ;
+    return @files ;
+}
+#
+# References reading method
+# To be able to handle references across HTML files
+# Each line of the file holds three fields separated by a single space:
+# the key, the file and the anchor. Lines with fewer fields are skipped.
+# IN: reference filename
+# OUT: the references hash: $references{key}{file} and
+#      $references{key}{anchor}
+# Dies when the file can not be opened
+# =========================
+sub read_references {
+    my ($referencefile) = @_ ;
+    my %references ;
+    open my $in, '<', $referencefile or die "Unable to open $referencefile\n" ;
+    while (my $line = <$in>) {
+        #
+        # Without this the anchor would keep the end of line
+        chomp($line) ;
+        my ($key,$file,$anchor) = split / /,$line ;
+        next unless (defined $anchor) ;
+        $references{$key}{file} = $file ;
+        $references{$key}{anchor} = $anchor ;
+    }
+    close $in ;
+    return %references ;
+}
+#
+# Args to Vars
+# They stay package variables: debugdump() reads $debug by its name
+our($inputfile,$outputfile,$help,$debug,$mytree,$extractmenu,$picturesdir,
+    $cssdir,$javascriptdir,$manualname,$sourcedir) ;
+#
+# No argument at all: show the usage and leave
+usage() if (scalar(@ARGV) == 0) ;
+#
+# Command line options, each one bound to the variable it fills
+my @optionspec = (
+    "input|i=s" => \$inputfile,
+    "output|o=s" => \$outputfile,
+    "extract|e" => \$extractmenu,
+    "pictures|p=s" => \$picturesdir,
+    "css|c=s" => \$cssdir,
+    "source-directory|r=s" => \$sourcedir,
+    "javascript|j=s" => \$javascriptdir,
+    "name|n=s" => \$manualname,
+    "debug|d" => \$debug,
+    "help|?" => \$help,
+) ;
+GetOptions(@optionspec) or usage() ;
+#
+# Help requested or no input file: same answer
+usage() if ($help || !defined($inputfile)) ;
+#
+# The input file must be there
+(-e $inputfile) or die "$inputfile does not exists.\n" ;
+#
+# Defaults for whatever the command line left out
+# The output file goes to the current directory, named after the input file
+unless (defined $outputfile) {
+    $outputfile = "./" . basename($inputfile) . ".out" ;
+}
+foreach my $default ( [ \$picturesdir, "../images" ],
+                      [ \$cssdir, "../css" ],
+                      [ \$javascriptdir, "../js" ],
+                      [ \$manualname, "main" ] ) {
+    my ($variable,$value) = @$default ;
+    $$variable = $value unless (defined $$variable) ;
+}
+#
+# One menu file for each manual
+my $MENUFILE="./wholemenu_" . $manualname . ".html" ;
+# my $REFERENCEFILE="./references_to_build.txt" ;
+# my %references ;
+# if (defined $sourcedir) {
+# %references = build_references($sourcedir,$REFERENCEFILE) ;
+# }
+# else {
+# %references = read_references($REFERENCEFILE) ;
+# }
+#
+# Parse the existing page into an HTML tree
+$mytree = HTML::TreeBuilder->new ;
+$mytree->parse_file($inputfile) ;
+#
+# The content begins at the first heading, which is also
+# the point where the menu is put
+# <h1> is looked for first, <h2> only when there is no <h1>
+foreach my $heading ('h1','h2') {
+    $beginning_of_content = $mytree->look_down('_tag',$heading) ;
+    last if ($beginning_of_content) ;
+}
+unless ($beginning_of_content) {
+    die "The only thing we could test is a <H1> / <H2> tags, which does not exist there...:$!\n" ;
+}
+
+#
+# Look for the table of contents (<ul class="ChildLinks">)
+# It is taken out of the tree here and put back before the content later
+my $thecopy = $mytree->look_down('_tag', 'ul', 'class','ChildLinks') ;
+if ($thecopy) {
+    $childlinks = $thecopy->detach() ;
+    debugdump($thecopy,"Navigation system copy 1") ;
+    #
+    # Clean up the table of contents: no <br> wanted in there
+    foreach my $br ($thecopy->look_down('_tag','br')) {
+        $br->detach() ;
+        $br->delete() ;
+    }
+    debugdump($thecopy,"Navigation system copy 2") ;
+}
+#
+# The <a name="CHILD_LINKS"> anchor goes away
+$childlinks = $mytree->look_down('_tag','a','name','CHILD_LINKS') ;
+if ($childlinks) {
+    $childlinks->detach() ;
+    $childlinks->delete() ;
+    debugdump($thecopy,"Navigation system copy 3") ;
+}
+#
+# Remove old navigation part.... (next, up, previous, and so on)
+# The div is emptied, taken out of the tree, then destroyed
+$childlinks = $mytree->look_down('_tag', 'div', 'class', 'navigation') ;
+if ($childlinks) {
+    $childlinks->detach_content() ;
+    $childlinks->detach() ;
+    $childlinks->delete_content() ;
+    $childlinks->delete ;
+    debugdump($thecopy,"Navigation system copy 4") ;
+}
+# End removing navigation
+#
+#
+# Remove every 'dirty' node
+# between <body> and <h1> tag
+# What is "before" the heading, among the nodes sharing its parent, is
+# just dropped
+# The nodes are cut out through the parent: some of them may be plain
+# text (simple strings), and no method can be called on a string
+if (my $headingparent = $beginning_of_content->parent()) {
+    my @lefts = $headingparent->splice_content(0,$beginning_of_content->pindex()) ;
+    foreach my $node (@lefts) {
+        #
+        # Only elements need an explicit destruction
+        $node->delete() if (ref $node) ;
+    }
+}
+debugdump($thecopy,"Navigation system copy 5") ;
+#
+# Remove two leftovers, one after the other:
+# - the Bacula community logo, looked for below the heading
+# - the 'address' tag, looked for in the whole tree
+foreach my $unwanted ( [ $beginning_of_content, 'img', 'alt', '\\includegraphics{bacula-logo.eps}' ],
+                       [ $mytree, 'address' ] ) {
+    my ($from,$tag,@criteria) = @$unwanted ;
+    if ($childlinks = $from->look_down('_tag',$tag,@criteria)) {
+        $childlinks->detach() ;
+        $childlinks->delete() ;
+    }
+}
+# End remove Bacula logo and address
+#
+# The body gets the javascript hook, then is emptied: what it
+# contained is kept aside to be put in the new page layout
+my $thebody = $mytree->look_down('_tag','body') ;
+$thebody->attr('onload','menuonload(this);') ;
+debugdump($thebody,"The body BEFORE") ;
+my @content = $thebody->detach_content() ;
+# End remove dirty lines
+#
+# What do we do with the menu?
+# If the menu file exists then just use it: its tree
+# takes the place of the table of contents found in this page
+if (-e $MENUFILE) {
+    my $menutree = HTML::TreeBuilder->new ;
+    $menutree->parse_file($MENUFILE) ;
+    $thecopy = $menutree ;
+}
+debugdump($thecopy,"Navigation system copy 6") ;
+
+#
+#
+# Create a div to manage the whole page:
+# - the top banner with the two images
+# - the search field and the breadcrumbs
+# - the page itself: the navigation on the left, then the content
+#
+# The navigation only gets the menu when there is one: an undefined
+# value would end up as an undefined piece of text in the tree
+my @navigation = (ref $thecopy) ? ($thecopy) : () ;
+my $mainpage = HTML::Element->new_from_lol(
+    ['div', { 'class' => "bsys_mainpageclass", 'id' => "bsys_mainpageid" },
+        [ 'div', {'class' => 'bsys_topclass', 'id' => 'bsys_topid'},
+            [ 'img', { 'src' => $picturesdir . '/bsys-logo.png', 'id' => 'bsys_logo','alt' => 'Bacula Systems Logo' }],
+            [ 'img', { 'src' => $picturesdir . '/bsys-doctitle.png', 'id' => 'bsys_doctitle', 'alt' => 'Bacula Enterprise Documentation text image'}]
+        ],
+        [ 'div', {'id' => 'bsys_breadnsearchid', 'class' => 'bsys_breadnsearchclass'},
+            ['div', { 'class' => 'bsys_searchclass', 'id' => 'bsys_searchid'},
+                ['span','Search' , {'class' => 'bsys_searchtitleclass','id' => 'bsys_searchtitleid'}],
+                [ 'input', { 'class' => 'bsys_searchfieldclass', 'id' => 'bsys_searchfieldid', 'type' => 'text', 'value' => 'Type your text here' }]
+            ],
+            [ 'div', { 'class' => 'bsys_breadcrumbsclass', 'id' => 'bsys_breadcrumbsid'},
+                [ 'p', { 'class' => 'bsys_breadcrumbscontentclass', 'id' => 'bsys_breadcrumbscontentid' }, 'Main' ],
+            ]
+        ],
+        [ 'div', { 'class' => "bsys_pageclass", 'id' => "bsys_pageid"},
+            [ 'div', { 'class' => "bsys_leftnavigationclass", 'id' => "bsys_leftnavigationid" },
+                @navigation
+            ],
+            #
+            # The attribute is 'class', without any space in its name
+            [ 'div', { 'class' => 'bsys_contentclass', 'id' => 'bsys_contentid' },
+                #
+                # One single div wraps the whole former content of the body
+                [ 'div', {'class' => 'bsys_truecontent' }, @content ]
+            ]
+        ]
+    ]
+    ) ;
+debugdump($mainpage,"Main page build") ;
+$beginning_of_content = $thebody->push_content($mainpage) ;
+# Remove "Contents" links
+# The element holding the link is destroyed, the link goes with it
+if ($childlinks = $mytree->look_down('_tag','a','href','Contents.html')) {
+    $childlinks = $childlinks->parent() ;
+    $childlinks->delete() ;
+}
+debugdump($thecopy,"Navigation system copy 7") ;
+#
+# Now begins the modification for navigation
+# ==========================================
+# We must analyze what is below <ul class="ChildLinks">
+# At first level, we consider each <li> as part of the main menu
+# At other levels, we consider each <li> as sub(sub | ...)menus
+$childlinks = $mytree->look_down('_tag','ul','class','ChildLinks') ;
+#
+# This counter is for generating unique identifiers
+my $ulcounter = 1 ;
+if ($childlinks) {
+    $childlinks->attr('id','childlinksid') ;
+    #
+    # Browse all the <ul class="ChildLinks"> node
+    # -------------------------------------------
+    foreach my $d ($childlinks->descendants()) {
+        #
+        # Which tag are we checking ?
+        # The names are compared as a whole: a pattern like /a/ would
+        # also catch <span>, <table> and so on
+        my $tag = $d->tag() ;
+        #
+        # <a>s only get the "onclick" behavior
+        if ($tag eq 'a') {
+            $d->attr('onclick',"menuonclick(this);") ;
+        }
+        #
+        # <ul>s represent "openable" menus
+        elsif ($tag eq 'ul') {
+            #
+            # We need to identify uniquely this <ul> start tag to be able to "open" or "close" it
+            my $idf = 'bsys_ul_' . $ulcounter++ ;
+            $d->attr('class','expandingMenu expandingMenuNotSelected') ;
+            $d->attr('id', $idf) ;
+            $d->attr('style','display: none;') ;
+            #
+            # We now are knowing the <li> tag around is a (sub)menu header too
+            # Adding the mouse behavior, if there is such a <li>
+            my $previoustagli = $d->look_up('_tag','li') ; # <li> just above
+            if ($previoustagli) {
+                $previoustagli->attr('onmouseover',"over_expandingMenuHeader(this,'" . $idf . "')") ;
+                $previoustagli->attr('onmouseout',"out_expandingMenuHeader(this,'" . $idf . "')") ;
+                #
+                # Do not forget what we defined earlier...
+                my $class = $previoustagli->attr('class') ;
+                $class = (defined $class and $class ne '') ? $class . ' expandingMenuHeader' : 'expandingMenuHeader' ;
+                $previoustagli->attr('class', $class) ;
+            }
+            #
+            # The closest <a> on the left of this <ul>, if any: what is on
+            # the left may also be plain text, which is not an object
+            my ($previoustaga) = grep { ref $_ and $_->tag() eq 'a' } reverse $d->left() ;
+            $previoustaga->attr('onclick',"menuonclick(this);") if ($previoustaga) ;
+        }
+        #
+        # <li>s represent at least menu items
+        # and sometimes menu headers (see <ul> treatment)
+        elsif ($tag eq 'li') {
+            #
+            # At this stage we only know <li> is a menu item.
+            # nothing more...
+            $d->attr('class', 'expandingMenuItem') ;
+        }
+    }
+}
+else {
+    #
+    # Neither this page nor a menu file brought a menu
+    warn "No <ul class=\"ChildLinks\"> found for $inputfile: the navigation is left as is\n" ;
+}
+#
+# <head> treatment
+# For each known manual:
+# - the description and keywords <meta>s get the title of the manual
+# - the <link> to the manual's own css is moved to the css directory
+# - the common bsys.css and bsys.js are added right after this <link>
+# Nothing is done for any other manual name
+#
+# Manual name => [ description, title as it ends the keywords ]
+my %headinfo = (
+    'main'       => [ 'Bacula Systems Enterprise Main Reference Manual', 'Main Reference Manual' ],
+    'developers' => [ 'Bacula Systems Enterprise Developer\'s Guide', 'Developer\'s Guide' ],
+    'console'    => [ 'Bacula Enterprise Command Console and Operators Guide', 'Command Console and Operators Guide' ],
+    'utility'    => [ 'Bacula Enterprise Utility Programs', 'Utility Programs' ],
+    'problems'   => [ 'Bacula Enterprise Problem Resolution Guide', 'Problem Resolution Guide' ],
+    'misc'       => [ 'Bacula Enterprise Miscellaneous Guide', 'Miscellaneous Guide' ],
+) ;
+if (exists $headinfo{$manualname}) {
+    my ($description,$title) = @{$headinfo{$manualname}} ;
+    #
+    # The css generated with the manual has the name of the manual
+    my $stylesheet = $manualname . '.css' ;
+    if ($childlinks = $mytree->look_down('_tag','meta','name','description')) {
+        $childlinks->attr('content',$description) ;
+    }
+    if ($childlinks = $mytree->look_down('_tag','meta','name','keywords')) {
+        $childlinks->attr('content','Bacula Systems, Bacula Enterprise 6, ' . $title) ;
+    }
+    if ($childlinks = $mytree->look_down('_tag','link','href',$stylesheet)) {
+        $childlinks->attr('href',$cssdir . '/' . $stylesheet) ;
+        $childlinks->postinsert(
+            HTML::Element->new_from_lol(
+                ['link',{ 'href' => $cssdir . '/bsys.css', 'rel' => 'stylesheet' } ],
+                ['script',{ 'type' => 'text/javascript', 'src' => $javascriptdir . '/bsys.js' } ]
+            )
+        ) ;
+    }
+}
+#
+# Replace textregistered images with the HTML special char
+# and drop the images of the Bacula logo
+# An image is either one or the other: once destroyed it must
+# not be examined again. Images without 'alt' are left alone.
+my @images = $mytree->look_down('_tag','img') ;
+foreach my $image (@images) {
+    my $alttext = $image->attr('alt') ;
+    next unless (defined $alttext) ;
+    if ($alttext =~ /registe/) {
+        $image->preinsert(HTML::Element->new_from_lol(['span', {'class' => 'expochar' }, '®'])) ;
+        $image->detach() ;
+        $image->delete() ;
+    }
+    elsif ($alttext =~ /bacula.*-logo/) {
+        $image->detach() ;
+        $image->delete() ;
+    }
+}
+#
+# The images which were beside the page (./xxx) are now
+# in the pictures directory
+@images = $mytree->look_down('_tag','img') ;
+foreach my $image (@images) {
+    my $img = $image->attr('src') ;
+    next unless (defined $img) ;
+    #
+    # Only the leading ./ is removed, other sources are not touched
+    next unless ($img =~ s/^\.\///) ;
+    $img = $picturesdir . '/' . $img ;
+    $image->attr('src',$img) ;
+    print "img: " . $img . "\n" ;
+}
+# Write the new page: to the output file when there is one,
+# to STDOUT otherwise
+# NOTE(review): the list of characters to encode given to as_HTML is "<>"
+# for the file and "" for STDOUT and for the menu file - confirm this
+# difference is wanted
+if ($outputfile) {
+    open my $out, '>', $outputfile or die "Unable to create $outputfile: $!\n" ;
+    print {$out} $mytree->as_HTML("<>","\t",{}) ;
+    close $out or die "Unable to write $outputfile: $!\n" ;
+#
+# Disabled for now: replacement of the references by links in the output
+# file (goes with the disabled call to build_references / read_references)
+# open FH, "< $outputfile" or die "Unable to open $outputfile\n" ;
+# my @content ;
+# while (my $l = <FH>) {
+# foreach my $k (keys %references) {
+# # print "==> The Clef: $k\n" ;
+# my $anchor = sprintf("<a href=\"%s#%s\">%s</a>",
+# $references{$k}{file},
+# $references{$k}{anchor},
+# $k) ;
+# $l =~ s/$k/$anchor/g ;
+# }
+# push @content,$l ;
+# }
+# close FH ;
+# open FH, ">" . $outputfile or die "Unable to create $outputfile: $!\n" ;
+# for $l (@content) {
+# print FH $l ;
+# }
+# close FH ;
+}
+else {
+    print $mytree->as_HTML("","\t",{}) ;
+}
+debugdump($thecopy,"TOC Copy") ;
+#
+# Menu extraction (-e): the menu is saved to be used by the other pages
+if ($extractmenu) {
+    if (ref $thecopy) {
+        open my $menu, '>', $MENUFILE or die "Unable to create the menu file: $!\n" ;
+        print {$menu} $thecopy->as_HTML("","\t",{}) ;
+        close $menu or die "Unable to write the menu file: $!\n" ;
+    }
+    else {
+        #
+        # No menu was found: an existing menu file is not touched
+        warn "No menu to extract from $inputfile: $MENUFILE is not written\n" ;
+    }
+}
+
+1;