From 2711691b4adcd380ff15c807eb2ef20e9e6b135c Mon Sep 17 00:00:00 2001
From: Philippe Chauvat
Date: Fri, 7 Dec 2012 16:47:49 +0100
Subject: [PATCH] HTML converter tools

---
 docs/tools/htmls.sh        | 108 ++++++++
 docs/tools/translatedoc.pl | 542 +++++++++++++++++++++++++++++++++++++
 2 files changed, 650 insertions(+)
 create mode 100755 docs/tools/htmls.sh
 create mode 100755 docs/tools/translatedoc.pl

diff --git a/docs/tools/htmls.sh b/docs/tools/htmls.sh
new file mode 100755
index 00000000..47dbe2ef
--- /dev/null
+++ b/docs/tools/htmls.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+#
+# Philippe Chauvat - BaculaSystems
+# Last modification 04-dec-2012
+#
+# This script fetches all HTML 'manual' directories
+# to apply a conversion script (translatedoc.pl)
+# and then produces the final HTML version of the manuals
+#
+# args: none
+#
+# vars
+# SOURCEDIRHEAD: Where the original directories are located, relative to the
+#                current script
+#                Default: ../manuals/en
+#
+# TRANSLATOR: The script to apply to each original HTML file
+#             Default: ./translatedoc.pl
+#
+# DEBUG: Set to -d to get the translator's debug output
+#        Default: ""
+#
+# FROMFILEMENUEXTRACT: We want to extract a kind of menu. This var gives the name of the file
+#                      from which to extract it
+#                      Default: index.html
+#
+# DIRSTOCREATE: Specify here which directory(ies) must be created before running the conversion
+#               Default: "css js images"
+#
+# LIST The list of dirs containing the manuals to translate
+#      Default: `find ${SOURCEDIRHEAD} -mindepth 1 -maxdepth 1 -name "www-*" -type d`
+#
+# ROOTDIR All HTML files rely on CSS, JS and IMAGES. ROOTDIR specifies the relative path to those.
+#         Default: "../.."
+#
+# CSSDIR The CSS directory used by HTMLs
+#        Default: ${ROOTDIR}/css
+#
+# JSDIR The JS (javascript) directory used by HTMLs
+#       Default: ${ROOTDIR}/js
+#
+# IMAGEDIR The images directory used by HTMLs
+#          Default: ${ROOTDIR}/images
+#
+# DOSUBFILES Do we want to generate all subfiles (not only the index.html). Debug and tests purpose
+#            Default: yes
+#
+SOURCEDIRHEAD="../manuals/en"
+TRANSLATOR='./translatedoc.pl'
+DEBUG="" # change to -d if you want some debug there
+FROMFILEMENUEXTRACT="index.html"
+DIRSTOCREATE="css js images"
+LIST=`find ${SOURCEDIRHEAD} -mindepth 1 -maxdepth 1 -name "www-*" -type d`
+#LIST=${SOURCEDIRHEAD}/www-main
+ROOTDIR="../.."
+CSSDIR=${ROOTDIR}/css
+JSDIR=${ROOTDIR}/js
+IMAGEDIR=${ROOTDIR}/images
+SOURCEIMAGEDIR=../images
+DESTIMAGEDIR=${SOURCEDIRHEAD}/images
+#
+# Do we want to generate HTML files for all
+# files
+DOSUBFILES=yes
+#
+# For each manual
+for M in ${LIST}
+do
+    #
+    # Extract the directory name: console, developers, main, etc.
+    thedirname=`echo $M | sed -e 's/.*www-\(.*\)/\1/g'`
+    #
+    # Message to indicate what we are building
+    echo "$thedirname Manual"
+    #
+    # Where to find HTML files
+    readdir=$M/$thedirname
+    #
+    # Where to store the result
+    DESTINATION_DIR=$SOURCEDIRHEAD/$thedirname/$thedirname
+    #
+    # Create the destination directory if needed
+    mkdir -p $DESTINATION_DIR
+    #
+    # Create otherdirs if needed
+    for D in ${DIRSTOCREATE}
+    do
+        mkdir -p $SOURCEDIRHEAD/$D
+    done
+    #
+    # Building the menu must be done without any existing file
+    rm -f wholemenu_${thedirname}.html
+    echo -n "Building navigation menu from ${readdir}/${FROMFILEMENUEXTRACT} to ${DESTINATION_DIR}/${FROMFILEMENUEXTRACT}..."
+    ${TRANSLATOR} ${DEBUG} -i ${readdir}/${FROMFILEMENUEXTRACT} -e -o ${DESTINATION_DIR}/${FROMFILEMENUEXTRACT} -j ${JSDIR} -c ${CSSDIR} -p ${IMAGEDIR} -n ${thedirname} -r ${SOURCEDIRHEAD}
+    echo "Done."
+    if [ "${DOSUBFILES}" = "yes" ]
+    then
+        for L in `ls ${readdir} | grep -E 'html$'`
+        do
+            echo -n "Translating $L..."
+            ${TRANSLATOR} ${DEBUG} -i ${readdir}/${L} -o ${DESTINATION_DIR}/${L} -j ${JSDIR} -c ${CSSDIR} -p ${IMAGEDIR} -n ${thedirname}
+            echo "Done."
+        done
+    fi
+done
+#
+# Copy images
+cp -v ${SOURCEIMAGEDIR}/png/*.png ${DESTIMAGEDIR}/
diff --git a/docs/tools/translatedoc.pl b/docs/tools/translatedoc.pl
new file mode 100755
index 00000000..2f75ea98
--- /dev/null
+++ b/docs/tools/translatedoc.pl
@@ -0,0 +1,542 @@
+#!/usr/bin/perl -w
+#
+# Bacula Systems - Philippe Chauvat
+# 27 jul 2012
+#
+# This script is designed to translate Bacula enterprise LaTeX2HTML
+# documentation files to something more "tunable" / "adaptable" from a CSS
+# point of view.
+#
+# $1 is an HTML file to analyze and translate
+# The output is sent to ./<basename of $1>.out unless -o is given
+#
+# - add some ids, class
+# - re-order some piece of HTML code
+#
+# This script is based on HTML::Parser module
+#
+# args:
+# -i: HTML input file
+# -o: HTML output file
+# -j: javascript directory
+# -c: css directory
+# -p: images (pictures) directory
+# -n: Manual name
+# -e: Request a menu extraction
+# -r: Source directory (a kind of part of -i arg)
+# -?: help / usage
+# -d: debug requested
+use HTML::Parser ;
+use HTML::TreeBuilder ;
+use HTML::PullParser ;
+use Getopt::Long ;
+use File::Basename ;
+use Data::Dumper ;
+sub usage {
+    print "translatedoc.pl -i | --input html-source-file
+ [ -o | --output html-destination-file ]
+ [ -j | --javascript javascript-diretory ]
+ [ -c | --css css-directory ]
+ [ -p | --pictures pictures-directory ]
+ [ -n | --name manual_name ]
+ [ -e | --extract ]
+ [ -d | --debug ]
+ [ -r | --source-directory the_original_root_directory ]
+ [ --help | -? ]\n" ;
+    exit 1 ;
+}
+#
+# Dump an HTML element to output, in debug mode only (nothing is dumped for an undefined element)
+# ======================
+sub debugdump {
+    my ($what,$msg) = @_ ;
+    if ($debug and defined $what) {
+        print "\n===============================\nBegin of $msg\n" ;
+        $what->dump ;
+        print "\n===============================\nEnd of $msg\n\n" ;
+    }
+}
+#
+# Build a references list
+# Needed to link text and tables, images and so on (see figure xxx)
+# with an HTML href
+# IN: root dir and reference filename
+# OUT: a reference file, one "label file anchor" line per resolved label; returns the hash
+# =======================
+sub build_references {
+    my $root = $_[0] ;
+    my $referencefile = $_[1] ;
+    print "Root dir: $root\n" ;
+    print "References file: $referencefile\n" ;
+    my %references ;
+    my @content ;
+    my $fh ;
+    for my $i (`find $root -iname "[a-zA-Z0-9]*.aux"`) {
+        chomp($i) ;
+        print "Building references for $i\n" ;
+        open $fh, '<', $i or die "Unable to open file $i\n" ;
+        @content = <$fh> ;
+        close $fh ;
+        for my $l (@content) {
+            if ($l =~ /newlabel/) {
+### \newlabel{figbs6:fdstorageaddress}{{2.1}{15}{Backup Over WAN\relax \relax }{figure.caption.15}{}}
+                my @elts = split('{|}',$l) ;
+                if ($#elts >4) {
+                    if ($elts[1] ne "" and $elts[4] ne "") {
+                        print "Clef: $elts[1]\n" ;
+                        chomp($elts[1]) ;
+                        chomp($elts[4]) ;
+                        $references{$elts[1]}{anchor} = $elts[4] ;
+                        $references{$elts[1]}{file} = "" ;
+                        $references{$elts[4]}{latexref} = $elts[1] ;
+                    }
+                }
+            }
+        }
+    }
+    for my $i (`find $root -iname "[a-zA-Z0-9]*.html"`) {
+        chomp($i) ;
+        print "Building anchors for $i\n" ;
+        open $fh, '<', $i or die "Unable to open file $i\n" ;
+        @content = <$fh> ;
+        close $fh ;
+        foreach my $l (@content) {
+            if ($l =~ m/<A[^>]*NAME *= *"([^>]*)"/) {
+                chomp($l) ;
+                print "L matche: $l / $1\n" ;
+                if (exists $references{$1}{latexref}) {
+                    print "Bingo: $i --- " . $references{$1}{latexref} . "\n" ;
+                    $references{$references{$1}{latexref}}{file} = $i ;
+                }
+            }
+        }
+    }
+    print "Writing references\n" ;
+    open $fh, '>', $referencefile or die "Unable to create file $referencefile\n" ;
+    foreach my $k (keys %references) {
+        if (defined $references{$k}{file} and $references{$k}{file} ne "") {
+            print "Key: $k\n" ;
+            print {$fh} $k . " " . $references{$k}{file} . " " . $references{$k}{anchor} . "\n" ;
+        }
+    }
+    close $fh ;
+    return %references ;
+}
+#
+# References reading method
+# To be able to handle references across HTML files
+# =========================
+sub read_references {
+    my $referencefile = $_[0] ;
+    my %references ;
+    my $fh ;
+    open $fh, '<', $referencefile or die "Unable to open $referencefile\n" ;
+    while (my $line = <$fh>) {
+        my ($k,$f,$v) = split ' ', $line ;
+        $references{$k}{file} = $f ;
+        $references{$k}{anchor} = $v ;
+    }
+    close $fh ;
+    return %references ;
+}
+#
+# Args to Vars
+our($inputfile,$outputfile,$help,$debug,$mytree,$extractmenu,$picturesdir,
+    $cssdir,$javascriptdir,$manualname,$sourcedir) ;
+#
+# Usage in case of missing arguments
+usage() unless($#ARGV > -1) ;
+#
+# Input file / Output file
+GetOptions("input|i=s" => \$inputfile,
+           "output|o=s" => \$outputfile,
+           "extract|e" => \$extractmenu,
+           "pictures|p=s" => \$picturesdir,
+           "css|c=s" => \$cssdir,
+           "source-directory|r=s" => \$sourcedir,
+           "javascript|j=s" => \$javascriptdir,
+           "name|n=s" => \$manualname,
+           "debug|d" => \$debug,
+           "help|?" => \$help) or usage() ;
+usage() if ($help) ;
+usage() unless (defined $inputfile) ;
+
+die "$inputfile does not exists.\n" unless -e $inputfile ;
+
+if (! defined $outputfile ) {
+    $outputfile = "./" . basename($inputfile) . ".out" ;
+}
+
+if (! defined $picturesdir ) {
+    $picturesdir = "../images" ;
+}
+
+if (! defined $cssdir ) {
+    $cssdir = "../css" ;
+}
+
+if (! defined $javascriptdir ) {
+    $javascriptdir = "../js" ;
+}
+if (! defined $manualname) {
+    $manualname = "main" ;
+}
+my $MENUFILE="./wholemenu_" . $manualname . ".html" ;
+# my $REFERENCEFILE="./references_to_build.txt" ;
+# my %references ;
+# if (defined $sourcedir) {
+#     %references = build_references($sourcedir,$REFERENCEFILE) ;
+# }
+# else {
+#     %references = read_references($REFERENCEFILE) ;
+# }
+#
+# Build HTML Tree of existing page
+$mytree = HTML::TreeBuilder->new ;
+$mytree->parse_file($inputfile) ;
+#
+# Find the beginning of the content
+# Which is also a point where to put
+# the menu
+$beginning_of_content = $mytree->look_down('_tag','h1') ;
+$beginning_of_content = $mytree->look_down('_tag','h2') unless ($beginning_of_content) ;
+die "The only thing we could test is a <h1>/<h2> tags, which does not exist there...:$!\n" unless($beginning_of_content) ;
+
+#
+# Look for the table of contents
+# we must translate it at a position before the content itself
+my $thecopy ;
+if ($thecopy = $mytree->look_down('_tag', 'ul', 'class','ChildLinks')) {
+    $childlinks = $thecopy->detach() ;
+    debugdump($thecopy,"Navigation system copy 1") ;
+#
+# Clean up the content of table of contents
+    while ($d = $thecopy->look_down('_tag','br')) {
+        $d->detach() ;
+        $d->delete() ;
+    }
+    debugdump($thecopy,"Navigation system copy 2") ;
+}
+if ($childlinks = $mytree->look_down('_tag','a','name','CHILD_LINKS')) {
+    $childlinks->detach() ;
+    $childlinks->delete() ;
+    debugdump($thecopy,"Navigation system copy 3") ;
+}
+#
+# Remove old navigation part.... (next, up, previous, and so on)
+if ($childlinks = $mytree->look_down('_tag', 'div', 'class', 'navigation')) {
+    $childlinks->detach_content() ;
+    $childlinks->detach() ;
+    $childlinks->delete_content() ;
+    $childlinks->delete ;
+    debugdump($thecopy,"Navigation system copy 4") ;
+}
+# End removing navigation
+#
+#
+# Remove every 'dirty' lines
+# between <body> and <h1> tag
+# What is "before" the <h1> tag (between <body> and <h1>) is just dropped
+my @lefts = $beginning_of_content->left() ;
+foreach my $sibling (@lefts) {
+    $sibling->detach_content() ;
+    $sibling->delete_content() ;
+    $sibling->detach() ;
+    $sibling->delete() ;
+}
+debugdump($thecopy,"Navigation system copy 5") ;
+#
+# Remove Bacula community logo
+if ($childlinks = $beginning_of_content->look_down('_tag','img','alt','\\includegraphics{bacula-logo.eps}')) {
+    $childlinks->detach() ;
+    $childlinks->delete() ;
+}
+# End remove Bacula logo
+#
+# Remove 'address' tag
+if ($childlinks = $mytree->look_down('_tag','address')) {
+    $childlinks->detach() ;
+    $childlinks->delete() ;
+}
+# End remove address
+#
+my $thebody = $mytree->look_down('_tag','body') ;
+$thebody->attr('onload','menuonload(this);') ;
+debugdump($thebody,"The body BEFORE") ;
+my @content = $thebody->detach_content() ;
+# End remove dirty lines
+#
+# What do we do with the menu?
+# If the menu file exists then just use it
+if (-e $MENUFILE) {
+    #
+    # Build the menu file
+    $thecopy = HTML::TreeBuilder->new ;
+    $thecopy->parse_file($MENUFILE) ;
+}
+debugdump($thecopy,"Navigation system copy 6") ;
+
+#
+#
+# Create a div to manage the whole page
+my $mainpage = HTML::Element->new_from_lol(
+    ['div', { 'class' => "bsys_mainpageclass", 'id' => "bsys_mainpageid" },
+     [ 'div', {'class' => 'bsys_topclass', 'id' => 'bsys_topid'},
+       [ 'img', { 'src' => $picturesdir . '/bsys-logo.png', 'id' => 'bsys_logo','alt' => 'Bacula Systems Logo' }],
+       [ 'img', { 'src' => $picturesdir . '/bsys-doctitle.png', 'id' => 'bsys_doctitle', 'alt' => 'Bacula Enterprise Documentation text image'}]
+     ],
+     [ 'div', {'id' => 'bsys_breadnsearchid', 'class' => 'bsys_breadnsearchclass'},
+       ['div', { 'class' => 'bsys_searchclass', 'id' => 'bsys_searchid'},
+        ['span','Search' , {'class' => 'bsys_searchtitleclass','id' => 'bsys_searchtitleid'}],
+        [ 'input', { 'class' => 'bsys_searchfieldclass', 'id' => 'bsys_searchfieldid', 'type' => 'text', 'value' => 'Type your text here' }]
+       ],
+       [ 'div', { 'class' => 'bsys_breadcrumbsclass', 'id' => 'bsys_breadcrumbsid'},
+         [ 'p', { 'class' => 'bsys_breadcrumbscontentclass', 'id' => 'bsys_breadcrumbscontentid' }, 'Main' ],
+       ]
+     ],
+     [ 'div', { 'class' => "bsys_pageclass", 'id' => "bsys_pageid"},
+       [ 'div', { 'class' => "bsys_leftnavigationclass", 'id' => "bsys_leftnavigationid" },
+         $thecopy
+       ],
+       [ 'div', { 'class' => 'bsys_contentclass', 'id' => 'bsys_contentid' },
+         # The whole former body content goes into ONE 'bsys_truecontent'
+         # div (this is what the previous map expression actually built),
+         # not one div per detached node.
+         [ 'div', {'class' => 'bsys_truecontent' }, @content ]
+       ]
+     ]
+    ]
+    ) ;
+debugdump($mainpage,"Main page build") ;
+$beginning_of_content = $thebody->push_content($mainpage) ;
+# Remove "Contents" links
+if ($childlinks = $mytree->look_down('_tag','a','href','Contents.html')) {
+    $childlinks = $childlinks->parent() ;
+    $childlinks->delete() ;
+}
+debugdump($thecopy,"Navigation system copy 7") ;
+#
+# Now begins the modification for navigation
+# ==========================================
+# We must analyze what is below