# This file contains subroutines for use by the latex2html system.
# This file is executed due to a \usepackage{bacula} statement
# in the LaTeX source. The subroutines here implement functionality
# specific to the generation of html manuals for the Bacula project.
# Some of the added functionality is designed to extend the capabilities
# of latex2html and some is to change its behavior.


# Returns the minimum of any number of numeric arguments.
#
# Every argument is examined: a 0 (or any other false value) in the list
# no longer ends the scan early.  Undefined arguments are skipped.
sub min {
    my $tmp = shift;
    foreach my $test (@_) {
        next unless defined $test;
        $tmp = $test if ($test < $tmp);
    }
    return $tmp;
}

# These two are copied from
# /usr/lib/latex2html/style/hthtml.perl,
# from the subroutine do_cmd_htmladdnormallink.
# They have been renamed, the name argument has been removed,
# and the other two arguments have been reversed.

# External link: the first braced argument is the link text, the second
# is the URL.  Returns the generated href followed by the remaining input.
sub do_cmd_elink{
    local($_) = @_;
    local($text, $url, $href);
    # '&name;' (no parentheses) deliberately passes the current @_ through.
    local($opt, $dummy) = &get_next_optional_argument;
    $text = &missing_braces unless
        ((s/$next_pair_pr_rx/$text = $2; ''/eo)
        ||(s/$next_pair_rx/$text = $2; ''/eo));
    $url = &missing_braces unless
        ((s/$next_pair_pr_rx/$url = $2; ''/eo)
        ||(s/$next_pair_rx/$url = $2; ''/eo));
    # Was: $*=1; s/^\s+/\n/; $*=0;
    # $* is no longer supported by Perl; /m gives the same multi-line '^'.
    s/^\s+/\n/m;
    $href = &make_href($url,$text);
    print "\nHREF:$href" if ($VERBOSITY > 3);
    join ('',$href,$_);
}

# Internal link: takes one braced argument and hands it to process_ref,
# whose result is returned.
sub do_cmd_ilink {
    local($_) = @_;
    local($text);
    local($opt, $dummy) = &get_next_optional_argument;
    $text = &missing_braces unless
        ((s/$next_pair_pr_rx/$text = $2; ''/eo)
        ||(s/$next_pair_rx/$text = $2; ''/eo));
    &process_ref($cross_ref_mark,$cross_ref_mark,$text);
}

# \lt and \gt: prepend a literal '<' or '>' to the remaining input.
sub do_cmd_lt { join('',"\<",$_[0]); }
sub do_cmd_gt { join('',"\>",$_[0]); }

# KEC  Copied from latex2html.pl and modified to prevent
# filename collisions. This is done with a static hash of
# already-used filenames. An integer is appended to the
# filename if a collision would result without it.
# The addition of the integer is done by removing
# character(s) before .html if adding the integer would result
# in a filename longer than 32 characters.
# Usually just removing the character before .html would resolve the
# collision, but we add the integer anyway. The first integer that
# resolves the collision is used.
# If a filename is desired that is 'index.html' or any case
# variation of that, it is changed to index_Page.html,
# index_Page1.html, etc. (see the substitution in make_name below).

#RRM  Extended to allow customised filenames, set $CUSTOM_TITLES
#     or long title from the section-name, set $LONG_TITLES
#
{
    my %used_names;    # Static hash.

    # Builds the output file name for a section.
    #   $sec_name            - name used for the top-level file
    #   $packed_curr_sec_id  - packed section id; trailing "_0" groups are
    #                          stripped, and an all-digit id is emptied,
    #                          before it selects the naming scheme
    # Returns a file name not previously returned by this subroutine and
    # records it in %used_names.  Also increments $OUT_NODE and sets
    # $OVERALL_TITLE on the first call.
    sub make_name {
        local($sec_name, $packed_curr_sec_id) = @_;
        # NOTE(review): these are 'local', not 'my' -- presumably so that
        # $making_name is visible to the subroutines called below; confirm
        # before changing the scoping.
        local($title,$making_name,$saved) = ('',1,'');
        my $final_name;
        if ($LONG_TITLES) {
            # $_ is saved here and restored after the section command
            # has been processed.
            $saved = $_;
            # This alerts the subroutine textohtmlindex not to increment its index counter on the next call.
            &do_cmd_textohtmlindex("\001noincrement");
            &process_command($sections_rx, $_) if /^$sections_rx/;
            $title = &make_bacula_title($TITLE)
                unless ((! $TITLE) || ($TITLE eq $default_title));
            $_ = $saved;
        } elsif ($CUSTOM_TITLES) {
            $saved = $_;
            # This alerts the subroutine textohtmlindex not to increment its index counter on the next call.
            &do_cmd_textohtmlindex("\001noincrement");
            &process_command($sections_rx, $_) if /^$sections_rx/;
            $title = &custom_title_hook($TITLE)
                unless ((! $TITLE) || ($TITLE eq $default_title));
            $_ = $saved;
        }
        if ($title) {
            #ensure no more than 32 characters, including .html extension
            # (the title itself is cut to at most 27 characters)
            $title =~ s/^(.{1,27}).*$/$1/;
            ++$OUT_NODE;
            $final_name = join("", ${PREFIX}, $title, $EXTN);
        } else {
            # Remove 0's from the end of $packed_curr_sec_id
            $packed_curr_sec_id =~ s/(_0)*$//;
            $packed_curr_sec_id =~ s/^\d+$//o; # Top level file
            $final_name = join("",($packed_curr_sec_id ?
                "${PREFIX}$NODE_NAME". ++$OUT_NODE : $sec_name), $EXTN);
        }

        # Change the name from index to index_Page to avoid conflicts with
        # index.html.  The match is case-insensitive and keeps the
        # original capitalisation of "index".
        $final_name =~ s/^(index)\.html$/$1_Page.html/i;

        # If the $final_name is already used, put an integer before the
        # .html to make it unique.
        my $integer = 0;
        my $saved_name = $final_name;
        while (exists($used_names{$final_name})) {
            # Start again from the unsuffixed name on every attempt.
            $final_name = $saved_name;
            my ($filename,$ext) = $final_name =~ /(.*)(\..*)$/;
            my $numlen = length(++$integer);

            # If the filename (after adding the integer) would be longer than
            # 32 characters, insert the integer within it.
            if (((my $namelen = length($final_name)) + $numlen) >= 32) {
                # Overwrite the last $numlen characters of the base name.
                substr($filename,-$numlen) = $integer;
            } else {
                $filename .= $integer;
            }
            $final_name = $filename . $ext;
        }

        # Save the $final_name in the hash to mark it as being used.
        $used_names{$final_name} = undef;

        # Save the first name evaluated here.  This is the name of the top-level html file, and
        # can be used to produce the index.html hard link at the end.
        $OVERALL_TITLE = $final_name if (!defined $OVERALL_TITLE);

        return $final_name;
    }
}

# Turns a section title into a string suitable for use in a file name.
# Generic words, markers, tags and (optionally) the leading section number
# are removed, the title is reduced to $LONG_TITLES words by
# get_bacula_words, and every remaining non-word character becomes '_'.
sub make_bacula_title {
    local($_)= @_;
    local($num_words) = $LONG_TITLES;
    #RRM:  scan twice for short words, due to the $4 overlap
    #      Cannot use \b , else words break at accented letters
    $_ =~ s/(^|\s)\s*($GENERIC_WORDS)(\'|(\s))/$4/ig;
    $_ =~ s/(^|\s)\s*($GENERIC_WORDS)(\'|(\s))/$4/ig;
    #remove leading numbering, unless that's all there is.
    local($sec_num);
    if (!(/^\d+(\.\d*)*\s*$/)&&(s/^\s*(\d+(\.\d*)*)\s*/$sec_num=$1;''/e))
        { $num_words-- };
    &remove_markers;
    s/<[^>]*>//g; #remove tags
    #revert entities, etc. to TeX-form...
    s/([\200-\377])/"\&#".ord($1).";"/eg;
    $_ = &revert_to_raw_tex($_);

    # get $LONG_TITLES number of words from what remains
    $_ = &get_bacula_words($_, $num_words) if ($num_words);
    # ...and cleanup accents, spaces and punctuation
    $_ = join('', ($SHOW_SECTION_NUMBERS ? $sec_num : ''), $_);
    s/\\\W\{?|\}//g;
    s/\s/_/g;
    s/\'s/s/ig; # Replace 's with just the s.
    s/\W/_/g;
    s/__+/_/g;
    s/_+$//;
    $_;
}

#JCL(jcl-tcl)
# changed completely
# KEC 2-21-05  Changed completely again.
#
# We take the first real words specified by $min from the string.
#  Remove all markers and markups.
#  Split the line into words.
#  Determine how many words we should process.
#  Return if no words to process.
#  Determine lengths of the words.
#  Reduce the length of the longest words in the list until the
#   total length of all the words is acceptable.
#  Put the words back together and return the result.
#
# Arguments: the title string and the maximum number of words to keep
# (a negative count means "no limit").  Returns the shortened words joined
# by single spaces, or '' when there are no words.
sub get_bacula_words {
    local($_, $min) = @_;
    local($words,$i);
    local($id,%markup);
    # KEC
    my ($oalength,@lengths,$last,$thislen);
    my $maxlen = 28;    # Budget for the words plus one separator each.

    #no limit if $min is negative
    $min = 1000 if ($min < 0);

    &remove_anchors;
    #strip unwanted HTML constructs
    s/<\/?(P|BR|H)[^>]*>/ /g;
    #remove leading white space and \001 characters
    s/^\s+|\001//g;
    #lift html markup
    s/(<[^>]*>(#[^#]*#)?)//ge;

    # Split $_ into a list of words.
    my @wrds = split /\s+|\-{3,}/;
    $last = &min($min - 1,$#wrds);
    # Never index past the end of the word list, whatever &min returned.
    $last = $#wrds if ($last > $#wrds);
    return '' if ($last < 0);

    # Get a list of word lengths up to the last word we're to process.
    # Add one to each for the separator.
    @lengths = map (length($_)+1,@wrds[0..$last]);

    $thislen = $maxlen + 1; # One more than the desired max length.
    # Lower the per-word cap one step at a time until the total fits.
    do {
        $thislen--;
        @lengths = map (&min($_,$thislen),@lengths);
        $oalength = 0;
        foreach (@lengths) {$oalength += $_;}
    } until ($oalength <= $maxlen);
    $words = join(" ",map (substr($wrds[$_],0,$lengths[$_]-1),0..$last));
    return $words;
}

# Extracts the id and file name from input of the form
# <#id#>filename<#id#>.  The extracted values are not used further here.
sub do_cmd_htmlfilename {
    my $input = shift;

    my ($id,$filename) = $input =~ /^<#(\d+)#>(.*?)<#\d+#>/;
}

# KEC 2-26-05
# do_cmd_addcontentsline adds support for the addcontentsline latex command. It evaluates
# the arguments to the addcontentsline command and determines where to put the information.  Three
# global lists are kept: for table of contents, list of tables, and list of figures entries.
# Entries are saved in the lists in the order they are encountered so they can be retrieved
# in the same order.
my (%toc_data);
sub do_cmd_addcontentsline {
    &do_cmd_real_addcontentsline(@_);
}

# Parses the three braced arguments of \addcontentsline and returns the
# anchor produced by make_contents_entry followed by the remaining input.
sub do_cmd_real_addcontentsline {
    my $data = shift;
    my ($extension,$pat,$unit,$entry);

    # The data is sent to us as fields delimited by their ID #'s.  Extract the
    # fields.  The first is the extension of the file to which the cross-reference
    # would be written by LaTeX, such as {toc}, {lot} or {lof}.  The second is either
    # {section}, {subsection}, etc. for a toc entry, or {table} or {figure}
    # for a lot or lof extension (must match the first argument), and
    # the third is the name of the entry.  The position in the document represents
    # an anchor that must be built to provide the linkage from the entry.
    $extension = &missing_braces unless (
        ($data =~ s/$next_pair_pr_rx/$extension=$2;''/eo)
        ||($data =~ s/$next_pair_rx/$extension=$2;''/eo));
    $unit = &missing_braces unless (
        ($data =~ s/$next_pair_pr_rx/$unit=$2;''/eo)
        ||($data =~ s/$next_pair_rx/$unit=$2;''/eo));
    # For the third field the bracket id ($1) is kept as well; it becomes
    # the anchor name.
    $entry = &missing_braces unless (
        ($data =~ s/$next_pair_pr_rx/$pat=$1;$entry=$2;''/eo)
        ||($data =~ s/$next_pair_rx/$pat=$1;$entry=$2;''/eo));

    $contents_entry = &make_contents_entry($extension,$pat,$entry,$unit);
    return ($contents_entry . $data);
}

# Creates and saves a contents entry (toc, lot, lof) to strings for later use,
# and returns the entry to be inserted into the stream.
#
#   $extension - first \addcontentsline argument (toc, lot, lof)
#   $br_id     - bracket id used as the anchor name
#   $str       - text of the entry
#   $unit      - 'table' or 'figure'; only these two are saved, in
#                $table_captions and $figure_captions respectively
sub make_contents_entry {
    local($extension,$br_id, $str, $unit) = @_;
    my $words = '';
    my ($thisref);

    # If TITLE is not yet available use $before.
    $TITLE = $saved_title
        if (($saved_title)&&(!($TITLE)||($TITLE eq $default_title)));
    $TITLE = $before unless $TITLE;
    # Save the reference
    if ($SHOW_SECTION_NUMBERS) {
        $words = &get_first_words($TITLE, 1);
    } else {
        $words = &get_first_words($TITLE, 4);
    }
    $words = 'no title' unless $words;

    # NOTE(review): the two substitutions below contain HTML-like literals
    # that were lost in the copy of this file under review; they are
    # restored here from the equivalent code in latex2html.pl
    # (do_cmd_caption) -- confirm against the canonical bacula.perl.

    #
    # any \label in the $str will have already
    # created a label where the \addcontentsline occurred.
    # This has to be removed, so that the desired label
    # will be found on the toc page.
    #
    if ($str =~ /tex2html_anchor_mark/ ) {
        $str =~ s/><tex2html_anchor_mark><\/A><A//g;
    }
    #
    # resolve and clean-up the hyper-links in the entry
    #
    if ($str =~ /$cross_ref_mark/ ) {
        $str =~ s/$cross_ref_mark#([^#]+)#([^>]+)>$cross_ref_mark/
            do { ($label,$id) = ($1,$2);
                $ref_label = $external_labels{$label} unless
                    ($ref_label = $ref_files{$label});
                '"' . "$ref_label#$label" . '">' .
                &get_ref_mark($label,$id)}
        /geo;
    }
    $str =~ s/<\#[^\#>]*\#>//go;
    #RRM
    # recognise \char combinations, for a \backslash
    #
    $str =~ s/\&\#;\'134/\\/g;          # restore \\s
    $str =~ s/\&\#;\`<BR> /\\/g;        #  ditto
    $str =~ s/\&\#;*SPMquot;92/\\/g;    #  ditto

    $thisref = &make_named_href('',"$CURRENT_FILE#$br_id",$str);
    $thisref =~ s/\n//g;

    # Now we build the actual entry that will go in the lot and lof.
    # If this is the first entry, we have to put a leading newline.
    if ($unit eq 'table' ) {
        if (!$table_captions) { $table_captions = "\n";}
        $table_captions .= "<LI>$thisref\n";
    } elsif ($unit eq 'figure') {
        if (!$figure_captions) { $figure_captions = "\n"; }
        $figure_captions .= "<LI>$thisref\n";
    }
    # The invisible anchor that the saved entry links back to.
    "<A NAME=\"$br_id\">$anchor_invisible_mark<\/A>";
}

# This is needed to keep latex2html from trying to make an image for the registered
# trademark symbol (R).  This wraps the command in a deferred wrapper so it can be
# processed as a normal command later on.  If this subroutine is not present, latex2html
# invokes latex to create an image for the symbol, which looks bad.
sub wrap_cmd_textregistered {
    local($cmd, $_) = @_;
    (&make_deferred_wrapper(1).$cmd.&make_deferred_wrapper(0),$_)
}

# KEC
# Copied from latex2html.pl and modified to create a file of image translations.
#  The problem is that latex2html creates new image filenames like imgXXX.png, where
#  XXX is a number sequentially assigned.  This is fine but makes for very unfriendly
#  image filenames. I looked into changing this behavior and it seems very much embedded
#  into the latex2html code, not easy to change without risking breaking something.
# So I'm taking the approach here to write out a file of image filename translations,
#  to reference the original filenames from the new filenames.  This way post-processing
#  can be done outside of latex2html to rename the files and substitute the meaningful
#  image names in the html code generated by latex2html.  This post-processing is done
#  by a program external to latex2html.
#
# What we do is this: This subroutine is called to output images.tex, a tex file passed to
#  latex to convert the original images to .ps.  The string $latex_body contains info for
#  each image file, in the form of a unique id and the original filename.  We extract both;
#  the id is used to look up the new filename in the %id_map hash.  The new and old filenames
#  are output into the file 'imagename_translations' separated by \001.
#
# Writes ${PREFIX}images.tex (the LaTeX source for all images) and copies
# the .bbl and .aux files.  Nothing is done unless %latex_body is non-empty
# and $latex_body contains 'newpage'.  Dies if the file cannot be opened.
sub make_image_file {
    do {
        print "\nWriting image file ...\n";
        # Three-argument open: the path is never parsed for a mode.
        open(ENV, '>', ".$dd${PREFIX}images.tex")
            || die "\nCannot write '${PREFIX}images.tex': $!\n";
        print ENV &make_latex($latex_body);
        print ENV "\n";
        close ENV;
        &copy_file($FILE, "bbl");
        &copy_file($FILE, "aux");
    } if ((%latex_body) && ($latex_body =~ /newpage/));
}

# KEC
# Copied from latex2html.pl and modified to create a file of image translations.
#  The problem is that latex2html creates new image filenames like imgXXX.png, where
#  XXX is a number sequentially assigned.  This is fine but makes for very unfriendly
#  image filenames. I looked into changing this behavior and it seems very much embedded
#  into the latex2html code, not easy to change without risking breaking something.
# So I'm taking the approach here to write out a file of image filename translations,
#  to reference the original filenames from the new filenames.  This post-processing
#  can be done outside of latex2html to rename the files and substitute the meaningful
#  image names in the html code generated by latex2html.  This post-processing is done
#  by a program external to latex2html.
#
# What we do is this: This subroutine is called to process images.  Code has been inserted
#  about 100 lines below this to create the list of filenames to translate.  See comments there for
#  details.
#

# Generate images for unknown environments, equations etc, and replace
# the markers in the main text with them.
# - $cached_env_img maps encoded contents to image URL's
# - $id_map maps $env$id to page numbers in the generated latex file and after
# the images are generated, maps page numbers to image URL's
# - $page_map maps page_numbers to image URL's (temporary map);
# Uses global variables $id_map and $cached_env_img,
# $new_page_num and $latex_body
sub make_images {
    local($name, $contents, $raw_contents, $uucontents, $page_num,
          $uucontents, %page_map, $img);
    # It is necessary to run LaTeX this early because we need the log file
    # which contains information used to determine equation alignment
    if ( $latex_body =~ /newpage/) {
        print "\n";
        if ($LATEX_DUMP) {
            # dump a pre-compiled format
            if (!(-f "${PREFIX}images.fmt")) {
                print "$INILATEX ./${PREFIX}images.tex\n"
                    if (($DEBUG)||($VERBOSITY > 1));
                print "dumping ${PREFIX}images.fmt\n"
                    unless ( L2hos->syswait("$INILATEX ./${PREFIX}images.tex"));
            }
            local ($img_fmt) = (-f "${PREFIX}images.fmt");
            if ($img_fmt) {
                # use the pre-compiled format
                print "$TEX \"&./${PREFIX}images\" ./${PREFIX}images.tex\n"
                    if (($DEBUG)||($VERBOSITY > 1));
                L2hos->syswait("$TEX \"&./${PREFIX}images\" ./${PREFIX}images.tex");
            } elsif (-f "${PREFIX}images.dvi") {
                print "${PREFIX}images.fmt failed, proceeding anyway\n";
            } else {
                print "${PREFIX}images.fmt failed, trying without it\n";
                print "$LATEX ./${PREFIX}images.tex\n"
                    if (($DEBUG)||($VERBOSITY > 1));
                L2hos->syswait("$LATEX ./${PREFIX}images.tex");
            }
        } else { &make_latex_images() }
#       local($latex_call) = "$LATEX .$dd${PREFIX}images.tex";
#       print "$latex_call\n" if (($DEBUG)||($VERBOSITY > 1));
#       L2hos->syswait("$latex_call");
##      print "$LATEX ./${PREFIX}images.tex\n" if (($DEBUG)||($VERBOSITY > 1));
##      L2hos->syswait("$LATEX ./${PREFIX}images.tex");
##  }
        $LaTeXERROR = 0;
        &process_log_file("./${PREFIX}images.log"); # Get image size info
    }
    if ($NO_IMAGES) {
        # Copy the placeholder icon into the current directory once.
        my $img = "image.$IMAGE_TYPE";
        my $img_path = "$LATEX2HTMLDIR${dd}icons$dd$img";
        L2hos->Copy($img_path, ".$dd$img")
            if(-e $img_path && !-e $img);
    }
    elsif ((!$NOLATEX) && ($latex_body =~ /newpage/) && !($LaTeXERROR)) {
        print "\nGenerating postscript images using dvips ...\n";
        &make_tmp_dir;  # sets $TMPDIR and $DESTDIR
        $IMAGE_PREFIX =~ s/^_//o if ($TMPDIR);

        local($dvips_call) =
            "$DVIPS -S1 -i $DVIPSOPT -o$TMPDIR$dd$IMAGE_PREFIX .${dd}${PREFIX}images.dvi\n";
        print $dvips_call if (($DEBUG)||($VERBOSITY > 1));

        if ((($PREFIX=~/\./)||($TMPDIR=~/\./)) && not($DVIPS_SAFE)) {
            print " *** There is a '.' in $TMPDIR or $PREFIX filename;\n"
                . " dvips will fail, so image-generation is aborted ***\n";
        } else {
            &close_dbm_database if $DJGPP;
            L2hos->syswait($dvips_call) && print "Error: $!\n";
            &open_dbm_database if $DJGPP;
        }

        # append .ps suffix to the filenames
        if(opendir(DIR, $TMPDIR || '.')) {
            # use list-context instead; thanks De-Wei Yin
            my @ALL_IMAGE_FILES = grep /^$IMAGE_PREFIX\d+$/o, readdir(DIR);
            foreach (@ALL_IMAGE_FILES) {
                L2hos->Rename("$TMPDIR$dd$_", "$TMPDIR$dd$_.ps");
            }
            closedir(DIR);
        } else {
            print "\nError: Cannot read dir '$TMPDIR': $!\n";
        }
    }
    do {print "\n\n*** LaTeXERROR"; return()} if ($LaTeXERROR);
    return() if ($LaTeXERROR); # empty .dvi file
    L2hos->Unlink(".$dd${PREFIX}images.dvi") unless $DEBUG;

    print "\n *** updating image cache\n" if ($VERBOSITY > 1);
    while ( ($uucontents, $_) = each %cached_env_img) {
        delete $cached_env_img{$uucontents}
            if ((/$PREFIX$img_rx\.$IMAGE_TYPE/o)&&!($DESTDIR&&$NO_SUBDIR));
        $cached_env_img{$uucontents} = $_
            if (s/$PREFIX$img_rx\.new/$PREFIX$1.$IMAGE_TYPE/go);
    }

    # Modified from the original latex2html to translate image filenames to meaningful ones.
    #  KEC  5-22-05.
    print "\nWriting imagename_translations file\n";
    open KC,">imagename_translations" or die "Cannot open filename translation file for writing";
    my ($oldname_kc,$newname_kc,$temp_kc,%done_kc);
    while ((undef,$temp_kc) = each %id_map) {
        # Here we generate the file containing the list of old and new filenames.
        #  The old and new names are extracted from variables in scope at the time
        #  this is run.  The values of the %id_map hash contain either the number of the
        #  image file to be created (if an old image file doesn't exist) or the tag to be placed
        #  inside the html file (if an old image file does exist).  We extract the info in either
        #  case.
        if ($temp_kc =~ /^\d+\#\d+$/) {
            my $kcname;
            $kcname = $orig_name_map{$temp_kc};
            $kcname =~ s/\*/star/;
            ($oldname_kc) = $img_params{$kcname} =~ /ALT=\"\\includegraphics\{(.*?)\}/s;
            ($newname_kc) = split (/#/,$temp_kc);
            $newname_kc = "img" . $newname_kc . ".png";
        } else {
            ($newname_kc,$oldname_kc) = $temp_kc =~ /SRC=\"(.*?)\".*ALT=\"\\includegraphics\{(.*?)\}/s;
        }
        # If this is a math-type image, $oldname_kc will be blank.  Don't do anything in that case since
        #  there is no meaningful image filename.
        # Each new name is written at most once (%done_kc).
        if (!exists($done_kc{$newname_kc}) and $oldname_kc) {
            print KC "$newname_kc\001$oldname_kc\n";
        }
        $done_kc{$newname_kc} = '';
    }
    close KC;

    print "\n *** removing unnecessary images ***\n" if ($VERBOSITY > 1);
    while ( ($name, $page_num) = each %id_map) {
        $contents = $latex_body{$name};
        if ($page_num =~ /^\d+\#\d+$/) { # If it is a page number
            do { # Extract the page, convert and save it
                $img = &extract_image($page_num,$orig_name_map{$page_num});
                if ($contents =~ /$htmlimage_rx/) {
                    $uucontents = &special_encoding($env,$2,$contents);
                } elsif ($contents =~ /$htmlimage_pr_rx/) {
                    $uucontents = &special_encoding($env,$2,$contents);
                } else {
                    $uucontents = &encode(&addto_encoding($contents,$contents));
                }
                if (($HTML_VERSION >=3.2)||!($contents=~/$order_sensitive_rx/)){
                    $cached_env_img{$uucontents} = $img;
                } else {
                    # Blow it away so it is not saved for next time
                    delete $cached_env_img{$uucontents};
                    print "\nimage $name not recycled, contents may change (e.g. numbering)";
                }
                $page_map{$page_num} = $img;
            } unless ($img = $page_map{$page_num}); # unless we've just done it
            $id_map{$name} = $img;
        } else {
            $img = $page_num; # it is already available from previous runs
        }
        print STDOUT " *** image done ***\n" if ($VERBOSITY > 2);
    }
    &write_warnings(
        "\nOne of the images is more than one page long.\n".
        "This may cause the rest of the images to get out of sync.\n\n")
        if (-f sprintf("%s%.3d%s", $IMAGE_PREFIX, ++$new_page_num, ".ps"));
    print "\n *** no more images ***\n" if ($VERBOSITY > 1);
    # MRO: The following cleanup seems to be incorrect: The DBM is
    # still open at this stage, this causes a lot of unlink errors
    #
    #do { &cleanup; print "\n *** clean ***\n" if ($VERBOSITY > 1);}
    # unless $DJGPP;
}

## KEC: Copied &text_cleanup here to modify it.  It was filtering out double
# dashes such as {-}{-}sysconfig.  This would be used as an illustration
# of a command-line argument.  It was being changed to a single dash.

# This routine must be called once on the text only,
# else it will "eat up" sensitive constructs.
# Operates on $_ in place.
sub text_cleanup {
    # MRO: replaced $* with /m
    s/(\s*\n){3,}/\n\n/gom;           # Replace consecutive blank lines with one
    s/<(\/?)P>\s*(\w)/<$1P>\n$2/gom;  # clean up paragraph starts and ends
    s/$O\d+$C//go;                    # Get rid of bracket id's
    s/$OP\d+$CP//go;                  # Get rid of processed bracket id's
    # NOTE(review): the HTML-like literals in the dash pattern and the
    # &nbsp; replacement below were lost in the copy of this file under
    # review; they are restored from text_cleanup in latex2html.pl --
    # confirm against the canonical bacula.perl.
    # KEC: This is the line causing trouble...
    #s/(<!)?--?(>)?/(length($1) || length($2)) ? "$1--$2" : "-"/ge;
    # Dashes that are part of an HTML comment delimiter are normalised to
    # "--"; any other run of one or two dashes is left exactly as matched.
    s/(<!)?--?(>)?/(length($1) || length($2)) ? "$1--$2" : $&/ge;
    # Spacing commands
    s/\\( |$)/ /go;
    #JKR: There should be no more comments in the source now.
    #s/([^\\]?)%/$1/go;        # Remove the comment character
    # Cannot treat \, as a command because , is a delimiter ...
    s/\\,/ /go;
    # Replace tilde's with non-breaking spaces
    s/ *~/&nbsp;/g;

    ### DANGEROUS ?? ###
    # remove redundant (not <P></P>) empty tags, incl. with attributes
    s/\n?<([^PD >][^>]*)>\s*<\/\1>//g;
    s/\n?<([^PD >][^>]*)>\s*<\/\1>//g;
    # remove redundant empty tags (not </P><P> or <TD> or <TH>)
    s/<\/(TT|[^PTH][A-Z]+)><\1>//g;
    s/<([^PD ]+)(\s[^>]*)?>\n*<\/\1>//g;

    #JCL(jcl-hex)
    # Replace ^^ special chars (according to p.47 of the TeX book)
    # Useful when coming from the .aux file (german umlauts, etc.)
    s/\^\^([^0-9a-f])/chr((64+ord($1))&127)/ge;
    s/\^\^([0-9a-f][0-9a-f])/chr(hex($1))/ge;
}

1;  # Must be present as the last line.