1 # This file contains subroutines for use by the latex2html system.
2 # This file is executed due to a \usepackage{bacula} statement
3 # in the LaTeX source. The subroutines here implement functionality
4 # specific to the generation of html manuals for the Bacula project.
5 # Some of the added functionality is designed to extend the capabilities
6 # of latex2html and some is to change its behavior.
9 # Returns the minimum of any number of numeric arguments.
12 while ($test = shift) {
13 $tmp = $test if ($test < $tmp);
18 # These two are copied from
19 # /usr/lib/latex2html/style/hthtml.perl,
20 # from the subroutine do_cmd_htmladdnormallink.
21 # They have been renamed, the name argument has been removed,
22 # and the other two arguments have been reversed.
25 local($text, $url, $href);
26 local($opt, $dummy) = &get_next_optional_argument;
27 $text = &missing_braces unless
28 ((s/$next_pair_pr_rx/$text = $2; ''/eo)
29 ||(s/$next_pair_rx/$text = $2; ''/eo));
30 $url = &missing_braces unless
31 ((s/$next_pair_pr_rx/$url = $2; ''/eo)
32 ||(s/$next_pair_rx/$url = $2; ''/eo));
33 $*=1; s/^\s+/\n/; $*=0;
34 $href = &make_href($url,$text);
35 print "\nHREF:$href" if ($VERBOSITY > 3);
42 local($opt, $dummy) = &get_next_optional_argument;
43 $text = &missing_braces unless
44 ((s/$next_pair_pr_rx/$text = $2; ''/eo)
45 ||(s/$next_pair_rx/$text = $2; ''/eo));
46 &process_ref($cross_ref_mark,$cross_ref_mark,$text);
# Prepends a literal "<" to the text passed in $_[0] and returns the result
# (the join is the sub's last expression, so its value is the return value).
# "\<" in a double-quoted string is just "<"; the backslash has no effect.
# NOTE(review): presumably the latex2html handler for a \lt command, going by
# the do_cmd_ naming -- confirm against the LaTeX sources.
49 sub do_cmd_lt { join('',"\<",$_[0]); }
# Prepends a literal ">" to the text passed in $_[0] and returns the result
# (the join is the sub's last expression, so its value is the return value).
# "\>" in a double-quoted string is just ">"; the backslash has no effect.
# NOTE(review): presumably the latex2html handler for a \gt command, going by
# the do_cmd_ naming -- confirm against the LaTeX sources.
50 sub do_cmd_gt { join('',"\>",$_[0]); }
52 # KEC Copied from latex2html.pl and modified to prevent
53 # filename collisions. This is done with a static hash of
54 # already-used filenames. An integer is appended to the
55 # filename if a collision would result without it.
56 # The addition of the integer is done by removing
57 # character(s) before .html if adding the integer would result
58 # in a filename longer than 32 characters. Usually just removing
59 # the character before .html would resolve the collision, but we
60 # add the integer anyway. The first integer that resolves the
62 # If a filename is desired that is 'index.html' or any case
63 # variation of that, it is changed to index_page.html,
64 # index_page1.html, etc.
67 #RRM Extended to allow customised filenames, set $CUSTOM_TITLES
68 # or long title from the section-name, set $LONG_TITLES
70 { my %used_names; # Static hash.
72 local($sec_name, $packed_curr_sec_id) = @_;
73 local($title,$making_name,$saved) = ('',1,'');
77 # This alerts the subroutine textohtmlindex not to increment its index counter on the next call.
78 &do_cmd_textohtmlindex("\001noincrement");
79 &process_command($sections_rx, $_) if /^$sections_rx/;
80 $title = &make_bacula_title($TITLE)
81 unless ((! $TITLE) || ($TITLE eq $default_title));
83 } elsif ($CUSTOM_TITLES) {
85 # This alerts the subroutine textohtmlindex not to increment its index counter on the next call.
86 &do_cmd_textohtmlindex("\001noincrement");
87 &process_command($sections_rx, $_) if /^$sections_rx/;
88 $title = &custom_title_hook($TITLE)
89 unless ((! $TITLE) || ($TITLE eq $default_title));
93 #ensure no more than 32 characters, including .html extension
94 $title =~ s/^(.{1,27}).*$/$1/;
96 $final_name = join("", ${PREFIX}, $title, $EXTN);
98 # Remove 0's from the end of $packed_curr_sec_id
99 $packed_curr_sec_id =~ s/(_0)*$//;
100 $packed_curr_sec_id =~ s/^\d+$//o; # Top level file
101 $final_name = join("",($packed_curr_sec_id ?
102 "${PREFIX}$NODE_NAME". ++$OUT_NODE : $sec_name), $EXTN);
105 # Change the name from index to index_page to avoid conflicts with
107 $final_name =~ s/^(index)\.html$/$1_Page.html/i;
109 # If the $final_name is already used, put an integer before the
110 # # .html to make it unique.
112 my $saved_name = $final_name;
113 while (exists($used_names{$final_name})) {
114 $final_name = $saved_name;
115 my ($filename,$ext) = $final_name =~ /(.*)(\..*)$/;
116 my $numlen = length(++$integer);
118 # If the filename (after adding the integer) would be longer than
119 # 32 characters, insert the integer within it.
120 if (((my $namelen = length($final_name)) + $numlen) >= 32) {
121 substr($filename,-$numlen) = $integer;
123 $filename .= $integer;
125 $final_name = $filename . $ext;
128 # Save the $final_name in the hash to mark it as being used.
129 $used_names{$final_name} = undef;
131 # Save the first name evaluated here. This is the name of the top-level html file, and
132 # can be used to produce the index.html hard link at the end.
133 $OVERALL_TITLE = $final_name if (!defined $OVERALL_TITLE);
139 sub make_bacula_title {
141 local($num_words) = $LONG_TITLES;
142 #RRM: scan twice for short words, due to the $4 overlap
143 # Cannot use \b , else words break at accented letters
144 $_ =~ s/(^|\s)\s*($GENERIC_WORDS)(\'|(\s))/$4/ig;
145 $_ =~ s/(^|\s)\s*($GENERIC_WORDS)(\'|(\s))/$4/ig;
146 #remove leading numbering, unless that's all there is.
148 if (!(/^\d+(\.\d*)*\s*$/)&&(s/^\s*(\d+(\.\d*)*)\s*/$sec_num=$1;''/e))
150 &remove_markers; s/<[^>]*>//g; #remove tags
151 #revert entities, etc. to TeX-form...
152 s/([\200-\377])/"\&#".ord($1).";"/eg;
153 $_ = &revert_to_raw_tex($_);
155 # get $LONG_TITLES number of words from what remains
156 $_ = &get_bacula_words($_, $num_words) if ($num_words);
157 # ...and cleanup accents, spaces and punctuation
158 $_ = join('', ($SHOW_SECTION_NUMBERS ? $sec_num : ''), $_);
161 s/\'s/s/ig; # Replace 's with just the s.
170 # KEC 2-21-05 Changed completely again.
172 # We take the first real words specified by $min from the string.
173 # Remove all markers and markups.
174 # Split the line into words.
175 # Determine how many words we should process.
176 # Return if no words to process.
177 # Determine lengths of the words.
178 # Reduce the length of the longest words in the list until the
179 # total length of all the words is acceptable.
180 # Put the words back together and return the result.
182 sub get_bacula_words {
183 local($_, $min) = @_;
187 my ($oalength,@lengths,$last,$thislen);
190 #no limit if $min is negative
191 $min = 1000 if ($min < 0);
194 #strip unwanted HTML constructs
195 s/<\/?(P|BR|H)[^>]*>/ /g;
196 #remove leading white space and \001 characters
199 s/(<[^>]*>(#[^#]*#)?)//ge;
201 # Split $_ into a list of words.
202 my @wrds = split /\s+|\-{3,}/;
203 $last = &min($min - 1,$#wrds);
204 return '' if ($last < 0);
206 # Get a list of word lengths up to the last word we're to process.
207 # Add one to each for the separator.
208 @lengths = map (length($_)+1,@wrds[0..$last]);
210 $thislen = $maxlen + 1; # One more than the desired max length.
213 @lengths = map (&min($_,$thislen),@lengths);
215 foreach (@lengths) {$oalength += $_;}
216 } until ($oalength <= $maxlen);
217 $words = join(" ",map (substr($wrds[$_],0,$lengths[$_]-1),0..$last));
# NOTE(review): the rest of this sub (including where $input is set and what
# is returned) is not visible in this listing; only the line below is described.
221 sub do_cmd_htmlfilename {
# Pull two pieces out of the start of $input: the digits inside the first
# <#N#> marker ($id) and the shortest text up to the next <#N#> marker
# ($filename). Both are undef if $input does not start with that shape.
224 my ($id,$filename) = $input =~ /^<#(\d+)#>(.*?)<#\d+#>/;
228 # do_cmd_addcontentsline adds support for the addcontentsline latex command. It evaluates
229 # the arguments to the addcontentsline command and determines where to put the information. Three
230 # global lists are kept: for table of contents, list of tables, and list of figures entries.
231 # Entries are saved in the lists in the order they are encountered so they can be retrieved
# Thin wrapper: hands its argument list, unchanged, to
# do_cmd_real_addcontentsline, which does the actual work.
# NOTE(review): the closing lines of this sub are not visible here; presumably
# the callee's result is also this sub's return value -- confirm.
234 sub do_cmd_addcontentsline {
235 &do_cmd_real_addcontentsline(@_);
237 sub do_cmd_real_addcontentsline {
239 my ($extension,$pat,$unit,$entry);
241 # The data is sent to us as fields delimited by their ID #'s. Extract the
242 # fields. The first is the extension of the file to which the cross-reference
243 # would be written by LaTeX, such as {toc}, {lot} or {lof}. The second is either
244 # {section}, {subsection}, etc. for a toc entry, or {table} or {figure}
245 # for a lot or lof extension (must match the first argument), and
246 # the third is the name of the entry. The position in the document represents
247 # an anchor that must be built to provide the linkage from the entry.
248 $extension = &missing_braces unless (
249 ($data =~ s/$next_pair_pr_rx/$extension=$2;''/eo)
250 ||($data =~ s/$next_pair_rx/$extension=$2;''/eo));
251 $unit = &missing_braces unless (
252 ($data =~ s/$next_pair_pr_rx/$unit=$2;''/eo)
253 ||($data =~ s/$next_pair_rx/$unit=$2;''/eo));
254 $entry = &missing_braces unless (
255 ($data =~ s/$next_pair_pr_rx/$pat=$1;$entry=$2;''/eo)
256 ||($data =~ s/$next_pair_rx/$pat=$1;$entry=$2;''/eo));
258 $contents_entry = &make_contents_entry($extension,$pat,$entry,$unit);
259 return ($contents_entry . $data);
262 # Creates and saves a contents entry (toc, lot, lof) to strings for later use,
263 # and returns the entry to be inserted into the stream.
265 sub make_contents_entry {
266 local($extension,$br_id, $str, $unit) = @_;
270 # If TITLE is not yet available use $before.
271 $TITLE = $saved_title if (($saved_title)&&(!($TITLE)||($TITLE eq $default_title)));
272 $TITLE = $before unless $TITLE;
274 if ($SHOW_SECTION_NUMBERS) {
275 $words = &get_first_words($TITLE, 1);
277 $words = &get_first_words($TITLE, 4);
279 $words = 'no title' unless $words;
282 # any \label in the $str will have already
283 # created a label where the \addcontentsline occurred.
284 # This has to be removed, so that the desired label
285 # will be found on the toc page.
287 if ($str =~ /tex2html_anchor_mark/ ) {
288 $str =~ s/><tex2html_anchor_mark><\/A><A//g;
291 # resolve and clean-up the hyperlink entries
292 # so they can be saved
294 if ($str =~ /$cross_ref_mark/ ) {
295 my ($label,$id,$ref_label);
296 $str =~ s/$cross_ref_mark#([^#]+)#([^>]+)>$cross_ref_mark/
297 do { ($label,$id) = ($1,$2);
298 $ref_label = $external_labels{$label} unless
299 ($ref_label = $ref_files{$label});
300 '"' . "$ref_label#$label" . '">' .
301 &get_ref_mark($label,$id)}
304 $str =~ s/<\#[^\#>]*\#>//go;
306 # recognise \char combinations, for a \backslash
308 $str =~ s/\&\#;\'134/\\/g; # restore \\s
309 $str =~ s/\&\#;\`<BR> /\\/g; # ditto
310 $str =~ s/\&\#;*SPMquot;92/\\/g; # ditto
312 $thisref = &make_named_href('',"$CURRENT_FILE#$br_id",$str);
315 # Now we build the actual entry that will go in the lot and lof.
316 # If this is the first entry, we have to put a leading newline.
317 if ($unit eq 'table' ) {
318 if (!$table_captions) { $table_captions = "\n";}
319 $table_captions .= "<LI>$thisref\n";
320 } elsif ($unit eq 'figure') {
321 if (!$figure_captions) { $figure_captions = "\n"; }
322 $figure_captions .= "<LI>$thisref\n";
324 "<A NAME=\"$br_id\">$anchor_invisible_mark<\/A>";
327 # This is needed to keep latex2html from trying to make an image for the registered
328 # trademark symbol (R). This wraps the command in a deferred wrapper so it can be
329 # processed as a normal command later on. If this subroutine is not put in latex2html
330 # invokes latex to create an image for the symbol, which looks bad.
331 sub wrap_cmd_textregistered {
# First argument is the command text; the second is localized into $_ and is
# passed back to the caller untouched.
332 local($cmd, $_) = @_;
# Returns a two-element list: $cmd enclosed between the strings produced by
# make_deferred_wrapper(1) and make_deferred_wrapper(0), followed by $_.
333 (&make_deferred_wrapper(1).$cmd.&make_deferred_wrapper(0),$_)
337 # Copied from latex2html.pl and modified to create a file of image translations.
338 # The problem is that latex2html creates new image filenames like imgXXX.png, where
339 # XXX is a number sequentially assigned. This is fine but makes for very unfriendly
340 # image filenames. I looked into changing this behavior and it seems very much embedded
341 # into the latex2html code, not easy to change without risking breaking something.
342 # So I'm taking the approach here to write out a file of image filename translations,
343 # to reference the original filenames from the new filenames. This way post-processing
344 # can be done outside of latex2html to rename the files and substitute the meaningful
345 # image names in the html code generated by latex2html. This post-processing is done
346 # by a program external to latex2html.
348 # What we do is this: This subroutine is called to output images.tex, a tex file passed to
349 # latex to convert the original images to .ps. The string $latex_body contains info for
350 # each image file, in the form of a unique id and the original filename. We extract both; the
351 # id is used to look up the new filename in the %id_map hash. The new and old filenames
352 # are output into the file 'imagename_translations' separated by \001.
# Writes the LaTeX source for the images: opens ${PREFIX}images.tex (under
# ".$dd") for writing and prints the result of make_latex($latex_body) into it.
# Dies with the OS error text if the file cannot be opened.
# NOTE(review): several lines of this sub are not visible in this listing
# (e.g. the opener of the block closed by "} if (...)" below, and any close of
# the ENV handle); only the visible lines are described.
354 sub make_image_file {
356 print "\nWriting image file ...\n";
357 open(ENV,">.$dd${PREFIX}images.tex")
358 || die "\nCannot write '${PREFIX}images.tex': $!\n";
359 print ENV &make_latex($latex_body);
# Copy the .bbl and .aux companions of $FILE. The calls had been corrupted to
# "©_file": an HTML-entity decode turned "&copy" into the copyright sign. The
# "&" call sigil and the sub name copy_file are restored here.
362 &copy_file($FILE, "bbl");
363 &copy_file($FILE, "aux");
# The enclosing block only runs when %latex_body is non-empty and $latex_body
# contains "newpage".
364 } if ((%latex_body) && ($latex_body =~ /newpage/));
369 # Copied from latex2html.pl and modified to create a file of image translations.
371 # The problem is that latex2html creates new image filenames like imgXXX.png, where
372 # XXX is a number sequentially assigned. This is fine but makes for very unfriendly
373 # image filenames. I looked into changing this behavior and it seems very much embedded
374 # into the latex2html code, not easy to change without risking breaking something.
375 # So I'm taking the approach here to write out a file of image filename translations,
376 # to reference the original filenames from the new filenames. This post-processing
377 # can be done outside of latex2html to rename the files and substitute the meaningful
378 # image names in the html code generated by latex2html. This post-processing is done
379 # by a program external to latex2html.
381 # What we do is this: This subroutine is called to process images. Code has been inserted
382 # about 100 lines below this to create the list of filenames to translate. See comments there for
386 # Generate images for unknown environments, equations etc, and replace
387 # the markers in the main text with them.
388 # - $cached_env_img maps encoded contents to image URL's
389 # - $id_map maps $env$id to page numbers in the generated latex file and after
390 # the images are generated, maps page numbers to image URL's
391 # - $page_map maps page_numbers to image URL's (temporary map);
392 # Uses global variables $id_map and $cached_env_img,
393 # $new_page_num and $latex_body
397 local($name, $contents, $raw_contents, $uucontents, $page_num,
398 $uucontents, %page_map, $img);
399 # It is necessary to run LaTeX this early because we need the log file
400 # which contains information used to determine equation alignment
401 if ( $latex_body =~ /newpage/) {
404 # dump a pre-compiled format
405 if (!(-f "${PREFIX}images.fmt")) {
406 print "$INILATEX ./${PREFIX}images.tex\n"
407 if (($DEBUG)||($VERBOSITY > 1));
408 print "dumping ${PREFIX}images.fmt\n"
409 unless ( L2hos->syswait("$INILATEX ./${PREFIX}images.tex"));
411 local ($img_fmt) = (-f "${PREFIX}images.fmt");
413 # use the pre-compiled format
414 print "$TEX \"&./${PREFIX}images\" ./${PREFIX}images.tex\n"
415 if (($DEBUG)||($VERBOSITY > 1));
416 L2hos->syswait("$TEX \"&./${PREFIX}images\" ./${PREFIX}images.tex");
417 } elsif (-f "${PREFIX}images.dvi") {
418 print "${PREFIX}images.fmt failed, proceeding anyway\n";
420 print "${PREFIX}images.fmt failed, trying without it\n";
421 print "$LATEX ./${PREFIX}images.tex\n"
422 if (($DEBUG)||($VERBOSITY > 1));
423 L2hos->syswait("$LATEX ./${PREFIX}images.tex");
425 } else { &make_latex_images() }
426 # local($latex_call) = "$LATEX .$dd${PREFIX}images.tex";
427 # print "$latex_call\n" if (($DEBUG)||($VERBOSITY > 1));
428 # L2hos->syswait("$latex_call");
429 ## print "$LATEX ./${PREFIX}images.tex\n" if (($DEBUG)||($VERBOSITY > 1));
430 ## L2hos->syswait("$LATEX ./${PREFIX}images.tex");
433 &process_log_file("./${PREFIX}images.log"); # Get image size info
436 my $img = "image.$IMAGE_TYPE";
437 my $img_path = "$LATEX2HTMLDIR${dd}icons$dd$img";
438 L2hos->Copy($img_path, ".$dd$img")
439 if(-e $img_path && !-e $img);
441 elsif ((!$NOLATEX) && ($latex_body =~ /newpage/) && !($LaTeXERROR)) {
442 print "\nGenerating postscript images using dvips ...\n";
443 &make_tmp_dir; # sets $TMPDIR and $DESTDIR
444 $IMAGE_PREFIX =~ s/^_//o if ($TMPDIR);
447 "$DVIPS -S1 -i $DVIPSOPT -o$TMPDIR$dd$IMAGE_PREFIX .${dd}${PREFIX}images.dvi\n";
448 print $dvips_call if (($DEBUG)||($VERBOSITY > 1));
450 if ((($PREFIX=~/\./)||($TMPDIR=~/\./)) && not($DVIPS_SAFE)) {
451 print " *** There is a '.' in $TMPDIR or $PREFIX filename;\n"
452 . " dvips will fail, so image-generation is aborted ***\n";
454 &close_dbm_database if $DJGPP;
455 L2hos->syswait($dvips_call) && print "Error: $!\n";
456 &open_dbm_database if $DJGPP;
459 # append .ps suffix to the filenames
460 if(opendir(DIR, $TMPDIR || '.')) {
461 # use list-context instead; thanks De-Wei Yin <yin@asc.on.ca>
462 my @ALL_IMAGE_FILES = grep /^$IMAGE_PREFIX\d+$/o, readdir(DIR);
463 foreach (@ALL_IMAGE_FILES) {
464 L2hos->Rename("$TMPDIR$dd$_", "$TMPDIR$dd$_.ps");
468 print "\nError: Cannot read dir '$TMPDIR': $!\n";
471 do {print "\n\n*** LaTeXERROR"; return()} if ($LaTeXERROR);
472 return() if ($LaTeXERROR); # empty .dvi file
473 L2hos->Unlink(".$dd${PREFIX}images.dvi") unless $DEBUG;
475 print "\n *** updating image cache\n" if ($VERBOSITY > 1);
476 while ( ($uucontents, $_) = each %cached_env_img) {
477 delete $cached_env_img{$uucontents}
478 if ((/$PREFIX$img_rx\.$IMAGE_TYPE/o)&&!($DESTDIR&&$NO_SUBDIR));
479 $cached_env_img{$uucontents} = $_
480 if (s/$PREFIX$img_rx\.new/$PREFIX$1.$IMAGE_TYPE/go);
483 # Modified from the original latex2html to translate image filenames to meaningful ones.
485 print "\nWriting imagename_translations file\n";
486 open KC,">imagename_translations" or die "Cannot open filename translation file for writing";
487 my ($oldname_kc,$newname_kc,$temp_kc,%done_kc);
488 while ((undef,$temp_kc) = each %id_map) {
489 # Here we generate the file containing the list of old and new filenames.
490 # The old and new names are extracted from variables in scope at the time
491 # this is run. The values of the %id_map hash contain either the number of the
492 # image file to be created (if an old image file doesn't exist) or the tag to be placed
493 # inside the html file (if an old image file does exist). We extract the info in either
495 if ($temp_kc =~ /^\d+\#\d+$/) {
497 $kcname = $orig_name_map{$temp_kc};
498 $kcname =~ s/\*/star/;
499 ($oldname_kc) = $img_params{$kcname} =~ /ALT=\"\\includegraphics\{(.*?)\}/s;
500 ($newname_kc) = split (/#/,$temp_kc);
501 $newname_kc = "img" . $newname_kc . ".png";
503 ($newname_kc,$oldname_kc) = $temp_kc =~ /SRC=\"(.*?)\".*ALT=\"\\includegraphics\{(.*?)\}/s;
505 # If this is a math-type image, $oldname_kc will be blank. Don't do anything in that case since
506 # there is no meaningful image filename.
507 if (!exists($done_kc{$newname_kc}) and $oldname_kc) {
508 print KC "$newname_kc\001$oldname_kc\n";
510 $done_kc{$newname_kc} = '';
514 print "\n *** removing unnecessary images ***\n" if ($VERBOSITY > 1);
515 while ( ($name, $page_num) = each %id_map) {
516 $contents = $latex_body{$name};
518 if ($page_num =~ /^\d+\#\d+$/) { # If it is a page number
519 do { # Extract the page, convert and save it
520 $img = &extract_image($page_num,$orig_name_map{$page_num});
521 if ($contents =~ /$htmlimage_rx/) {
522 $uucontents = &special_encoding($env,$2,$contents);
523 } elsif ($contents =~ /$htmlimage_pr_rx/) {
524 $uucontents = &special_encoding($env,$2,$contents);
526 $uucontents = &encode(&addto_encoding($contents,$contents));
528 if (($HTML_VERSION >=3.2)||!($contents=~/$order_sensitive_rx/)){
529 $cached_env_img{$uucontents} = $img;
531 # Blow it away so it is not saved for next time
532 delete $cached_env_img{$uucontents};
533 print "\nimage $name not recycled, contents may change (e.g. numbering)";
535 $page_map{$page_num} = $img;
536 } unless ($img = $page_map{$page_num}); # unless we've just done it
537 $id_map{$name} = $img;
539 $img = $page_num; # it is already available from previous runs
541 print STDOUT " *** image done ***\n" if ($VERBOSITY > 2);
544 "\nOne of the images is more than one page long.\n".
545 "This may cause the rest of the images to get out of sync.\n\n")
546 if (-f sprintf("%s%.3d%s", $IMAGE_PREFIX, ++$new_page_num, ".ps"));
547 print "\n *** no more images ***\n" if ($VERBOSITY > 1);
548 # MRO: The following cleanup seems to be incorrect: The DBM is
549 # still open at this stage, this causes a lot of unlink errors
551 #do { &cleanup; print "\n *** clean ***\n" if ($VERBOSITY > 1);}
555 ## KEC: Copied &text_cleanup here to modify it. It was filtering out double
556 # dashes such as {-}{-}sysconfig. This would be used as an illustration
557 # of a command-line argument. It was being changed to a single dash.
559 # This routine must be called once on the text only,
560 # else it will "eat up" sensitive constructs.
562 # MRO: replaced $* with /m
563 s/(\s*\n){3,}/\n\n/gom; # Replace consecutive blank lines with one
564 s/<(\/?)P>\s*(\w)/<$1P>\n$2/gom; # clean up paragraph starts and ends
565 s/$O\d+$C//go; # Get rid of bracket id's
566 s/$OP\d+$CP//go; # Get rid of processed bracket id's
567 # KEC: This is the line causing trouble...
568 #s/(<!)?--?(>)?/(length($1) || length($2)) ? "$1--$2" : "-"/ge;
569 s/(<!)?--?(>)?/(length($1) || length($2)) ? "$1--$2" : $&/ge;
572 #JKR: There should be no more comments in the source now.
573 #s/([^\\]?)%/$1/go; # Remove the comment character
574 # Cannot treat \, as a command because , is a delimiter ...
576 # Replace tilde's with non-breaking spaces
580 # remove redundant (not <P></P>) empty tags, incl. with attributes
581 s/\n?<([^PD >][^>]*)>\s*<\/\1>//g;
582 s/\n?<([^PD >][^>]*)>\s*<\/\1>//g;
583 # remove redundant empty tags (not </P><P> or <TD> or <TH>)
584 s/<\/(TT|[^PTH][A-Z]+)><\1>//g;
585 s/<([^PD ]+)(\s[^>]*)?>\n*<\/\1>//g;
589 # Replace ^^ special chars (according to p.47 of the TeX book)
590 # Useful when coming from the .aux file (german umlauts, etc.)
591 s/\^\^([^0-9a-f])/chr((64+ord($1))&127)/ge;
592 s/\^\^([0-9a-f][0-9a-f])/chr(hex($1))/ge;
598 1; # Must be present as the last line.