1 # This module does multiple indices, supporting the style of the LaTeX 'index'
5 # 16-Feb-2005 -- Original Creation. Karl E. Cunningham
6 # 14-Mar-2005 -- Clarified and Consolidated some of the code.
7 # Changed to smoothly handle single and multiple indices.
9 # Two LaTeX index formats are supported...
10 # --- SINGLE INDEX ---
11 # \usepackage{makeidx}
19 # --- MULTIPLE INDICES ---
21 # \usepackage{makeidx}
23 # \makeindex -- latex2html doesn't care but LaTeX does.
24 # \newindex{ref1}{ext1}{ext2}{title1}
25 # \newindex{ref2}{ext1}{ext2}{title2}
26 # \newindex{ref3}{ext1}{ext2}{title3}
27 # \index[ref1]{entry1}
28 # \index[ref1]{entry2}
29 # \index[ref3]{entry3}
30 # \index[ref2]{entry4}
32 # \index[ref3]{entry6}
40 # For the multiple-index style, each index is identified by the ref argument to \newindex, \index,
41 # and \printindex. A default index is allowed, which is indicated by omitting the optional
42 # argument. The default index does not require a \newindex command. As \index commands
43 # are encountered, their entries are stored according
44 # to the ref argument. When the \printindex command is encountered, the stored index
45 # entries for that argument are retrieved and printed. The title for each index is taken
46 # from the last argument in the \newindex command.
47 # While processing \index and \printindex commands, if no argument is given the index entries
48 # are built into a default index. The title of the default index is simply "Index".
49 # This makes the difference between single- and multiple-index processing trivial.
51 # Another method can be used by omitting the \printindex command and just using \include to
52 # pull in index files created by the makeindex program. These files will start with
53 # \begin{theindex}. This command is used to determine where to print the index. Using this
54 # approach, the indices will be output in the same order as the newindex commands were
55 # originally found (see below). Using a combination of \printindex and \include{indexfile} has not
56 # been tested and may produce undesirable results.
58 # The index data are stored in a hash for later sorting and output. As \printindex
59 # commands are handled, the order in which they were found in the TeX files is saved,
60 # associated with the ref argument to \printindex.
62 # We use the original %index hash to store the index data into. We append a \002 followed by the
63 # name of the index to isolate the entries in different indices from each other. This is necessary
64 # so that different indices can have entries with the same name. For the default index, the \002 is
65 # appended without the name.
67 # Since the index order in the output cannot be determined if the \include{indexfile}
68 # command is used, the order will be assumed from the order in which the \newindex
69 # commands were originally seen in the TeX files. This order is saved as well as the
70 # order determined from any \printindex[ref] commands. If \printindex commands are used
71 # to specify the index output, that order will be used. If the \include{idxfile} command
72 # is used, the order of the original newindex commands will be used. In this case the
73 # default index will be printed last since it doesn't have a corresponding \newindex
74 # command and its order cannot be determined. Mixing \printindex and \include{idxfile}
75 # commands in the same file is likely to produce less than satisfactory results.
78 # The hash describing the indices (their titles and ordering) is named %indices. It contains the following data:
81 # $ref1 => $indextitle ,
82 # $ref2 => $indextitle ,
85 # 'newcmdorder' => [ ref1, ref2, ..., * ], # asterisk indicates the position of the default index.
86 # 'printindorder' => [ ref1, ref2, ..., * ], # asterisk indicates the position of the default index.
90 # Globals to handle multiple indices.
93 # This tells the system to use up to 7 words in index entries.
97 # Handles the \newindex command. This is called if the \newindex command is
98 # encountered in the LaTeX source. Gets the index ref and title from the arguments.
99 # Saves the index ref and title.
100 # Note that we are called once to handle multiple \newindex commands that are
102 sub do_cmd_newindex {
# Records each \newindex declaration found in $data: the title is stored in
# $indices{'title'}{ref} and the ref is appended to $indices{'newcmdorder'}.
104 # The data is sent to us as fields delimited by their ID #'s. We extract the
106 foreach my $line (split("\n",$data)) {
107 my @fields = split (/(?:\<\#\d+?\#\>)+/,$line);
109 # The index ref and title are elements 1 and 4 of @fields, i.e. the first and fourth
# arguments of \newindex{ref}{ext1}{ext2}{title}; element 0 is whatever precedes the
# first ID delimiter.
110 if ($line =~ /^</ or $line =~ /^\\newindex/) {
111 my ($indexref,$indextitle) = ($fields[1],$fields[4]);
112 $indices{'title'}{$indexref} = $indextitle;
113 push (@{$indices{'newcmdorder'}},$indexref);
119 # KEC -- Copied from makeidx.perl and modified to do multiple indices.
120 # Processes an \index entry from the LaTeX file.
121 # Gets the optional argument from the index command, which is the name of the index
122 # into which to place the entry.
123 # Drops the brackets from the index_name
124 # Puts the index entry into the html stream
125 # Creates the tokenized index entry (which also saves the index entry info
128 local($pat,$idx_entry,$index_name);
129 # catches opt-arg from \index commands for index.sty
130 $index_name = &get_next_optional_argument;
131 $index_name = "" unless defined $index_name;
132 # Drop leading and trailing brackets from the index name.
133 $index_name =~ s/^\[|\]$//g;
135 $idx_entry = &missing_braces unless (
136 (s/$next_pair_pr_rx/$pat=$1;$idx_entry=$2;''/e)
137 ||(s/$next_pair_rx/$pat=$1;$idx_entry=$2;''/e));
139 if ($index_name and defined $idx_entry and
140 !defined $indices{'title'}{$index_name}) {
141 print STDERR "\nInvalid Index Name: \\index \[$index_name\]\{$idx_entry\}\n";
144 $idx_entry = &named_index_entry($pat, $idx_entry,$index_name);
148 # Creates and saves an index entry in the index hashes.
149 # Modified to do multiple indices.
150 # Creates an index_key that allows index entries to have the same characteristics but be in
151 # different indices. This index_key is the regular key with the index name appended.
152 # Save the index order for the entry in the %index_order hash.
153 sub named_index_entry {
# Arguments:
#   $br_id      - identifier used for the anchor NAME and the href target of this entry
#   $str        - the text of the \index entry
#   $index_name - name of the index; appended to the entry key after a \002 delimiter
# Updates %index and %printable_key (and, where applicable, %sub_index, %ref_files,
# %index_labels and $TITLE). The final expression is the invisible anchor for $br_id.
154 local($br_id, $str, $index_name) = @_;
156 # escape the quoting etc characters
160 $* = 1; $str =~ s/\n\s*/ /g; $* = 0; # collapse each newline and the whitespace after it into one space. NOTE(review): $* is not supported by Perl 5.10 and later -- confirm the target interpreter.
161 # protect \001 occurring with images
162 $str =~ s/\001/\016/g; # \001 (0x01) -> \016 (0x0E)
163 $str =~ s/\\\\/\011/g; # Double backslash -> \011 (0x09)
164 $str =~ s/\\;SPMquot;/\012/g; # \;SPMquot; -> \012 (0x0A)
165 $str =~ s/;SPMquot;!/\013/g; # ;SPMquot;! -> \013 (0x0B)
166 $str =~ s/!/\001/g; # Exclamation point -> \001 (0x01)
167 $str =~ s/\013/!/g; # \013 (0x0B) -> Exclamation point
168 $str =~ s/;SPMquot;@/\015/g; # ;SPMquot;@ -> \015 (0x0D)
169 $str =~ s/@/\002/g; # At sign -> \002 (0x02)
170 $str =~ s/\015/@/g; # \015 (0x0D) -> At sign
171 $str =~ s/;SPMquot;\|/\017/g; # ;SPMquot;| -> \017 (0x0F)
172 $str =~ s/\|/\003/g; # Vertical line -> \003 (0x03)
173 $str =~ s/\017/|/g; # \017 (0x0F) -> vertical line
174 $str =~ s/;SPMquot;(.)/\1/g; # ;SPMquot;X -> X, whatever the next character X is. NOTE(review): \1 in the replacement is the legacy spelling of $1.
175 $str =~ s/\012/;SPMquot;/g; # \012 (0x0A) -> ;SPMquot;
176 $str =~ s/\011/\\\\/g; # \011 (0x09) -> double backslash
177 local($key_part, $pageref) = split("\003", $str, 2);
179 # For any keys of the form: blablabla!blablabla, which want to be split at the
180 # exclamation point, replace the ! (held as \001 at this stage) with a comma and a
181 # space. Sub-level splitting is not done for this index, so the split on \001 below yields at most one key.
182 $key_part =~ s/\001/, /g;
183 local(@keys) = split("\001", $key_part);
184 # If TITLE is not yet available use $before.
185 $TITLE = $saved_title if (($saved_title)&&(!($TITLE)||($TITLE eq $default_title)));
186 $TITLE = $before unless $TITLE;
189 if ($SHOW_SECTION_NUMBERS) { $words = &make_idxnum; }
190 elsif ($SHORT_INDEX) { $words = &make_shortidxname; }
191 else { $words = &make_idxname; }
192 local($super_key) = '';
193 local($sort_key, $printable_key, $cur_key);
194 foreach $key (@keys) {
195 $key =~ s/\016/\001/g; # revert protected \001s
196 ($sort_key, $printable_key) = split("\002", $key);
199 # any \label in the printable-key will have already
200 # created a label where the \index occurred.
201 # This has to be removed, so that the desired label
202 # will be found on the Index page instead.
204 if ($printable_key =~ /tex2html_anchor_mark/ ) {
205 $printable_key =~ s/><tex2html_anchor_mark><\/A><A//g;
206 local($tmpA,$tmpB) = split("NAME=\"", $printable_key);
207 ($tmpA,$tmpB) = split("\"", $tmpB);
208 $ref_files{$tmpA}='';
209 $index_labels{$tmpA} = 1;
212 # resolve and clean-up the hyperlink index-entries
213 # so they can be saved in an index.pl file
215 if ($printable_key =~ /$cross_ref_mark/ ) {
216 local($label,$id,$ref_label);
217 # $printable_key =~ s/$cross_ref_mark#(\w+)#(\w+)>$cross_ref_mark/
218 $printable_key =~ s/$cross_ref_mark#([^#]+)#([^>]+)>$cross_ref_mark/
219 do { ($label,$id) = ($1,$2);
220 $ref_label = $external_labels{$label} unless
221 ($ref_label = $ref_files{$label});
222 '"' . "$ref_label#$label" . '">' .
223 &get_ref_mark($label,$id)}
226 $printable_key =~ s/<\#[^\#>]*\#>//go;
228 # recognise \char combinations, for a \backslash
230 $printable_key =~ s/\&\#;\'134/\\/g; # restore \\s
231 $printable_key =~ s/\&\#;\`<BR> /\\/g; # ditto
232 $printable_key =~ s/\&\#;*SPMquot;92/\\/g; # ditto
234 # $sort_key .= "@$printable_key" if !($printable_key); # RRM
235 $sort_key .= "@$printable_key" if !($sort_key); # RRM
236 $sort_key =~ tr/A-Z/a-z/;
238 $cur_key = $super_key . "\001" . $sort_key;
239 $sub_index{$super_key} .= $cur_key . "\004";
241 $cur_key = $sort_key;
244 # Append the $index_name to the current key with a \002 delimiter. This will
245 # allow the same index entry to appear in more than one index.
246 $index_key = $cur_key . "\002$index_name";
248 $index{$index_key} .= "";
252 # if there is no printable key, but one is known from
253 # a previous index-entry, then use it.
255 if (!($printable_key) && ($printable_key{$index_key}))
256 { $printable_key = $printable_key{$index_key}; }
257 # if (!($printable_key) && ($printable_key{$cur_key}))
258 # { $printable_key = $printable_key{$cur_key}; }
260 # do not overwrite the printable_key if it contains an anchor
262 if (!($printable_key{$index_key} =~ /tex2html_anchor_mark/ ))
263 { $printable_key{$index_key} = $printable_key || $key; }
264 # if (!($printable_key{$cur_key} =~ /tex2html_anchor_mark/ ))
265 # { $printable_key{$cur_key} = $printable_key || $key; }
267 $super_key = $cur_key;
271 # page-ranges, from |( and |) and |see
274 if ($pageref eq "\(" ) {
277 } elsif ($pageref eq "\)" ) {
279 local($next) = $index{$index_key};
280 # local($next) = $index{$cur_key};
281 # $next =~ s/[\|] *$//;
282 $next =~ s/(\n )?\| $//;
283 $index{$index_key} = "$next to ";
284 # $index{$cur_key} = "$next to ";
289 $pageref =~ s/\s*$//g; # remove trailing spaces
290 if (!$pageref) { $pageref = ' ' }
291 $pageref =~ s/see/<i>see <\/i> /g;
294 # check if $pageref corresponds to a style command.
295 # If so, apply it to the $words.
297 local($tmp) = "do_cmd_$pageref";
299 $words = &$tmp("<#0#>$words<#0#>");
300 $words =~ s/<\#[^\#]*\#>//go;
306 # any \label in the pageref section will have already
307 # created a label where the \index occurred.
308 # This has to be removed, so that the desired label
309 # will be found on the Index page instead.
312 if ($pageref =~ /tex2html_anchor_mark/ ) {
313 $pageref =~ s/><tex2html_anchor_mark><\/A><A//g;
314 local($tmpA,$tmpB) = split("NAME=\"", $pageref);
315 ($tmpA,$tmpB) = split("\"", $tmpB);
316 $ref_files{$tmpA}='';
317 $index_labels{$tmpA} = 1;
320 # resolve and clean-up any hyperlinks in the page-ref,
321 # so they can be saved in an index.pl file
323 if ($pageref =~ /$cross_ref_mark/ ) {
324 local($label,$id,$ref_label);
325 # $pageref =~ s/$cross_ref_mark#(\w+)#(\w+)>$cross_ref_mark/
326 $pageref =~ s/$cross_ref_mark#([^#]+)#([^>]+)>$cross_ref_mark/
327 do { ($label,$id) = ($1,$2);
328 $ref_files{$label} = ''; # ???? RRM
329 if ($index_labels{$label}) { $ref_label = ''; }
330 else { $ref_label = $external_labels{$label}
331 unless ($ref_label = $ref_files{$label});
333 '"' . "$ref_label#$label" . '">' . &get_ref_mark($label,$id)}/geo;
335 $pageref =~ s/<\#[^\#>]*\#>//go;
337 if ($pageref eq ' ') { $index{$index_key}='@'; }
338 else { $index{$index_key} .= $pageref . "\n | "; }
340 local($thisref) = &make_named_href('',"$CURRENT_FILE#$br_id",$words);
342 $index{$index_key} .= $thisref."\n | ";
344 #print "\nREF: $sort_key : $index_key :$index{$index_key}";
346 #join('',"<A NAME=$br_id>$anchor_invisible_mark<\/A>",$_);
348 "<A NAME=\"$br_id\">$anchor_invisible_mark<\/A>";
352 # KEC. -- Copied from makeidx.perl, then modified to do multiple indices.
353 # Feeds the index entries to the output. This is called for each index to be built.
355 # Generates a list of lookup keys for index entries, from both %printable_key
357 # Sorts the keys according to index-sorting rules.
358 # Removes keys with a 0x01 token. (duplicates?)
359 # Builds a string to go to the index file.
360 # Adds the index entries to the string if they belong in this index.
361 # Keeps track of which index is being worked on, so only the proper entries
363 # Places the index just built in to the output at the proper place.
364 { my $index_number = 0;
366 print "\nDoing the index ... Index Number $index_number\n";
367 local($key, @keys, $next, $index, $old_key, $old_html);
368 my ($idx_ref,$keyref);
369 # RRM, 15.6.96: index constructed from %printable_key, not %index
370 @keys = keys %printable_key;
372 while (/$idx_mark/) {
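373 # Get the index reference from what follows the $idx_mark and
374 # remove it from the string.
# NOTE(review): the substitution below spells the variable $idxmark, while the loop
# test uses $idx_mark -- confirm which name is intended.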
375 s/$idxmark\002(.*?)\002/$idxmark/;
378 # include non- makeidx index-entries
379 foreach $key (keys %index) {
380 next if $printable_key{$key};
382 if ($key =~ s/###(.*)$//) {
383 next if $printable_key{$key};
385 $printable_key{$key} = $key;
386 if ($index{$old_key} =~ /HREF="([^"]*)"/i) {
388 $old_html =~ /$dd?([^#\Q$dd\E]*)#/;
390 } else { $old_html = '' }
391 $index{$key} = $index{$old_key} . $old_html."</A>\n | ";
394 @keys = sort makeidx_keysort @keys;
395 @keys = grep(!/\001/, @keys);
397 foreach $key (@keys) {
398 my ($keyref) = $key =~ /.*\002(.*)/;
399 next unless ($idx_ref eq $keyref); # KEC.
400 $index .= &add_idx_key($key);
403 print "$cnt Index Entries Added\n";
404 $index = '<DD>'.$index unless ($index =~ /^\s*<D(D|T)>/);
405 $index_number++; # KEC.
407 print "(compact version with Legend)";
408 local($num) = ( $index =~ s/\<D/<D/g );
410 s/$idx_mark/$preindex<HR><DL>\n$index\n<\/DL>$preindex/o;
412 s/$idx_mark/$preindex<HR><DL>\n$index\n<\/DL>/o;
415 s/$idx_mark/<DL COMPACT>\n$index\n<\/DL>/o; }
420 # KEC. Copied from latex2html.pl and modified to support multiple indices.
421 # The bibliography and the index should be treated as separate sections
422 # in their own HTML files. The \bibliography{} command acts as a sectioning command
423 # that has the desired effect. But when the bibliography is constructed
424 # manually using the thebibliography environment, or when using the
425 # theindex environment it is not possible to use the normal sectioning
426 # mechanism. This subroutine inserts a \bibliography{} or a dummy
427 # \textohtmlindex command just before the appropriate environments
428 # to force sectioning.
429 sub add_bbl_and_idx_dummy_commands {
# Rewrites the text in $_ so that bibliography and index environments are preceded
# by (or replaced with) commands that force sectioning.
430 local($id) = $global{'max_id'};
# Count the thebibliography environments: the /e replacement increments $bbl_cnt
# and puts the matched text back unchanged.
432 s/([\\]begin\s*$O\d+$C\s*thebibliography)/$bbl_cnt++; $1/eg;
433 ## if ($bbl_cnt == 1) {
# Put a \bibliography command, tagged with a freshly incremented id, in front of
# every thebibliography environment.
434 s/([\\]begin\s*$O\d+$C\s*thebibliography)/$id++; "\\bibliography$O$id$C$O$id$C $1"/geo;
# Save the highest id handed out back into %global.
436 $global{'max_id'} = $id;
437 # KEC. Modified to global substitution to place multiple index tokens.
438 s/[\\]begin\s*($O\d+$C)\s*theindex/\\textohtmlindex$1/go;
439 # KEC. Modified to pick up the optional argument to \printindex
# Each \printindex becomes \textohtmlindex followed by its [ref] argument, or by an
# empty [] when no optional argument was given.
440 s/[\\]printindex\s*(\[.*?\])?/
441 do { (defined $1) ? "\\textohtmlindex $1" : "\\textohtmlindex []"; } /ego;
# Run the lib_add_bbl_and_idx_dummy_commands hook as well, but only if it is defined.
442 &lib_add_bbl_and_idx_dummy_commands() if defined(&lib_add_bbl_and_idx_dummy_commands);
445 # KEC. Copied from latex2html.pl and modified to support multiple indices.
446 # For each textohtmlindex mark found, determine the index titles and headers.
447 # We place the index ref in the header so the proper index can be generated later.
448 # For the default index, the index ref is blank.
450 # One problem is that this routine is called twice.. Once for processing the
451 # command as originally seen, and once for processing the command when
452 # doing the name for the index file. We can detect that by looking at the
453 # id numbers (or ref) surrounding the \theindex command, and not incrementing
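454 # index_number unless a new id (or ref) is seen. This has the side effect of
455 # having to unconventionally start the index_number at -1. But it works.
# NOTE(review): the code below initializes $index_number to 0 and tests for a
# "\001noincrement" marker instead -- confirm whether this paragraph is still current.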
457 # Gets the title from the list of indices.
458 # If this is the first index, save the title in $first_idx_file. This is what's referenced
459 # in the navigation buttons.
460 # Increment the index_number for next time.
461 # If the indexname command is defined or a newcommand defined for indexname, do it.
462 # Save the index TITLE in the toc
463 # Save the first_idx_file into the idxfile. This goes into the nav buttons.
464 # Build index_labels if needed.
465 # Create the index headings and put them in the output stream.
467 { my $index_number = 0; # Starts at 0; advanced near the end of do_cmd_textohtmlindex unless $no_increment is set.
468 my $first_idx_file; # Static: declared in the enclosing block, so it persists across calls.
469 my $no_increment = 0;
471 sub do_cmd_textohtmlindex {
# Builds the section heading for one index from the text in $_: picks the index ref
# and title, sets the navigation/TOC globals, and evaluates last to the heading
# markup joined in front of the remaining text.
473 my ($idxref,$idxnum,$index_name);
475 # We get called from make_name with the first argument = "\001noincrement". This is a sign
476 # to not increment $index_number the next time we are called. We get called twice, once
477 # by make_name and once by process_command. Unfortunately, make_name calls us just to set the name
478 # but doesn't use the result so we get called a second time by process_command. This works fine
479 # except for cases where there are multiple indices that aren't named, which is the case
480 # when the index is inserted by an \include command in LaTeX. In these cases we are only able to use
481 # the index number to decide which index to draw from, and we don't know how to increment that index
482 # number if we get called a variable number of times for the same index, as is the case between
483 # making html (one output file) and web (multiple output files) output formats.
484 if (/\001noincrement/) {
489 # Remove (but save) the index reference
490 s/^\s*\[(.*?)\]/{$idxref = $1; "";}/e;
492 # If we have an $idxref, the index name was specified. In this case, we have all the
493 # information we need to carry on. Otherwise, we need to get the idxref
494 # from the $index_number and set the name to "Index".
496 $index_name = $indices{'title'}{$idxref};
498 if (defined ($idxref = $indices{'newcmdorder'}->[$index_number])) {
499 $index_name = $indices{'title'}{$idxref};
502 $index_name = "Index";
506 $idx_title = "Index"; # The name displayed in the nav bar text.
508 # Only set $idxfile if we are at the first index. This will point the
509 # navigation panel to the first index file rather than the last.
510 $first_idx_file = $CURRENT_FILE if ($index_number == 0);
511 $idxfile = $first_idx_file; # Pointer for the Index button in the nav bar.
512 $toc_sec_title = $index_name; # Index link text in the toc.
513 $TITLE = $toc_sec_title; # Title for this index, from which its filename is built.
514 if (%index_labels) { &make_index_labels(); }
515 if (($SHORT_INDEX) && (%index_segment)) { &make_preindex(); }
516 else { $preindex = ''; }
517 local $idx_head = $section_headings{'textohtmlindex'};
# The index ref is embedded right after $idx_mark, between two \002 delimiters;
# presumably the code that later expands $idx_mark reads it back from there.
518 local($heading) = join(''
519 , &make_section_heading($TITLE, $idx_head)
520 , $idx_mark, "\002", $idxref, "\002" );
521 local($pre,$post) = &minimize_open_tags($heading);
522 $index_number++ unless ($no_increment);
524 join('',"<BR>\n" , $pre, $_);
528 # Returns an index key, given the key passed as the first argument.
529 # Not modified for multiple indices.
532 local($index, $next);
533 if (($index{$key} eq '@' )&&(!($index_printed{$key}))) {
534 if ($SHORT_INDEX) { $index .= "<DD><BR>\n<DT>".&print_key."\n<DD>"; }
535 else { $index .= "<DT><DD><BR>\n<DT>".&print_key."\n<DD>"; }
536 } elsif (($index{$key})&&(!($index_printed{$key}))) {
538 $next = "<DD>".&print_key."\n : ". &print_idx_links;
540 $next = "<DT>".&print_key."\n<DD>". &print_idx_links;
542 $index .= $next."\n";
543 $index_printed{$key} = 1;
546 if ($sub_index{$key}) {
547 local($subkey, @subkeys, $subnext, $subindex);
548 @subkeys = sort(split("\004", $sub_index{$key}));
550 $index .= "<DD>".&print_key unless $index_printed{$key};
553 $index .= "<DT>".&print_key."\n<DD>" unless $index_printed{$key};
554 $index .= "<DL COMPACT>\n";
556 foreach $subkey (@subkeys) {
557 $index .= &add_sub_idx_key($subkey) unless ($index_printed{$subkey});
564 1; # Must be present as the last line.