2 # Finds potential problems in tex files, and issues warnings to the console
3 # about what it finds. Takes a list of files as its only arguments,
4 # and does checks on all the files listed. The assumption is that these are
5 # valid (or close to valid) LaTeX files. It follows \include statements
6 # recursively to pick up any included tex files.
10 # Currently the following checks are made:
12 # -- Multiple hyphens not inside a verbatim environment (or \verb). These
13 # should be placed inside a \verb{} construct so they will not be converted
14 # to a single hyphen by latex and latex2html.
17 # Original creation 3-8-05 by Karl Cunningham karlc -at- keckec -dot- com
23 # The following builds the test string used to find multiple hyphens in the
24 # tex files. Several constructs are matched, but only multiple hyphens found
25 # outside of them are reported; the other alternatives are there so that
# hyphens inside those constructs are skipped over.
# The pieces are single-quoted strings, so each doubled backslash below is a
# single backslash in the resulting regular expression.
27 my $b = '\\\\begin\\*?\\s*\\{\\s*'; # \begin{ (optional *, optional whitespace around the brace)
28 my $e = '\\\\end\\*?\\s*\\{\\s*'; # \end{ (optional *, optional whitespace around the brace)
29 my $c = '\\s*\\}'; # closing curly brace, optional whitespace before it
31 # This captures entire verbatim environments, from \begin{verbatim} through
# the nearest following \end{verbatim}. The .*? can only span several lines
# because $teststr is applied with the /s modifier where it is used.
33 my $verbatimenv = $b . "verbatim" . $c . ".*?" . $e . "verbatim" . $c;
35 # This captures \verb constructs: \verb (or \verb*), one delimiter character,
# and everything up to the next occurrence of that same delimiter. The \1
# backreference relies on (.) being the first capture group in $teststr.
36 my $verb = '\\\\verb\\*?(.).*?\\1';
38 # This captures multiple hyphens with a leading and a trailing whitespace
# character. Such a match starts with whitespace, not a hyphen, so it is
# not reported.
39 my $hyphsp = '\\s\\-{2,}\\s';
41 # This identifies any other run of two or more hyphens.
42 my $hyphens = '\\-{2,}';
44 # This identifies \hyperpage{..} commands, which should be ignored.
45 my $hyperpage = '\\\\hyperpage\\*?\\{.*?\\}';
47 # This builds the actual test string as an alternation of the above strings.
# Disabled alternative, left commented out:
48 #my $teststr = "$verbatimenv|$verb|$tocentry|$hyphens";
49 my $teststr = "$verbatimenv|$verb|$hyphsp|$hyperpage|$hyphens";
53 # Collect the files named in \include statements. The first argument is a
54 # reference to the list of files found so far. The remaining arguments are
55 # the names of the files to scan for includes.
57 my ($fileline,$includefile,$includes);
59 while (my $filename = shift) {
60 # Open the file that is to be scanned for \include statements.
# NOTE(review): two-argument open takes the mode from the string, so an
# unusual file name could change how the file is opened.
61 open my $if,"<$filename" or die "Cannot open input file $filename\n";
67 # If $_ contains an \include{...} statement, capture the name between the
# braces into $includefile.
68 if (($includefile) = /\\include\s*\{(.*?)\}/) {
70 # Append .tex to the filename
71 $includefile .= '.tex';
73 # If the include file has already been processed, issue a warning
74 # and don't do it again.
77 if ($_ eq $includefile) {
83 print "$includefile found at line $fileline in $filename was previously included\n";
85 # The file has not been previously found. Save it on the shared list and
86 # recursively scan it for includes of its own.
87 push (@$files,$includefile);
88 get_includes($files,$includefile);
99 my ($filedata,$this,$linecnt,$before);
101 # The test string ($teststr) is used to recognize the various environments.
102 # A warning is only issued if the multiple hyphens are outside of a
103 # verbatim environment (either \begin{verbatim}...\end{verbatim} or
104 # \verb{--}). Those environments are matched as a whole and skipped.
107 foreach my $file (@files) {
108 # Open the file and load the whole thing into $filedata. A bit wasteful but
109 # easier to deal with, and we don't have a problem with speed here.
# NOTE(review): bareword filehandle and two-argument open.
111 open IF,"<$file" or die "Cannot open input file $file";
117 # Set up to process the file data.
120 # Go through the file data from beginning to end. For each match, save what
121 # came before it and what matched. $filedata presumably then holds only what
# came after the match, since the loop condition re-tests $filedata each time.
123 # Check the match to see if it starts with a multiple-hyphen. If so
124 # warn the user. Keep track of line numbers so they can be output
125 # with the warning message.
# /s lets . match newlines (needed for multi-line verbatim environments);
# /o compiles the interpolated pattern only once.
126 while ($filedata =~ /$teststr/os) {
# tr/\n/\n/ returns the number of newlines, so this advances the line count
# past the text that preceded the match.
130 $linecnt += $before =~ tr/\n/\n/;
132 # Check if the multiple hyphen is present outside of one of the
133 # acceptable constructs. Every other alternative in $teststr begins with a
# backslash or whitespace, so a leading hyphen means a bare hyphen run.
134 if ($this =~ /^\-+/) {
135 print "Possible unwanted multiple hyphen found in line ",
136 "$linecnt of file $file\n";
# Account for any newlines contained in the matched text itself.
138 $linecnt += $this =~ tr/\n/\n/;
142 ##################################################################
144 ##################################################################
148 # Scan the files named on the command line for \include statements,
# collecting the included files in @includes, then check each collected
# file for multiple hyphens.
# NOTE(review): in the visible code only files found via \include are pushed
# onto the list; confirm whether the command-line files themselves are meant
# to be checked as well.
150 get_includes(\@includes,@ARGV);
152 check_hyphens(@includes);