#!/usr/local/bin/perl
'di';
'ig00';
# NOTE(review): the two string constants above are no-ops for Perl; they
# presumably double as nroff requests so that "nroff -man" can skip the code
# and reach the man page at the end of this file -- keep them on lines 2-3.

require 'getcwd.pl';  # Comes with the Perl distribution

# This variable can be set manually or by the installation script;
$LATEX2HTMLDIR='/usr/local/etc/latex2html';# Inserted by installation script

# LaTeX2HTML by Nikos Drakos
#
# ****************************************************************
# LaTeX To HTML Translation **************************************
# ****************************************************************
# LaTeX2HTML is a Perl program that translates LaTeX source
# files into HTML (HyperText Markup Language). For each source
# file given as an argument the translator will create a
# directory containing the corresponding HTML files.
#
# The man page for this program is included at the end of this file
# and can be viewed using
# %nroff -man latex2html
#
# For more information on this program and some examples of its
# capabilities see
# http://cbl.leeds.ac.uk/nikos/tex2html/doc/latex2html/latex2html.html
#
# Written by Nikos Drakos, July 1993.
#
# Address: Computer Based Learning Unit
# University of Leeds
# Leeds, LS2 9JT
#
# Copyright (c) 1993. All rights reserved.
#
# See general license below.
#
# ****************************************************************
# General License Agreement and Lack of Warranty *****************
# ****************************************************************
#
# This software is distributed in the hope that it will be useful
# but WITHOUT ANY WARRANTY. The author(s) do not accept responsibility
# to anyone for the consequences of using it or for whether it serves
# any particular purpose or works at all. No warranty is made about
# the software or its performance.
#
# Use and copying of this software and the preparation of derivative
# works based on this software are permitted, so long as the following
# conditions are met:
# o The copyright notice and this entire notice are included intact
# and prominently carried on all copies and supporting documentation.
# o No fees or compensation are charged for use, copies, or # access to this software. You may charge a nominal # distribution fee for the physical act of transferring a # copy, but you may not charge for the program itself. # o If you modify this software, you must cause the modified # file(s) to carry prominent notices (a Change Log) # describing the changes, who made the changes, and the date # of those changes. # o Any work distributed or published that in whole or in part # contains or is a derivative of this software or any part # thereof is subject to the terms of this agreement. The # aggregation of another unrelated program with this software # or its derivative on a volume of storage or distribution # medium does not bring the other program under the scope # of these terms. # # This software is made available AS IS, and is distributed without # warranty of any kind, either expressed or implied. # # In no event will the author(s) or their institutions be liable to you # for damages, including lost profits, lost monies, or other special, # incidental or consequential damages arising out of or in connection # with the use or inability to use (including but not limited to loss of # data or data being rendered inaccurate or losses sustained by third # parties or a failure of the program to operate as documented) the # program, even if you have been advised of the possibility of such # damages, or for any claim by any other party, whether in an action of # contract, negligence, or other tortious action. # # Please send bug reports, comments, questions and suggestions to # nikos@cbl.leeds.ac.uk. We would also appreciate receiving any changes # or improvements you may make. 
#
############################# System Parameters ##########################
#
# The $TEXEXPAND, $LATEX, and $DVIPS variables
# should be set in $HOME/.latex2html-init
$TEXEXPAND = 'texexpand';
$LATEX = 'latex';
$DVIPS = 'dvips';

$TEX2HTMLVERSION = '0.6.4 (Tues Aug 30 1994)';
$TEX2HTMLADDRESS = "http://cbl.leeds.ac.uk/nikos/tex2html/doc/latex2html/latex2html.html";
$AUTHORADDRESS = "http://cbl.leeds.ac.uk/nikos/personal.html";

# Lets require() find files (e.g. an init file) in the user's home directory.
push(@INC,$ENV{'HOME'});

$| = 1; # flush stdout with every print -- gives better feedback during
        # long computations

######################### Command Line Argument Defaults ########################

# Destination directory
$DESTDIR = '.';

# Determines the level at which it will stop splitting sections
# into individual files (e.g. $MAX_SPLIT_DEPTH = 0 will generate a single
# HTML document, $MAX_SPLIT_DEPTH = 1 will split individual parts, etc.)
$MAX_SPLIT_DEPTH = 8;

# Determines how much of the document structure to provide links to, from
# each node. A value of 0 will show NO links to child nodes, a value of
# 1 will show only the immediate child nodes (as in Texinfo), etc. A value
# at least as big as $MAX_SPLIT_DEPTH will produce a table of contents for
# the tree structure rooted at any given node.
$MAX_LINK_DEPTH = 4;

# Determines whether LaTeX should be invoked to process unknown environments.
# It is useful to set it to 1 in order to get a quick draft of the basic
# structure of a document without any fancy figures, tables etc.
$NOLATEX = 0;

# Determines whether any generated images will be inlined or not.
$EXTERNAL_IMAGES = 0;

# Determines whether the navigation and cross-reference marks are iconic
# or textual. In ascii mode the output of the translator can be used on
# character based browsers which do not support inlined images
# (the <IMG> tag). Setting this variable also sets the $EXTERNAL_IMAGES.
$ASCII_MODE = 0;

# The document title.
$default_title = 'No Title';
$TITLE = $default_title;

# Determines whether to leave out the navigation links (1 = no navigation).
$NO_NAVIGATION = 0;

# Puts the navigation links at the top of each page. This is the default.
$TOP_NAVIGATION = 1;

# Puts the navigation links at the bottom of each page. Off by default.
$BOTTOM_NAVIGATION = 0;

# Puts navigation links at the top of each page. If the page has more than
# $WORDS_IN_PAGE words it also puts one at the bottom.
$AUTO_NAVIGATION = 1;
$WORDS_IN_PAGE = 200;

# Puts a link to the index in the navigation panel if there is one.
$INDEX_IN_NAVIGATION = 1;

# Puts a link to the table of contents in the navigation panel if there is one.
$CONTENTS_IN_NAVIGATION = 1;

# Puts a link to the next logical page in the navigation panel if there is one.
# Next page will visit any subsections instead of the next section i.e.
# it allows a depth first visiting order.
$NEXT_PAGE_IN_NAVIGATION = 1;

# Puts a link to the previous logical page in the navigation panel if there
# is one.
$PREVIOUS_PAGE_IN_NAVIGATION = 1;

# Information page
# This causes a new part to be added to the document with information about
# the source file, the translator etc.
$INFO = 1;

# Separator between the body of text in a page and the child links
# NOTE(review): this literal currently holds only three newlines; it looks
# like HTML markup (e.g. a line break / rule) was stripped from it -- confirm
# against a pristine copy of the script.
$CHILDLINE = "\n\n\n";

# Names of style-files and tex inputs that the translator should *not* look at
# (because they contain macrology which is too complex for it to handle).
# Commands defined by these styles and used in environments which don't
# go to tex (i.e., outside of math, figures, tables, etc.) need to be
# handled in some other way --- preferably by means of a &process_to_bitmap
# routine analogous to &ignore (n.b. &process_to_bitmap doesn't exist yet).
# For instance, the \psfig command defined by either of the following style
# files is already special-cased below.
$DONT_INCLUDE = "";

# Image recycling
$REUSE = 0;   # If 1 it causes images to be reused
              # and switches off the interactive session

# When this is 1, the section numbers are shown. The section numbers should
# then match those that would have been produced by LaTeX.
# The correct section numbers are obtained from the $FILE.aux file generated
# by LaTeX.
# Hiding the section numbers encourages
# use of particular sections as standalone documents. In this case the
# cross reference to a section is shown using the default symbol rather
# than the section number.
$SHOW_SECTION_NUMBERS = 0;

# This is the line width measured in pixels and it is used to right justify
# equations and equation arrays;
$LINE_WIDTH = 450;

# This number will determine the size of the equations, special characters,
# and anything which will be converted into an inlined image
# *except* "image generating environments" such as "figure", "table"
# or "minipage".
# Effective values are those greater than 0.
# Sensible values are between 0.1 - 4.
$MATH_SCALE_FACTOR = 1.6;

# This number will determine the size of
# image generating environments such as "figure", "table" or "minipage".
# Effective values are those greater than 0.
# Sensible values are between 0.1 - 4.
$FIGURE_SCALE_FACTOR = 1.6;

# This affects ONLY the way accents are processed
$default_language = 'english';

# No arguments!!
(&usage && die "No files to process!\n") unless @ARGV;

# Set $HOME to the environment variable just in case tries to use it!
$HOME = $ENV{'HOME'};

# Author address
@address_data = &address_data;

# Read latex2html.config
require("$LATEX2HTMLDIR/latex2html.config") if
    ((-f "$LATEX2HTMLDIR/latex2html.config") ||
     die "LaTeX2HTML has not been installed correctly:".
         "\nCould not find file $LATEX2HTMLDIR/latex2html.config\n");

# Read .latex2html-init file if one is found
require("$ENV{'HOME'}/.latex2html-init") if
    (-f "$ENV{'HOME'}/.latex2html-init");

# Read .latex2html-init file if one is found in current directory
# (skipped when the current directory is $HOME, which was loaded just above)
require("./.latex2html-init") if
    ( (! (&getcwd eq $ENV{'HOME'} )) && (-f "./.latex2html-init"));

# The closing </EM> had no matching opening tag, so the opening <EM> is
# supplied here.
# NOTE(review): other markup may also have been lost from this literal --
# confirm against a pristine copy of the script.
$ADDRESS = "<EM>$address_data[0]\n$address_data[1]<\/EM>" unless $ADDRESS;

# Process switches
$argv = join(' ',@ARGV);        # Save the command line arguments
while ($ARGV[0] =~ /^-/) {
    $_ = shift;
    if (/^-split$/) {
        $_ = shift;
        ((($MAX_SPLIT_DEPTH) = /^(\d+)$/) ||
         print("Unrecognised value for -split: $_\n") && &usage && die);
        # A single-file document has nothing to navigate between.
        $NO_NAVIGATION = 1 unless $MAX_SPLIT_DEPTH;
    }
    elsif (/^-link$/) {
        $_ = shift;
        ((($MAX_LINK_DEPTH) = /^(\d+)$/) ||
         print("Unrecognised value for -link: $_\n") && &usage && die);
    }
    elsif (/^-nolatex$/) {
        $NOLATEX = 1;
    }
    elsif (/^-external_images$/) {
        $EXTERNAL_IMAGES = 1;
    }
    elsif (/^-ascii_mode$/) {
        $ASCII_MODE = 1;
        $EXTERNAL_IMAGES = 1;
    }
    elsif (/^-no_navigation$/) {
        $NO_NAVIGATION = 1;
    }
    elsif (/^-top_navigation$/) {
        $TOP_NAVIGATION = 1;
    }
    elsif (/^-bottom_navigation$/) {
        $BOTTOM_NAVIGATION = 1;
    }
    elsif (/^-auto_navigation$/) {
        $AUTO_NAVIGATION = 1;
    }
    elsif (/^-index_in_navigation$/) {
        $INDEX_IN_NAVIGATION = 1;
    }
    elsif (/^-contents_in_navigation$/) {
        $CONTENTS_IN_NAVIGATION = 1;
    }
    elsif (/^-next_page_in_navigation$/) {
        $NEXT_PAGE_IN_NAVIGATION = 1;
    }
    elsif (/^-previous_page_in_navigation$/) {
        $PREVIOUS_PAGE_IN_NAVIGATION = 1;
    }
    elsif (/^-t$/) {
        $_ = shift;
        ((($TITLE) = /^(.+)$/) ||
         print("No title for -t? $_\n") && &usage && die);
    }
    elsif (/^-dir$/) {
        $_ = shift;
        ((($DESTDIR) = /^(.+)$/) ||
         print("No directory for -d? $_\n") && &usage && die);
        ($DESTDIR) = &get_full_path($DESTDIR);
    }
    elsif (/^-address$/) {
        $_ = shift;
        ((($ADDRESS) = /^(.+)$/) ||
         print("No address for -address? $_\n") && &usage && die);
    }
    elsif (/^-info$/) {
        $_ = shift;
        ((($INFO) = /^(.+)$/) ||
         print("No string for -info: Will not generate information page.\n")
         );
    }
    elsif (/^-dont_include/) {
        $DONT_INCLUDE .= ':' . shift;
    }
    elsif (/^-reuse/) {
        $REUSE = 1;
    }
    elsif (/^-show_section_numbers/) {
        $SHOW_SECTION_NUMBERS = 1;
    }
    elsif (/^-init_file/) {
        $init_file = shift;
        require($init_file) if (-f $init_file);
    }
    elsif (/^-h(elp)?$/) {
        &usage;
    }
    else {
        &usage;
        die "Unrecognised switch: $_\n";
    }
}

&driver;

# Process each file ...
# Translates every file named in @ARGV into its own directory,
# <destination directory>/<file name without .tex>, and returns $_.
sub driver {
    local($FILE, $texfilepath, $orig_cwd, %unknown_commands);
    # Remember the destination given on the command line. Every input file
    # gets its own directory directly below it (previously $DESTDIR grew with
    # each file, so a second file ended up in .../file1/file2).
    local($base_destdir) = $DESTDIR;
    $orig_cwd = &getcwd;
    &initialise;                # Initialise some global variables
    &ascii_mode if $ASCII_MODE; # Must come after initialization
    foreach $FILE (@ARGV) {
        local($max_id) = (0);
        local($max_page_num) = (0); # For pages passed to latex
        local($pid, $input, $preamble, $sections_rx, $sections_no_delim_rx,
              @verbatim, @verb, @verb_delim, $verb_counter, $verbatim_counter,
              $outermost_section, %cached_env_img, %id_map, %latex_body,
              $latex_body, $warnings, %symbolic_labels, %latex_labels,
              %encoded_section_number, %encoded_figure_number,
              %encoded_table_number);
        local (%new_command, %new_environment); # name-code associations
        ($texfilepath, $FILE) = &get_full_path($FILE);
        if (-f "$texfilepath/$FILE") {
            print "This is LaTeX2HTML Version $TEX2HTMLVERSION by Nikos Drakos, \nComputer Based Learning Unit, University of Leeds.\n\n";
            # Tell texexpand which files we *don't* want to look at.
            $ENV{'TEXE_DONT_INCLUDE'} = $DONT_INCLUDE;
            # Strip only a literal ".tex" suffix (the dot used to be an
            # unescaped wildcard, which also truncated names such as "vortex").
            $FILE =~ s/\.tex$//;
            $DESTDIR = $base_destdir . "/". $FILE;
            print "OPENING $texfilepath/$FILE.tex \n";
            next unless &new_dir($DESTDIR);
            &deal_with_texinputs($texfilepath, $DESTDIR);
            # This needs $DESTDIR to have been created ...
            system("$TEXEXPAND -auto_exclude -save_styles $DESTDIR/$$_styles $texfilepath/$FILE.tex > $DESTDIR/$$_$FILE")
                && print "Error: $!\n";
            chdir($DESTDIR) || die "$!\n";
            open(INPUT,"$$_$FILE");
            &load_style_file_translations;
            &make_language_rx;
            &make_raw_arg_cmd_rx;
            print "\nReading ...";
            &slurp_input;
            &pre_process;       # and pre_process \verb and \verbatim
            $preamble = &make_preamble;
            # Handle newcommand, newenvironment ...
            &substitute_meta_cmds if /$meta_cmd_rx/;
            # Create a regular expressions
            &set_depth_levels;
            &make_sections_rx;
            &make_order_sensitive_rx;
            &add_document_info_page if $INFO;
            &add_bbl_and_idx_dummy_commands;
            &translate;         # Destructive!
            &cleanup;
            print "\n\n*********** WARNINGS *********** \n$warnings"
                if $warnings;
            &image_message if ($warnings =~ /Failed to convert/io);
            chdir($orig_cwd);# Go back to the source directory
        }
        else {
            print "Cannot read $texfilepath/$FILE \n";}
    }
    print "\nUnknown commands: ". join(" ",keys %unknown_commands)
        if %unknown_commands;
    print "\nDone.\n";
    $_;
}

# Reads in a latex generated file (e.g. .bbl or .aux)
# It returns success or failure
# ****** and binds $_ in the caller as a side-effect ******
# A warning is queued in $warnings when $FILE.tex is newer than the file.
sub process_ext_file {
    local($ext) = @_;
    local($found) = 0;
    print "\nReading $FILE.$ext ...";
    local($file) = &fulltexpath("$FILE.$ext");
    $warnings .= "\n$FILE.tex is newer than $FILE.$ext: Please rerun latex" .
        (($ext =~ /bbl/) ? " and bibtex.\n" : ".\n")
            if ( ($found = (-f $file)) &&
                 &newer(&fulltexpath("$FILE.tex"), $file));
    if ( $found && (open(INPUT, "<$file"))) {
        &slurp_input;
        &pre_process;
        &substitute_meta_cmds if /$meta_cmd_rx/;
        &wrap_shorthand_environments;
        $_ = &translate_commands(&translate_environments($_));
    };
    $found;
}

# Appends the source directory and "." to the TEXINPUTS environment variable.
# $dest is accepted but not used.
sub deal_with_texinputs {
    local($source, $dest) = @_;
    $ENV{'TEXINPUTS'} = join(":", $ENV{'TEXINPUTS'}, $source, ".",":");
}

# Appends a final section titled "About this document ..." followed by the
# \textohtml_info_page marker to $_.
sub add_document_info_page {
    # Uses $outermost_level
    local($X,$Y) = ($max_id++, $max_id++);
    $_ = join('', $_,
              "\\$outermost_level$O$X$C $O$Y$C\\em About this document ...$O$Y$C $O$X$C \n \\textohtml_info_page");
}

# For each style file name in $$_styles (generated by texexpand) look for a
# perl file in $LATEX2HTMLDIR/styles and load it.
# A <name>.perl file next to the source document takes precedence over the
# one in $LATEX2HTMLSTYLES.
sub load_style_file_translations {
    local($_, $file);
    open(STYLES, "<$$_styles");
    # The read from STYLES had been lost, leaving an empty "while()".
    while(<STYLES>) {
        s/\s//g;
        $file = $_;
        if (-f ($_ = "$texfilepath/$file.perl")) {
            print "\nLoading $_...\n";
            require ($_)}
        elsif (-f ($_ = "$LATEX2HTMLSTYLES/$file.perl")) {
            print "\nLoading $_...\n";
            require($_)}
    }
    close(STYLES);
}

################## Weird Special case ##################

# The new texexpand can be told to leave in \input and \include
# commands which contain code that the translator should simply pass
# to latex, such as the psfig stuff. These should still be seen by
# TeX, so we add them to the preamble ...
sub do_include_lines {
    while (s/$include_line_rx//o) {
        local($include_line) = &revert_to_raw_tex($&);
        &add_to_preamble ('include', $include_line);
    }
}

########################## Preprocessing ############################

# The \verb declaration and the verbatim environment contain simulated
# typed text and should not be processed. Characters such as $,\,{,and }
# lose their special meanings and should not be considered when marking
# brackets etc. To achieve this \verb declarations and the contents of
# verbatim environments are replaced by markers. At the end the original
# text is put back into the document.
# The markers for verb and verbatim are different so that these commands
# can be restored to what the raw input was just in case they need to
# be passed to latex.

# Replaces verbatim environments and \verb declarations in $_ by markers
# (saving the originals in @verbatim, @verb and @verb_delim), removes TeX
# comments and marks matching brackets. Returns the modified $_.
sub pre_process {
    # Modifies $_;
    &replace_html_special_chars;
    s/\\\\/\\\\ /go;            # Makes it unnecessary to look for escaped cmds
    local($next, $esc_del, $del);
    &normalize_language_changes;
    while (/\\begin$verbatim_env_rx/o) {
        local($before, $contents, $after, $env);
        ($before, $after, $env) = ($`, $', $1);
        # The literal braces are escaped and ${env} is delimited so that the
        # pattern cannot be misread as a quantifier or an array subscript.
        if ($after =~ /\s*\\end\{${env}[*]?\}/) { # Must NOT use the s///o option!!!
            ($contents, $after) = ($`, $');
            $contents =~ s/^\n*//;
            $verbatim[++$verbatim_counter] = $contents;
            $after = join("",$verbatim_mark,$env,$verbatim_counter,$after);}
        else {
            print "Cannot find \\end{$env}\n";
        }
        # The \begin{...} itself is dropped here, so the loop always advances.
        $_ = join("",$before,$after);
    }
    # Now do the \verb declarations
    while ((($del) = /\\verb[*]?(.)/)) {
        $esc_del = &escape_rx_chars($del);
        unless (s/\\verb[*]?[$esc_del]([^$esc_del]*)[$esc_del]/$verb[++$verb_counter]=$1; $verb_delim[$verb_counter]=$del;join('',$verb_mark,$verb_counter)/e) {
            # No closing delimiter: nothing was replaced, so the loop
            # condition would match the same \verb forever. Give up instead.
            print "Cannot find closing delimiter for \\verb$del\n";
            last;
        }
    }
    s/^%.*\n|([^\\])%.*\n/\1/go; # Remove Comments
    &mark_string;
    $_;
}

#################### Marking Matching Brackets ######################

# Reads the entire input file into a
# single string.
# Reads from the already opened INPUT filehandle, closes it, and leaves the
# text in $_ (which is also the return value).
sub slurp_input {
    local($result_str);
    # The read from INPUT had been lost, leaving an empty "while ()".
    while (<INPUT>) {
        $result_str .= $_;};
    close(INPUT);
    $_ = $result_str;
    undef $result_str;
    $_;
}

# Returns the marker stored for $x in %html_specials, or $x itself when
# there is none.
sub special {
    local($x) = @_;             # localised: used to clobber the globals $x, $y
    local($y) = $html_specials{$x};
    ($y ? $y : $x)}

# Inverse of &special: looks $x up in %html_specials_inv.
sub special_inv {
    local($x) = @_;
    local($y) = $html_specials_inv{$x};
    ($y ? $y : $x)}

# Mark each matching opening and closing bracket with a unique id.
# Replaces every matching pair of braces in $_, innermost first, by
# $O<id>$C ... $O<id>$C using a fresh id from $max_id. Escaped braces
# (\{ and \}) are protected while this happens and restored afterwards.
sub mark_string {
    # Modifies $_ in the caller;
    s/^\\\{|([^\\])\\\{/\1tex2html_escaped_opening_bracket/go;
    s/^\\\}|([^\\])\\\}/\1tex2html_escaped_closing_bracket/go;
    for (;;) {                  # Infinite loop
        # Each pass marks the pairs that contain no other braces.
        last unless s/\{([^{}]*)\}/join("",$O,++$max_id,$C,$1,$O,$max_id,$C)/geo;
    }
    s/tex2html_escaped_opening_bracket/\\{/go;
    s/tex2html_escaped_closing_bracket/\\}/go;
    $_;
}

sub replace_html_special_chars {
    # Replaces html special characters with markers unless preceded by "\"
    s/([^\\])(<|>|&|\")/&special($1).&special($2)/geo;
    # MUST DO IT AGAIN JUST IN CASE THERE ARE CONSECUTIVE HTML SPECIALS
    s/([^\\])(<|>|&|\")/&special($1).&special($2)/geo;
    s/^(<|>|&|\")/&special($1)/geo;
}

# The bibliography and the index should be treated as separate sections
# in their own HTML files. The \bibliography{} command acts as a sectioning command
# that has the desired effect. But when the bibliography is constructed
# manually using the thebibliography environment, or when using the
# theindex environment it is not possible to use the normal sectioning
# mechanism. This subroutine inserts a \bibliography{} or a dummy
# \tex2htmlindex command just before the appropriate environments
# to force sectioning.
sub add_bbl_and_idx_dummy_commands {
    $max_id++;
    s/([\\]begin\s*$O\d+$C\s*thebibliography)/\\bibliography$O$max_id$C$O$max_id$C \1/o;
    s/([\\]begin\s*$O\d+$C\s*theindex)/\\tex2htmlindex \1/o;
    s/[\\]printindex/\\tex2htmlindex /o;
}

# Uses and modifies $default_language
# Splits the argument at each language change (matched by $language_rx) and
# passes every chunk to the translation function registered in
# %language_translations for the language in force. Returns the result.
sub convert_iso_latin_chars {
    local($_) = @_;
    local($next_language, $pattern, $before, $after, $funct);
    if (/$language_rx/o) {
        ($next_language, $pattern, $before, $after) = (($1||$2), $&, $`, $');
        $before = &convert_iso_latin_chars($before);
        $default_language = $next_language;
        $_ = join($pattern, $before, &convert_iso_latin_chars($after));
    }
    else {
        $funct = $language_translations{$default_language};
        (defined(&$funct) ? $_ = &$funct($_) :
         do {$warnings .= "\nCould not find translation function for $default_language.\n\n"
                 unless $warnings =~ /$default_language/})
    }
    # undef takes a single operand, so each variable is released separately.
    undef $next_language;
    undef $pattern;
    undef $before;
    undef $after;
    undef $funct;
    $_;
}

# May need to add something here later
sub english_translation {
    $_[0];
}

# This replaces \setlanguage{\language} with \languageTeX
# This makes the identification of language chunks easier.
sub normalize_language_changes {
    s/$setlanguage_rx/\\\1TeX/go;
}

# Splits $_ at the sectioning commands, translates each section and writes
# it to its own file while the section depth is below $MAX_SPLIT_DEPTH, then
# runs the image generation and post-processing passes.
sub translate {
    &tokenize($sections_no_delim_rx); # Inserts space after a sectioning command
    &normalize_sections;        # Deal with the *-form of sectioning commands
    # Split the input into sections
    local(@sections) = split(/$sections_no_delim_rx /o, $_);
    local($sections) = int(scalar(@sections) / 2);
    # Initialises $curr_sec_id to a list of 0's equal to
    # the number of sectioning commands.
    local(@curr_sec_id) = split(' ', &make_first_key);
    local($i, $current_depth) = (0,0);
    local($curr_sec) = ($FILE);
    local(%section_info, %toc_section_info, $CURRENT_FILE, %cite_info, %ref_files);
    # These filenames may be set when translating the corresponding commands.
    local($tocfile, $loffile, $lotfile, $footfile, $citefile, $idxfile,
          $figure_captions, $table_captions, $footnotes, $citations, %index,
          $t_title, $t_author, $t_date);
    &process_aux_file if (/\\ref/o || /\\caption/o || ($SHOW_SECTION_NUMBERS));
    print "\nTranslating ...";
    # @sections alternates section text and sectioning command, hence the
    # two increments of $i per iteration.
    while ($i <= @sections) {
        $_ = $sections[$i];
        s/^[\s]*//;             # Remove initial blank lines
        # The section command was removed when splitting ...
        s/^/\\$curr_sec / if ($i > 0); # ... so put it back
        if ($current_depth < $MAX_SPLIT_DEPTH) {
            $CURRENT_FILE = &make_name($curr_sec, join('_',@curr_sec_id));
            open(OUTPUT, ">$CURRENT_FILE") || die "Cannot open $DESTDIR/$FILE $!";
        };
        &remove_document_env;
        &wrap_shorthand_environments;
        print $i/2 . "/$sections...";
        # Must do this early ... It also sets $TITLE
        &process_command($sections_rx, *_) if /$sections_rx/;
        $_ = &translate_commands(&translate_environments($_));
        print OUTPUT $_;
        # Associate each id with the depth, the filename and the title
        $TITLE = $CURRENT_FILE unless ($TITLE);
        $toc_section_info{join(' ',@curr_sec_id)} =
            "$current_depth$delim$CURRENT_FILE$delim$TITLE";
        $section_info{join(' ',@curr_sec_id)} =
            "$current_depth$delim$CURRENT_FILE$delim$TITLE"
                if ($current_depth < $MAX_SPLIT_DEPTH);
        ++$i;
        # Get the depth of the current section;
        $curr_sec = $sections[$i];
        $current_depth = $section_commands{$curr_sec};
        @curr_sec_id = &new_level($current_depth, @curr_sec_id);
        $TITLE = '';
        ++$i;
    }
    close OUTPUT;
    # Link sections, add head/body/address do cross-refs etc
    &pass_to_latex;
    &post_process;
    &make_footnotes if $footfile;
    &save_labels_in_file;
    &save_image_cache_in_file;
}

############################ Processing Environments ##########################

sub wrap_shorthand_environments {
    # This wraps a dummy environment around environments that do not use
    # the begin-end convention. The wrapper will force them to be
    # evaluated by Latex rather than them being translated.
    # Wrap a dummy environment around matching $$s.
    # s/^\$\$|([^\\])\$\$/{$1.&next_wrapper('tex2html_double_dollar')}/ge;
    # Wrap a dummy environment around matching $s.
    # s/^\$|([^\\])\$/{$1.&next_wrapper('$')}/ge;
    # s/tex2html_double_dollar/\$\$/go;
    # Do \(s and \[s
    s/(^\\[(])|([^\\])(\\[(])/$2.&make_wrapper(1).$1.$3/geo;
    s/(^\\[)]|[^\\]\\[)])/$1.&make_wrapper(0)/geo;
    # \[ and \] themselves are replaced by a displaymath wrapper; the
    # character in front of them ($2) is kept (it used to be dropped, which
    # ate the last character of the formula before "\]").
    s/(^\\[[])|([^\\])(\\[[])/$2.&make_any_wrapper(1,"displaymath")/geo;
    s/(^\\[\]])|([^\\])(\\[\]])/$2.&make_any_wrapper(0,"displaymath")/geo;
    $double_dol_rx = '(^|[^\\\\])\\$\\$';
    $single_dol_rx = '(^|[^\\\\])\\$';
    $_ = &wrap_math_environment;
    $_ = &wrap_raw_arg_cmds;
}

sub wrap_math_environment {
    # This wraps math-type environments
    # The trick here is that the opening brace is the same as the close,
    # but they *can* still nest, in cases like this:
    #
    # $ outer stuff ... \hbox{ ... $ inner stuff $ ... } ... $
    #
    # Note that the inner pair of $'s is nested within a group. So, to
    # handle these cases correctly, we need to make sure that the outer
    # brace-level is the same as the inner. --- rst
    #
    # And yet another problem: there is a scungy local idiom to do
    # this: $\_$ for a boldfaced underscore. xmosaic can't display the
    # resulting itty-bitty bitmap, for some reason; even if it could, it
    # would probably come out as an overbar because of the floating-
    # baseline problem. So, we have to special case this. --- rst again.
    #
    # Works on $_ and returns the wrapped text.
    local ($processed_text, $before, $end_rx, $delim);
    local ($underscore_match_rx) = "^\\s*\\\\\\_\\s*\\\$";
    local ($wrapper);
    while (/$single_dol_rx/) {
        $processed_text .= $`.$1;
        $_ = $';
        # Reset for every formula: once a $$...$$ had been seen, all later
        # $...$ formulas were wrapped as displaymath as well.
        $wrapper = "tex2html_wrap";
        $end_rx = $single_dol_rx; # Default, unless we begin with $$.
        $delim = "\$";
        if (/^\$/ && (! $`)) {
            s/^\$//;
            $end_rx = $double_dol_rx;
            $delim = "";        # Cannot say "\$\$" inside displaymath
            $wrapper = "displaymath";
        }
        elsif (/$underscore_match_rx/ && (! $`)) {
            # Special case for $\_$ ...
            s/$underscore_match_rx//;
            $processed_text .= '\\_';
            next;
        }
        # Have an opening $ or $$. Find matching close, at same bracket level
        $processed_text .= &make_any_wrapper(1,$wrapper).$delim;
        while (/$end_rx/) {
            # Forget the $$ if we are going to replace it with "displaymath"
            $before = $` . (($wrapper eq "displaymath")? "$1" : $&);
            $processed_text .= $before;
            $_ = $';
            # Found dollar sign inside open subgroup ... now see if it's
            # at the same brace-level ...
            local ($losing, $br_rx) = (0, '');
            while ($before =~ /$begin_cmd_rx/) {
                $br_rx = &make_end_cmd_rx($1);
                $before = $';
                if ($before =~ /$br_rx/) {
                    $before = $';
                }
                else {
                    $losing = 1;
                    last;
                }
            }
            last unless $losing;
            # It wasn't ... find the matching close brace farther on; then
            # keep going.
            /$br_rx/;
            $processed_text .= $`.$&;
            $_ = $';
        }
        # Got to the end. Whew!
        $processed_text .= &make_any_wrapper(0,$wrapper);
    }
    $processed_text . $_;
}

# Repeatedly finds the first \begin of an environment in the argument,
# locates its \end and processes the contents. Returns the translated text.
sub translate_environments {
    local ($_) = @_;
    local($tmp);
    #print "\nTranslating environments ...";
    for (;;) {
        last unless (/$begin_env_rx/o);
        local ($contents, $before, $br_id, $env, $after, $pattern);
        # $1 : br_id (at the beginning)
        # $2 : environment
        ($before, $br_id, $env, $after, $pattern) = ($`, $1, $2, $', $&);
        $contents = "";
        # Sets $contents and modifies $after
        if (&find_end_env($env,*contents,*after)) {
            &process_command($counters_rx, *before)
                if ($before =~ /$counters_rx/);
            # This may modify $before and $after
            &extract_captions($env) if ($env =~ /(figure)|(table)/o);
            # Modifies $contents
            $contents = &translate_environments($contents)
                if (&defined_env($env) && (! ($env =~ /latexonly/o))
                    && (! $raw_arg_cmds{$env}));
            &process_environment($env, $br_id);
            undef $_;
            $_ = join("", $before, $contents, $after);
            undef $before;
            undef $contents;
            undef $after;
            undef $pattern;}
        ### Evan Welsh  added the next 24 lines ##
        elsif (&defined_env($env)) {
            # If I specify a function for the environment then it
            # calls it with the contents truncated at the next section.
            # It assumes I know what I'm doing and doesn't give a
            # deferred warning.
            &extract_captions($env) if ($env =~ /(figure)|(table)/);
            $contents = $after;
            $contents = &process_environment($env, $br_id, $contents);
            $_ = join("", $before, $contents);
        }
        elsif ($ignored{$env}) {
            # If I specify that the environment should be ignored then
            # it is but I get a deferred warning.
            $_ = join("", $before, $contents, $after);
            $warnings .= "\n\\end{$env} not found (ignored).\n";
        }
        elsif ($raw_arg_cmds{$env}) {
            # If I specify that the environment should be passed to tex
            # then it is with the environment truncated at the next
            # section and I get a deferred warning.
            &extract_captions($env) if ($env =~ /(figure)|(table)/);
            $contents = $after;
            $contents = &process_environment($env, $br_id, $contents);
            $_ = join("", $before, $contents);
            $warnings .= "\n\\end{$env} not found (truncated at next section boundary).\n";}
        else {
            $pattern = &escape_rx_chars($pattern);
            s/$pattern//;
            print "Cannot find \\end{$env}\n";
        }
        undef $contents;
        undef $before;
        undef $br_id;
        undef $env;
        undef $after;
        undef $pattern;
    }
    $tmp = $_;
    undef $_;
    &process_command($counters_rx, *tmp) if ($tmp =~ /$counters_rx/);
    $_ = $tmp;
    undef $tmp;
    $_
}

# Looks in $rest for the \end that balances an already consumed \begin of
# $env, counting nested begin/end pairs. On success the text up to the \end
# is appended to $ref_contents, $rest holds what follows it, and 1 is
# returned. On failure $rest is restored, $ref_contents is emptied and 0 is
# returned. Both are passed as typeglobs and modified in the caller.
sub find_end_env {
    local ($env, *ref_contents, *rest) = @_;
    local ($be_rx) = &make_begin_end_env_rx ($env);
    local ($count) = 1;
    while ($rest =~ /$be_rx/) {
        $ref_contents .= $`;
        if ($1 eq "begin") { ++$count } else { --$count };
        $rest = $';
        last if $count == 0;
        $ref_contents .= $&;
    }
    if ($count != 0) {
        $rest = join('', $ref_contents, $rest);
        $ref_contents = "";
        return(0)}
    else {
        return(1)}
}

# MODIFIES $contents
# ($contents is the caller's variable, not an argument of this routine.)
sub process_environment {
    local($env, $id) = @_;
    local($env_sub) = ("do_env_$env");
    if (&defined_env($env)) {
        print ".";
        $contents = &$env_sub($contents);
    }
    elsif (&special_env) {
        # &special_env modifies $contents
    }
    # NOTE(review): this tests %ignore while translate_environments tests
    # %ignored -- confirm which hash is actually populated.
    elsif ($ignore{$env}) { "" }
    else {
        # Generate picture
        $contents = &process_undefined_environment($env, $id, $contents);
        $env_sub = "post_latex_$env_sub"; # i.e. post_latex_do_env_ENV
        # NOTE(review): the two "\n\n" literals below look like they lost
        # HTML markup -- confirm against a pristine copy of the script.
        ( defined &$env_sub ? $contents = &$env_sub($contents) :
          do {$contents = join('',"\n\n",$contents,"\n\n")
                  unless ($env =~ /tex2html_wrap/o)});
    };
}

# The $<$, $>$, $|$ and $=>$, etc strings are replaced with their textual
# equivalents instead of passing them on to latex for processing in math-mode.
# This will not be necessary when the mechanism for passing environments
# to Latex is improved.
# RETURNS SUCCESS OR FAILURE
sub special_env {
    # Modifies $contents in its caller
    $contents =~ s/^\$(\s*($html_specials_inv_rx|[<>|=+])+\s*)\$/\1/go;
}

# True when a do_env_<env> subroutine exists or $env has an entry in
# %declarations.
sub defined_env {
    local($env) = @_;
    local($env_sub) = ("do_env_$env");
    # The test using declarations should not be necessary but 'defined'
    # doesn't seem to recognise subroutines generated dynamically using 'eval'.
    # Remember that each entry in $declarations generates a dynamic procedure ...
    ((defined &$env_sub) || ($declarations{$env}));
}

# Records the environment in %latex_body and %id_map so that it is turned
# into an image later on: either a cached image is reused or a new page is
# appended to $latex_body. Returns the result of &do_labels, which is handed
# the contents and an image marker.
sub process_undefined_environment {
    local($env, $id, $contents) = @_;
    local($name,$cached,$raw_contents,$uucontents) = ("$env$id");
    $contents = "% latex2html id marker $id\n$contents"
        if $contents =~ /$order_sensitive_rx/;
    $contents = "\\begin{$env}$contents\\end{$env}";
    $latex_body{$name} = $contents;
    $uucontents = &encode($contents);
    if ($NOLATEX) {
        $id_map{$name} = "[$name]";}
    elsif ($cached = $cached_env_img{$uucontents}) {
        $id_map{$name} = $cached;}
    else {
        $id_map{$name} = ++$max_page_num;
        $cached_env_img{$uucontents} = $max_page_num;
        $raw_contents = &revert_to_raw_tex($contents);
        $raw_contents =~ s/\\pagebreak/\\\\/go;
        $latex_body .= "\\newpage\n\n{\\samepage \\clearpage $raw_contents\n}\n\n\n";
    }
    undef $cached;
    undef $raw_contents;
    undef $uucontents;
    &do_labels($contents,"$image_mark#$name#"); # Anchor the labels and put a marker in the text;
}

# Generate images for unknown environments, equations etc, and replace
# the markers in the main text with them.
#  - $cached_env_img maps encoded contents to image URL's
#  - $id_map maps $env$id to page numbers in the generated latex file and after
#    the images are generated, maps page numbers to image URL's
#  - $page_map maps page_numbers to image URL's (temporary map);
# Uses global variables $id_map and $cached_env_img,
# $max_page_num, $latex_body
sub pass_to_latex {
    local($name, $contents, $raw_contents, $uucontents, $page_num,
          %page_map, $img);
    do {
        print "\nGenerating images using latex ...\n";
        # The message used to name "$name.tex", but $name is not set yet here.
        open(ENV,">$$_images.tex") || die "Cannot open $$_images.tex $!\n";
        print ENV &make_latex($latex_body);
        print ENV "\n";
        close ENV;
        &copy_file($FILE, "bbl");
        &copy_file($FILE, "aux");
        system("$LATEX $$_images.tex");
        print "\nGenerating postscript images using dvips ...\n";
        system("$DVIPS -M -S 1 -i -o $$_image $$_images.dvi")
            && print "Error: $!\n";
        # Give every file that dvips produced (names ending in three digits)
        # a .ps extension.
        open(IMAGE, "echo $$_image* | tr -s ' \t\r\f' '\\012\\012\\012\\012'|");
        while (<IMAGE>) {chop; rename($_, "$_.ps") if /\d\d\d$/};
        close(IMAGE);
    } if ((!$NOLATEX) && ($latex_body =~ /newpage/));
    while ( ($name, $page_num) = each %id_map) {
        $contents = $latex_body{$name};
        if ($page_num =~ /^\d+$/) { # If it is a page number
            do {                # Extract the page, convert and save it
                $img = &extract_image($page_num,$name,$contents);
                $uucontents = &encode($contents); # Arrggh
                if (! ($contents =~ /$order_sensitive_rx/)) {
                    $cached_env_img{$uucontents} = $img;
                }
                else {
                    # Blow it away so it is not saved for next time
                    delete $cached_env_img{$uucontents}
                }
                $page_map{$page_num} = $img;
            } unless ($img = $page_map{$page_num}); # unless we've just done it
            $id_map{$name} = $img;
        }
        else {
            $img = $page_num;}} # it is already available from previous runs
    # A file numbered one past the last expected page means that some
    # environment produced more than one page.
    $warnings .= "\nOne of the images ($page_num) is more than one page long.\n".
        "This may cause the rest of the images to get out of sync.\n\n"
            if (-f sprintf("%s%.3d%s", "$$_image", ++$max_page_num, ".ps"));
    &cleanup;
}

# Converts page $page_num of the dvips output into a GIF (plus a thumbnail
# when one is requested) and returns the result of &embed_image for it.
sub extract_image {
    local($page_num,$name,$contents) = @_;
    local($scale, $external, $thumbnail);
    local($psname);             # used to leak into the global namespace
    $name =~ s/\*//;
    local($env,$basename,$img) = ($name,"_$$_$name");
    $env =~ s/\d+$//;
    $psname = sprintf("%s%.3d", "$$_image", $page_num);
    $img = "$basename.gif";
    ($scale, $external, $thumbnail) = &extract_parameters($contents);
    if ( ($basename =~ /figure/) || ($contents =~ /\.ps/) ||
         $scale || $thumbnail) {
        $scale = $FIGURE_SCALE_FACTOR unless (defined $scale);
        &convert_image("$psname.ps", $img, $scale, "");
        if ($thumbnail) {       # $thumbnail contains the reduction factor
            &convert_image("$psname.ps", "T$img", $thumbnail, "");
            $thumbnail = "T$img"}}
    else {
        &convert_image("$psname.ps", $img, $MATH_SCALE_FACTOR , 1)}
    if ($basename =~ /(equation|eqnarray|displaymath)/) {
        &right_justify($basename)}
    &make_transparent($img);
    &make_transparent("T$img") if $thumbnail;
    $warnings .= "\nFailed to convert image $psname.ps" if ! -r $img ;
    &embed_image($img, $env, $external, $thumbnail);
}

# Picks the scale, external and thumbnail settings out of the text matched
# by $htmlimage_rx (its second group) in $contents.
# Returns ($scale, $external, $thumbnail).
sub extract_parameters {
    local($contents) = @_;
    local($_, $scale, $external, $thumbnail);
    $contents =~ s/$htmlimage_rx/$_ = $2;''/ego;
    # The parameter text lives in $_; the spaces used to be stripped from a
    # variable ($parameters) that is never set.
    s/\s//g;                    # Remove spaces
    ($scale) = /scale=([\.\d]*)/;
    $external = /external/;
    ($thumbnail) = /thumbnail=([\.\d]*)/;
    ($scale, $external, $thumbnail);
}

# Runs $PSTOGIF on $in_img to produce $out_img. The -depth and -scale
# options are only passed when $depth is true and $scale is positive.
sub convert_image {
    local($in_img, $out_img, $scale, $depth) = @_;
    # Explicit alternatives: "do { STRING if COND }" yields the value of COND
    # when it is false, which could splice a stray "0" into the command line.
    system( "$PSTOGIF "
            . ($depth ? " -depth $depth " : "")
            . (($scale > 0) ? " -scale $scale " : "")
            . " -out $out_img $in_img")
        && print "Error while converting image: $!\n";
}

# Rewrites $img with white made transparent, using $GIFTRANS.
# Does nothing when $GIFTRANS is not set.
sub make_transparent {
    local($img) = @_;
    return unless $GIFTRANS;
    # Making the white color transparent - this may not
    # always be a good idea...
    system("$GIFTRANS -t '#ffffff' $img > $$_tmp.gif")
        && do {print "Could not make $img transparent: $!\n"; return};
    rename("$$_tmp.gif", $img) ;
}

# This takes a filename (an GIF of an equation or equation array) and
# prepends enough whitespace so that it is right justified.
# It uses the global variable $LINE_WIDTH and the pbmplus routines
# giftoppm (to convert to ppm), pnmfile (to ask for its dimensions),
# pnmtile (to replicate a blank PBM of 1 pixel), pnmcat
# (to prepend the replicated blank bitmap) and ppmtogif
# (to convert the result back to GIF).
# For displaymath only half of the padding is added.
sub right_justify {
    local($basename) = @_;
    local($_, $img_width, $justification_width) = ('', 0, 0);
    system("$GIFTOPPM $basename.gif > $basename.ppm")
        && print "Error: $!\n";
    $_ = `$PNMFILE $basename.ppm`;
    s/([\d]+) by/$img_width = $1/eo;
    do {
        $justification_width = ($LINE_WIDTH - $img_width);
        # Whole pixels only: halving an odd width gave a fractional value.
        $justification_width = int($justification_width / 2)
            if $basename =~ /displaymath/;
        system("$PNMTILE $justification_width 1 $BLANKPBM |$PNMCAT -l - $basename.ppm|$PPMTOGIF - > $basename.gif")
            && print "Error: $!\n";
    } if ($img_width < $LINE_WIDTH);
    # Remove the temporary file even when no padding was needed (it used to
    # be left behind for images at least $LINE_WIDTH wide).
    unlink "$basename.ppm";
}

sub process_in_latex {
    # This is just a wrapper for process_undefined_environment.
    # $_[0] = contents
    $max_id++;
    &process_undefined_environment('tex2html_wrap',$max_id,$_[0]);
}

# Copies $FILE.$ext, if it exists, next to the generated images file.
# The first argument is overwritten with the full path of $FILE.$ext.
sub copy_file {
    local($file, $ext) = @_;
    $file = &fulltexpath("$FILE.$ext");
    system("cp $file $$_images.$ext") if (-e $file);
}

############################ Processing Commands ##########################
sub translate_commands {
    local ($_) = @_;
    #print "\nTranslating commands ...";
    &replace_strange_accents;
    for (;;) {                  # For each opening bracket ...
        last unless (/$begin_cmd_rx/o);
        local($before, $contents, $br_id, $after, $pattern);
        ($before, $br_id, $after, $pattern) = ($`, $1, $', $&);
        local($end_cmd_rx) = &make_end_cmd_rx($br_id);
        if ($after =~ /$end_cmd_rx/) { # ...
find the the matching closing one ($contents, $after) = ($`, $'); undef $_; $contents = &translate_commands($contents) if ($contents =~ /$match_br_rx/o); # Modifies $contents &process_command($single_cmd_rx,*contents) if ($contents =~ /\\/o); # THIS MARKS THE OPEN-CLOSE DELIMITERS AS PROCESSED $_ = join("", $before,"$OP$br_id$CP", $contents,"$OP$br_id$CP", $after); } else { $pattern = &escape_rx_chars($pattern); s/$pattern//; print "\nCannot find matching bracket for $br_id"; } undef ($contents, $before, $br_id, $pattern, $after, $end_cmd_rx); } # Now do any top level commands that are not inside any brackets # MODIFIES $_ &process_command($single_cmd_rx,*_); } # Modifies $contents sub process_command { local ($cmd_rx, *ref_contents) = @_; local (@open_font_tags); $ref_contents = &convert_iso_latin_chars($ref_contents); for (;;) { # Do NOT use the o option last unless ($ref_contents =~ /$cmd_rx/); local($ref_before, $cmd, $after) = ($`, $1, "$2$'"); print("."); $cmd = &normalize($cmd); $after =~ s/^\s*//o; # Eat all spaces that follow a command local($cmd_sub, $cmd_trans) = ("do_cmd_$cmd", $declarations{$cmd}); if (defined &$cmd_sub) { # $ref_before may also be modified ... $after = &$cmd_sub($after, @open_font_tags);} elsif ($cmd_trans) { # One to one transform $after = "<$cmd_trans>" . $after . ""; push(@open_font_tags, $cmd) if ($cmd =~ /$fontchange_rx/o);} elsif ($ignore{$cmd}) { # Ignored command print "."} elsif ($cmd =~ /^the(.+)$/) { # Counter $counter = $1; $after = &do_cmd_thecounter($after);} else { # Do not add if reading an auxiliary file ++$unknown_commands{$cmd} unless $AUX_FILE; } $ref_contents = join('', $ref_before, $after); undef ($ref_before, $after); } $ref_contents; } ####################### Processing Meta Commands ############################ # This is a specialised version of process_command above. # The special commands (newcommand, newenvironment etc.) 
# must be processed before translating their arguments, # and before we cut up the document into sections # (there might be sectioning commands in the new definitions etc.). # \newtheorem commands are treated during normal processing by # generating code for the environments they define. sub substitute_meta_cmds { local ($next_def); local ($cmd, $argn, $body, $before, $after, $new_cmd_rx, $new_env_rx); &tokenize($meta_cmd_rx); # Inserts a space after meta commands ... print "\nReading new definitions ..."; while (/$meta_cmd_rx /o) { # ... and uses the space ($before, $cmd, $after) = ($`, $1, $'); print "."; $next_def = "\n\\$cmd"; local($cmd_sub) = "get_body_$cmd"; $_ = join('',$before, &$cmd_sub($after)); &add_to_preamble($cmd, $next_def); } # All the definitions have now moved to the $preamble and their bodies # are stored in %new_command and %new_environment # # Now substitute the new commands and environments: # (must do them all together because of cross definitions) ($new_cmd_rx, $new_env_rx) = (&make_new_cmd_rx, &make_new_env_rx('begin')); do { print "\nExpanding new definitions ..."; while (($cmd, $code) = each %new_command) { $new_command{$cmd} = &expand_code($code);} while (($cmd, $code) = each %new_environment) { $new_environment{$cmd} = &expand_code($code);} print "\nSubstituting new definitions ..."; &tokenize($new_cmd_rx); # Inserts a space after the new commands ... 
if ($new_cmd_rx) { while (/$new_cmd_rx /o && (($before, $cmd, $after) = ($`, $1, $'))) { print "."; $_ = join('',$before, &substitute_newcmd);} } if ($new_env_rx) { while (/$new_env_rx/o && (($before, $cmd, $after) = ($`, $2, $'))) { print "."; $_ = join('',$before, &substitute_newenv);}} } if (%new_command || %new_environment); $_; } sub expand_code { local($_) = @_; # Uses $new_cmd_rx and $new_env_rx set in the caller if ($new_cmd_rx eq "0") { $new_cmd_rx = "<<{this cant possibly match}>>"; } if ($new_env_rx eq "0") { $new_env_rx = "<<{this cant possibly match}>>"; } local($cmd, $before, $after); &tokenize($new_cmd_rx); # Inserts a space after the new commands ... while ((/$new_cmd_rx /o && (($before, $cmd, $after) = ($`, $1, $'))) || (/$new_env_rx/o && (($before, $cmd, $after) = ($`, $2, $')))) { if ($new_command{$cmd}) { # We have a command $_ = join('',$before, &substitute_newcmd); } else { $_ = join('',$before, &substitute_newenv); } &tokenize($new_cmd_rx); # Must do it for any newly inserted code $cmd = 0; } $_; } # Removes the definition from the input string, adds to the preamble # and stores the body in %new_command; sub get_body_newcommand { local($_) = @_; local($argn,$cmd,$body,$tmp); $cmd = &get_next(1); # Get command name $cmd =~ s/^\s*\\//; $argn = &get_next(0); # Get optional no. of args $argn = 0 unless $argn; # Get the body of the code and store it with the name and number of args # UNLESS THE COMMAND IS ALREADY DEFINED # (This is the mechanism with which raw html can be ignored in a Latex document # but be recognised as such by the translator). 
$body = &get_next(1); $tmp = "do_cmd_$cmd"; $new_command{$cmd} = join(':!:',$argn,$body) unless (defined &$tmp); $_; } # Like get_body_newcommand above, but for simple raw TeX \defs sub get_body_def { local($_) = @_; local($argn,$cmd,$body,$is_simple_def,$tmp); $cmd = &get_next(2); $cmd =~ s/^\s*\\//; $argn = &get_next(3); $argn = 0 unless $argn; $body = &get_next(1); $tmp = "do_cmd_$cmd"; if ($is_simple_def && !defined (&$tmp)) { $new_command{$cmd} = join(':!:',$argn,$body); } $_; } # Removes the definition from the input string, adds to the preamble # and stores the body in %new_environment; sub get_body_newenvironment { local($_) = @_; local($argn,$env,$begin,$end,$tmp); $env = &get_next(1); # Get the environment name $env =~ s/^\s*\\//; $argn = &get_next(0); # Get optional no. of args $argn = 0 unless $argn; # Get the body of the code and store it with the name and number of args # UNLESS THE COMMAND IS ALREADY DEFINED (see get_body_newcommand) $tmp = "do_env_$env"; $begin = &get_next(1); $end = &get_next(1); $new_environment{$env} = join(':!:', $argn, $begin, $end) unless defined &$tmp; $_; } sub get_body_renewcommand { &get_body_newcommand(@_[0]); } sub get_body_renewenvironment { &get_body_newenvironment(@_[0]); } sub substitute_newcmd { # Modifies $cmd and $after in the caller # Get the body from the new_command array local($argn, $_) = split(/:!:/, $new_command{$cmd}); local($arg); foreach $i (1..$argn) { $arg = $undef_mark; $after =~ s/$next_pair_rx/$arg = $2;''/eo; # Get the next argument # Next argument may not be in braces - get next character - ARGG! 
$after =~ s/\s*(.)/$arg = $1;''/eo if ($arg eq $undef_mark); s/\#$i/$arg/g;} # Substitute the arguments in the body # Make the body unique (give unique id's to the brackets), # translate, and return it $_ = &revert_to_raw_tex($_); &pre_process; join('',$_,$after); } sub substitute_newenv { # Modifies $cmd and $after in the caller # Get the body from the new_environment array local($argn, $begdef, $enddef) = split(/:!:/, $new_environment{$cmd}); local($arg,$new_def_rx); # Note that latex allows argument substitution only in the # \begin part of the new definition local($_) = $begdef; foreach $i (1..$argn) { $after =~ s/$next_pair_rx/$arg = $2;''/eo; # Get the next argument s/\#$i/$arg/g;} # Substitute the arguments in the body # Make the body unique (Give unique id's to the brackets), # translate, and return it $_ = &revert_to_raw_tex($_); $begdef = &pre_process; # Make unique # Now substitute the \end part: $_ = &revert_to_raw_tex($enddef); $result_str = ''; $enddef = &pre_process; # Make unique $new_def_rx = &make_end_env_rx($cmd); $after =~ s/$new_def_rx/$enddef/; join('',$begdef,$after); } # Instead of substituting as with newcommand and newenvironment, # or generating code to handle each new theorem environment, # it now does nothing. This forces theorem environments to be passed # to latex. Although it would be possible to handle theorem # formatting in HTML as it was done previously it is impossible # to keep the theorem counters in step with other counters (e.g. equations) # to which only latex has access to. Sad... 
sub get_body_newtheorem { local($_) = @_; # Just chop off the arguments and append to $next_def &get_next(1); &get_next(0); &get_next(1); &get_next(0); $_; } # Modifies $_ in the caller and as a side-effect it modifies $next_def # which is local to substitute_meta_cmds sub get_next { local($what) = @_; local($next, $pat, $tmp); if ($what == 1) { ($next, $tmp, $pat) = &get_next_argument;} elsif ($what == 2) { ($next, $pat) = &get_next_tex_cmd;} elsif ($what == 3) { ($next, $pat) = &get_next_def_arg;} else { ($next, $pat) = &get_next_optional_argument;} $next_def .= &revert_to_raw_tex($pat) if $pat; $next =~ s/(^\s*)|(\s*$)//g; $next; } # The following get_next_ ARE ALL DESTRUCTIVE. sub get_next_argument { local($next, $br_id, $pat); s/$next_pair_rx/$br_id=$1;$next=$2;$pat=$&;''/eo; ($next, $br_id, $pat); } sub get_next_pair_or_char_pr { local($next, $br_id, $pat, $epat); if ( (/^\s*([\w])/o && (! $`))) { ($next, $pat) = ($1, $&) } elsif ( /$next_pair_pr_rx/o && (! $`)) { ($next, $br_id, $pat) = ($2, $1, $&) }; $epat = &escape_rx_chars($pat); s/$epat// if $pat; ($next, $br_id, $pat); } sub get_next_optional_argument { local($next, $pat); s/$optional_arg_rx/$next=$1;$pat=$&;''/eo if (/\s*[[]/ && (! $`)); # if the first character is a [ # (/^[]/ does not work because it may match the beginning of ANY line s/^\s*\[\]//g unless $pat; # This is not picked by $optional_arg_rx ($next, $pat); } sub get_next_tex_cmd { local($next, $pat); s/$single_cmd_rx/$next = $1; $pat=$&; ''/eo; ($next, $pat); } sub get_next_def_arg { local($next, $pat); # Sets is_simple_def for caller. Start by turning it off, then # turn it on if we find one of the "simple" patterns. # This has got to be hit-or-miss to an extent, given the # thoroughly incestuous relationship between the TeX macroprocessor # ('mouth') and typesetting back-end ('stomach'). Anything which # even does catcode hacking is going to lose BAD. $is_simple_def = 0; # no arguments if (/^$O/ && (! 
$`)) { $next=0; $pat=''; $is_simple_def=1; $O } # 'simple' arguments if (! $is_simple_def && /$tex_def_arg_rx/o && (! $`)) { s/$tex_def_arg_rx/$next=$1; $pat=$&; $is_simple_def=1; $O/eo; } # MESSY arguments if (! $is_simple_def) { print "Arguments to $cmd are too complex ...\n"; print "It will not be processed unless used in another environment\n"; print "which is passed to LaTeX whole for processing.\n"; s/^[^<]*(<[^<]+)*< $depth) {$_ = 0 ;}; $i++; 0; }, @curr_sec_id); @curr_sec_id; } sub make_head_and_body { local($title) = @_; "\n" . "\n" . "\n\n$title\n" . &meta_information($title) . "\n

\n"; } sub make_address { "

\n$ADDRESS\n
", "\n\n"; } sub encode_title { local($_) = @_; $_ = &encode($_); while (/(<[^<>]*>)/o) {s/$1//g}; # Remove HTML tags s/#[^#]*#//g; # Remove #-delimited markers $_; } # Encodes the contents of enviroments that are passed to latex. The code # is then used as key to a hash table pointing to the URL of the resulting # picture. sub encode { local($_) = @_; for (;;) { # Remove invocation-specific stuff last unless s/begin|end|<<\d+>>|tex2html_|wrap//go; } #$_ = pack("u*", $_); # uuencode s/\/|\\//g; # remove funnies may cause problems in a hash key s/\s*|\n//g; # Remove spaces and newlines $_; } ##################### Hypertext Section Links ######################## sub post_process { # Put hyperlinks between sections, add HTML headers and addresses, # do cross references and citations. # Uses the %section_info array created in sub translate. # Binds the global variables # $PREVIOUS, $PREVIOUS_TITLE # $NEXT, $NEXT_TITLE # $UP, $UP_TITLE # $CONTENTS # $INDEX # $NEXT_GROUP, $NEXT_GROUP_TITLE # $PREVIOUS_GROUP, $PREVIOUS_GROUP_TITLE # Converting to and from lists and strings is very inefficient. # Maybe proper lists of lists should be used (or wait for Perl5?) 
local($_, $key, $depth, $file, $title, $header, @link, @old_link, $navigation, %done, @keys, @tmp_keys, $flag, $child_links); @tmp_keys = @keys = sort numerically keys %section_info; print "\nDoing section links ..."; while (@tmp_keys) { $key = shift @tmp_keys; print "."; ($depth, $file, $title) = split($delim,$section_info{$key}); unless ($done{$file}) { $PREVIOUS = $PREVIOUS_TITLE = $NEXT = $NEXT_TITLE = $UP = $UP_TITLE = $CONTENTS = $INDEX = $NEXT_GROUP = $NEXT_GROUP_TITLE = $PREVIOUS_GROUP = $PREVIOUS_GROUP_TITLE = $_ = $navigation = undef; @link = split(' ',$key); ($PREVIOUS, $PREVIOUS_TITLE) = &add_link($previous_page_visible_mark,$file,@old_link); @old_link = @link; $link[$depth]++; ($NEXT_GROUP, $NEXT_GROUP_TITLE) = &add_link($next_visible_mark, $file, @link); $link[$depth]--;$link[$depth]--; ($PREVIOUS_GROUP, $PREVIOUS_GROUP_TITLE) = &add_link($previous_visible_mark, $file,@link); $link[$depth] = 0; ($UP, $UP_TITLE) = &add_link($up_visible_mark, $file, @link); @link = split(' ',$tmp_keys[0]); ($NEXT, $NEXT_TITLE) = &add_link($next_page_visible_mark, $file,@link); $CONTENTS = &add_special_link($contents_visible_mark, $tocfile, $file) if $CONTENTS_IN_NAVIGATION; $INDEX = &add_special_link($index_visible_mark, $idxfile, $file) if $INDEX_IN_NAVIGATION; $navigation = &navigation_panel unless $NO_NAVIGATION; $_ = &make_head_and_body($title); $header = join(' ', $_, $navigation) if $TOP_NAVIGATION; rename($file, "$$.$file"); open(INPUT, "<$$.$file") || die "Cannot open file $$.$file $!"; open(OUTFILE, ">$file") || die "Cannot open file $file $!"; &slurp_input; $child_links = &add_child_links(0,$depth, $key, @keys); $flag = (($BOTTOM_NAVIGATION || &auto_navigation) && $navigation); $_ = join('', $_, $CHILDLINE) if $child_links; $_ = join('', $header, $_, $child_links); $_ = join('', $_, $navigation) if ($flag); $flag = 0; $_ = join('', $_, $CHILDLINE) unless $flag; &remove_markers; print OUTFILE $_; print OUTFILE &make_address; close OUTFILE; $done{$file}++; 
&cleanup; } } } sub add_link { # Returns a pair (iconic link, textual link) local($icon, $current_file, @link) = @_; local($dummy, $file, $title) = split($delim,$section_info{join(' ',@link)}); if ($title) { $title = &get_first_words($title, $WORDS_IN_NAVIGATION_PANEL_TITLES); return (&make_href($file, $icon), &make_href($file, "$title")) unless ($file eq $current_file); } (&inactive_img($icon), ""); } sub add_special_link { local($icon, $file, $current_file) = @_; (($file && ($file ne $current_file)) ? &make_href($file, $icon) : undef) } sub remove_markers { &remove_general_markers; &text_cleanup; # Must NOT clean the ~'s out of the navigation icons (in panel or text), # and must not interfere with verbatim-like environments &remove_sensitive_markers; } sub remove_general_markers { s/$lof_mark/
    $figure_captions<\/UL>/o; s/$lot_mark/
      $table_captions<\/UL>/o; s/$bbl_mark/$citations/o; &add_toc if (/$toc_mark/); &add_idx if (/$idx_mark/); &replace_cross_references if /$cross_ref_mark/; &replace_external_references if /$external_ref_mark/; &replace_cite_references if /$cite_mark/; } sub remove_sensitive_markers { &replace_images if /$image_mark/; &replace_icons if /$icon_mark_rx/; &replace_verbatim if /$verbatim_mark/; &replace_verb if /$verb_mark/; s/;tex2html_html_special_mark_/\&/go; } # This code is extremely inefficient. At least the subtrees should be # filtered according to $MAX_LINK_DEPTH before going into the # inner loops. sub add_child_links { local($exclude, $depth, $current_key, @keys) = @_; local($_, $child_rx, @subtree, $next, %open); $child_rx = $current_key; $child_rx =~ s/( 0)*$//; # Remove trailing 0's foreach $next (@keys) { if (($next =~ /^$child_rx /) && ($next ne $current_key)) { push(@subtree,$next);}} # @subtree now contains the subtree rooted at the current node if (@subtree) { local($next_depth, $file, $title); @subtree = sort numerically @subtree; foreach $next (@subtree) { ($next_depth, $file, $title) = split($delim,$toc_section_info{$next}); next if ($exclude eq $title); $file = join('', $file, "#SECTION", split(' ', $next)); if (($next_depth > $depth) && (%open < $MAX_LINK_DEPTH) && ($next_depth < $MAX_SPLIT_DEPTH)) { $depth = $next_depth; $open{$depth}++; $_ .= "
        \n"; $_ .= "
      • ". &make_href($file,$title) . "\n"; } elsif (($next_depth <= $depth) && (%open <= $MAX_LINK_DEPTH)) { while (($next_depth < $depth) && %open ) { # Backing out if ($open{$depth}) { if (! $open{$next_depth}) { $open{$next_depth}++; } else { $_ .= "
      \n"; } delete $open{$depth};}; $depth--;} $depth = $next_depth; $_ .= "
    • ". &make_href($file,$title) . "\n";} } $_ .= "
    \n" x %open; } $_; } sub top_page { local($file, @navigation_panel) = @_; # It is the top page if there is a link to itself join('', @navigation_panel) =~ /$file/; } # Sets global variable $AUX_FILE sub process_aux_file { local($_); # To protect caller from &process_ext_file $AUX_FILE = 1; do { print "\nCannot open $FILE.aux $!\n"; $warnings .= "\nThe $FILE.aux file was not found," . " so sections will not be numbered \nand cross-references will be shown as icons.\n"; # i.e. undefined cross references will be shown as black arrows instead # of using the LaTeX symbolic names. } unless (&process_ext_file("aux")); # Sets $_ as a side-effect! $AUX_FILE = 0; } sub make_href { local($link, $text) = @_; $name++; "$text"; } sub make_section_heading { local($text, $level) = @_; local($section_tag) = join('', @curr_sec_id); join('',"<$level>","$text<\/$level>\n"); } sub extract_captions { # Uses and modifies $contents defined in translate_environments # and modifies $figure_captions, $table_captions, $before and $after local($env) = @_; local(%captions, $key, $caption, $item, $type, $list, $number, @tmp, $_); # associate the br_id of the caption with the argument of the caption $contents =~ s/$caption_rx/$captions{$1} = $2;''/ego; $after = join("","

    ",$after); $before .= "

    "; if ($env eq 'figure') { $type = "Figure"; $list = "\$figure_captions"; } elsif ($env eq 'table') { $type = "Table"; $list = "\$table_captions"; } foreach $key (sort {$a <=> $b;} keys %captions){ # Sort numerically $caption = &translate_commands(&translate_environments($captions{$key})); $item .= "

  • " . &make_href("$CURRENT_FILE#$key", $caption) . "\n"; $before .= "$anchor_mark"; $_ = $caption; &text_cleanup; $_ = &encode_title($_); @tmp = split(/$;/, eval ("\$encoded_$env" . "_number{\$_}")); $number = shift(@tmp); $number = "" if ($number eq "-1"); eval("\$encoded_$env" . "_number{\$_} = join(\$;, \@tmp)"); undef $_; undef @tmp; $after = join("", "\n
    $type", ($number ? " $number:" : ":"), " $caption
    \n", $after)} eval "$list .= \$item"; } # This processes \label commands found in environments that will # be handed over to Latex. Sets the table %symbolic_labels sub do_labels { local($context,$new_context) = @_; local($label); $context =~ s/$labels_rx/do { $label = &do_labels_helper($2); $new_context = &anchor_label($label,$CURRENT_FILE,$new_context);""}/geo; $new_context; } # This should be done inside the substitution but it doesn't work ... sub do_labels_helper { local($_) = @_; s/\W//g; $symbolic_labels{$_} = $latex_labels{$_}; # May be empty; $_; } sub add_toc { local($temp1, $temp2); print "\nDoing table of contents ..."; local(@keys) = keys %toc_section_info; @keys = sort numerically @keys; $temp1 = $MAX_LINK_DEPTH; $temp2 = $MAX_SPLIT_DEPTH; $MAX_SPLIT_DEPTH = $MAX_LINK_DEPTH = 1000; s/$toc_mark/&add_child_links("Contents",0,$keys[0],@keys)/eo; $MAX_LINK_DEPTH = $temp1; $MAX_SPLIT_DEPTH = $temp2; } sub add_idx { print "\nDoing the index ..."; local($key, @keys, $next, $index); @keys = keys %index; @keys = sort @keys; foreach $key (@keys) { $next = "
    $key
    " . $index{$key}; $next =~ s/[,] $/\n/; # Get rid of the last comma-space $index .= $next; } s/$idx_mark/
    $index<\/DL>/o; } sub make_footnotes { # Uses $footnotes defined in translate and set in do_cmd_footnote # Also uses $footfile local($_) = "
    $footnotes <\/DL>\n"; print "\nDoing footnotes ..."; &remove_markers; &text_cleanup; &make_file($footfile, "Footnotes"); # Modifies $_; } sub make_file { # Uses and modifies $_ defined in the caller local($filename, $title) = @_; $_ = join('',&make_head_and_body($title),$_,&make_address); &text_cleanup; open(FILE,">$filename") || print "Cannot open $filename $!\n"; print FILE $_; close(FILE); } sub replace_verbatim { # Modifies $_ s/$verbatim_mark(verbatim)(\d+)/
    $verbatim[\2]<\/PRE>/go;
        s/$verbatim_mark(rawhtml)(\d+)/&revert_to_raw_tex($verbatim[\2])/ego;	# Raw HTML
    }
    sub replace_verb {
        # Modifies $_
        # Every \verb marker (the mark followed by an index) is replaced by
        # the saved text $verb[index], wrapped in a <code> ... </code> pair.
        # The index is written as $1: a subscript is parsed as an ordinary
        # expression, where \1 would be a reference to the constant 1 and
        # not the captured digits.
        s/$verb_mark(\d+)/<code>$verb[$1]<\/code>/go;
    }
    
    # This is used by revert_to_raw_tex
    sub revert_verbatim {
        # Modifies $_
        # Puts the saved contents $verbatim[index] back in place of each
        # verbatim / rawhtml marker, inside the matching \begin ... \end pair.
        # The index is written as $2: a subscript is parsed as an ordinary
        # expression, where \2 would be a reference and not the capture.
        s/$verbatim_mark(verbatim)(\d+)/\\begin{verbatim}$verbatim[$2]\\end{verbatim}/go;
        s/$verbatim_mark(rawhtml)(\d+)/\\begin{rawhtml}$verbatim[$2]\\end{rawhtml}/go;
    }
    
    sub revert_verb {
        # Modifies $_
        # Turns each \verb marker back into \verb<delim>text<delim>, using the
        # text saved in @verb and the delimiter saved in @verb_delim under the
        # marker's index.  The index is written as $1: a subscript is parsed
        # as an ordinary expression, where \1 would be a reference and not
        # the captured digits.
        s/$verb_mark(\d+)/\\verb$verb_delim[$1]$verb[$1]$verb_delim[$1]/go;
    }
    
    sub replace_cross_references {
        # Rewrites the cross reference markers found in $_ (modifies $_).
        local($label,$id);
        # A marker that is followed by a second copy of the mark becomes the
        # quoted "file#label" target, the closing > and then the visible
        # reference text chosen by &get_ref_mark.
        s/$cross_ref_mark#(\w+)#(\w+)>$cross_ref_mark/
            do {
                $label = $1;
                $id = $2;
                '"' . "$ref_files{$label}#$label" . '">' . &get_ref_mark($label,$id);
            }/geo;
        # Any marker still left (pagerefs, which cannot have symbolic labels)
        # only gets the quoted target and the closing >.
        s/$cross_ref_mark#(\w+)#\w+>/
            do {
                $label = $1;
                '"' . "$ref_files{$label}#$label" . '">';
            }/geo;
    }
    
    sub replace_external_references {
        # Modifies $_
        # Each external reference marker (mark#label#id> followed by the mark
        # again) becomes the quoted "url#label" target taken from
        # %external_labels, the closing > and the visible reference text
        # chosen by &get_ref_mark for "userdefined<label>".
        # $id is localized together with $label so that the assignment
        # inside the replacement does not overwrite a $id in use elsewhere.
        local($label, $id);
        s/$external_ref_mark#(\w+)#(\w+)>$external_ref_mark/
            do {($label,$id) = ($1,$2); '"'. "$external_labels{$label}#$label" .
                    '">' . &get_ref_mark("userdefined$label",$id)}/geo;
    }
    
    sub get_ref_mark {
        # Returns the visible text for a reference: the first true value
        # among the candidates below, tried in this order.
        local($label,$id) = @_;
        # The symbolic label wins outright when section numbers are shown.
        return $symbolic_labels{"$label$id"}
            if ($SHOW_SECTION_NUMBERS && $symbolic_labels{"$label$id"});
        return $latex_labels{"userdefined$label$id"}
            if $latex_labels{"userdefined$label$id"};
        return $symbolic_labels{"$label$id"}
            if $symbolic_labels{"$label$id"};
        return $latex_labels{$label}
            if $latex_labels{$label};
        # Nothing is known about this label: fall back to the generic mark.
        $cross_ref_visible_mark;
    }
        
    sub replace_images {
        # Modifies $_
        # Each image marker (the mark followed by #name#) is replaced by the
        # entry stored under that name in %id_map.  The key is written as
        # $1: a hash subscript is parsed as an ordinary expression, where \1
        # would be a reference and not the captured name.
        s/$image_mark#([^#]+)#/$id_map{$1}/go;
    }
    
    sub replace_icons {
        # Modifies $_: every match of $icon_mark_rx is swapped for what
        # &img_tag returns for the %icons entry named by the first capture.
        $_ =~ s/$icon_mark_rx/
            do { local($icon_name) = $1; &img_tag($icons{$icon_name}); }/ego;
    }
    
    sub replace_cite_references {
        # Modifies $_
        # Uses $citefile set by the thebibliography environment
        # NOTE(review): the next statement looks damaged.  It starts as the
        # citation marker substitution but ends with the flags and the
        # comment of a blank-line/paragraph substitution, and everything
        # after it is general text clean-up (bracket ids, dashes, spacing
        # commands, ~) rather than citation handling.  Presumably the end of
        # this sub and the start of the following one were lost; restore
        # both from a pristine copy of the file before relying on this code.
        s/#(\w+)>$cite_mark[$cite_info{\1}]\n/go;	# Replace consecutive blank lines with a paragraph tag
        s/$O\d+$C//go;		# Get rid of bracket id's
        s/$OP\d+$CP//go;		# Get rid of processed bracket id's
        s/(\w\s*)?---?(\s*\w)?/\1-\2/go; # Dashes 
        s/\\( |$)/ /go;	# Spacing commands
        s/\\,//go;	# Cannot treat \, as a command because , is a delimiter ...
        s/^~|([^\\])~/\1 /go;
    }
    
    # Reduces $_ (typically a title) to plain words, free of tex2html
    # markers and HTML constructs.
    sub extract_pure_text {
        # First the general clean-up (removes the marking brackets) ...
        &text_cleanup;
        # ... then the image markers, ...
        $_ =~ s/$image_mark#[^#]*#//g;
        # ... any other tex2html marker, up to the next delimiter, ...
        $_ =~ s/tex2html[^$delimiters]*//g;
        # ... and finally whatever is enclosed in < >.
        $_ =~ s/<[^>]*>//g;
    }
    				 
    ############################ Misc ####################################
    sub usage {
        # Prints the version banner followed by the list of command line
        # options, one option per output line.
        print "This is LaTeX2HTML Version $TEX2HTMLVERSION by Nikos Drakos, \n" .
            "Computer Based Learning Unit, University of Leeds.\n" .
            "Usage: latex2html \n" .
            "   [-split num] \n" .
            "   [-link num]    \n" .
            "   [-nolatex] \n" .
            "   [-external_images] \n" .
            "   [-ascii_mode] \n" .
            "   [-t top_page_title] \n" .
            "   [-dir output_directory] \n" .
            "   [-address author_address] \n" .
            "   [-no_navigation] \n" .
            "   [-top_navigation] \n" .
            "   [-bottom_navigation]  \n" .
            "   [-auto_navigation]  \n" .
            "   [-index_in_navigation]  \n" .
            "   [-contents_in_navigation]  \n" .
            "   [-next_page_in_navigation] \n" .
            "   [-previous_page_in_navigation] \n" .
            "   [-info string] \n" .
            "   [-dont_include file(s)] \n" .
            "   [-reuse] \n" .
            "   [-show_section_numbers] \n" .
            "   [-init_file Perl file] \n" .
            "   [-h ] \n" .
            "   file(s)\n"
    }
    
    # The table of contents, the lists of figures and tables, the
    # bibliography and the index must appear in the contents table at the
    # same level as the outermost sectioning command.  This subroutine finds
    # that outermost level in $_ and registers those entries at it.
    sub set_depth_levels {
        # Sets $outermost_level; also adjusts $MAX_SPLIT_DEPTH and
        # %section_commands
        local($level);
        # Try the sectioning commands from the highest level downwards and
        # stop at the first one that occurs in $_.
        foreach $level ('part', 'chapter', 'section', 'subsection',
                        'subsubsection', 'paragraph') {
            ($outermost_level) = /\\($level)$delimiter_rx/;
            last if $outermost_level;
        }
        if ($outermost_level) {
            $level = $section_commands{$outermost_level};
        }
        else {
            # No sectioning command at all: behave as if it were \section.
            $outermost_level = 'section';
            $level = 3;
        }
        $MAX_SPLIT_DEPTH += $level;
        # The existing table comes last in the list, so entries that are
        # already present keep their values.
        %section_commands = ('tableofcontents', $level,
                             'listoffigures', $level,
                             'listoftables', $level,
                             'bibliography', $level,
                             'tex2htmlindex', $level,
                             %section_commands);
    }
    			
    # Accents which cannot be translated to ISO-LATIN-1 characters are
    # ignored; what is handled here are the ?` and !` sequences.
    sub replace_strange_accents {
        # Modifies $_: each sequence is replaced by what &iso_map returns
        # for the corresponding character name.
        $_ =~ s/\?`/&iso_map('questacute', '')/geo;
        $_ =~ s/!`/&iso_map('exclamacute', '')/geo;
    }
    			       	       
    # Creates a new directory.  If it cannot be created the user is asked
    # (unless $REUSE is set, which reuses it without asking) whether to
    # reuse the images of the old directory, delete it and start again,
    # or quit.
    sub new_dir {
        local($_) = @_;
        local($answer, $success);
        mkdir($_, oct(755)) ||
            do {print "Cannot create directory $_: $!\n";
                if ($REUSE) {
                    &reuse;}
                else {
                    while (! ($answer =~ /^[dqr]$/)) {
                        print "(r) Reuse the images in the old directory OR\n".
                            "(d) *** DELETE *** $_ AND ITS CONTENTS OR\n".
                                "(q) Quit ?\n:";
                        # One line from the terminal; scalar() needs the
                        # read operator as its operand.
                        $answer = scalar(<STDIN>);
                        # End of input can never produce a valid answer, so
                        # give up rather than ask for ever.
                        die "No answer read from standard input.\n"
                            unless defined($answer);
                        if ($answer =~ /^d$/) {
                            # List form: the directory name reaches rm
                            # as a single argument, untouched by the shell.
                            system('rm', '-r', $_);     # ******
                            &new_dir($_);
                            return(1);}
                        elsif ($answer =~ /^q$/) {
                            die "Bye!\n";}
                        elsif ($answer =~ /^r$/) {
                            &reuse;
                            return(1);}
                        else {print "Please answer r d or q!\n";};} };
            };
    }
    
    sub reuse {
       # Announces that the directory named by $_ is being reused and loads
       # its images.pl, if there is one.  Always returns 1.
       # $key is localized but not used here - presumably to shield the
       # caller's $key from the loaded file; confirm against images.pl.
       local($key);
       print "Reusing directory $_:\n";
       if (-f "$_/images.pl") {
           require("$_/images.pl");
       }
       1;
    }
    
    # Splits its argument (a filename or a directory) into a directory part
    # and a file part and returns the pair (path, file).
    sub get_full_path {
        local($file) = @_;
        local(@path, $path);
        if (-d $file) {
            # A directory: everything is path, there is no file part.
            ($path, $file) = (&make_directory_absolute($file), '');
        }
        elsif (index($file, '/') >= 0) {
            # A name with directory components: the last component is the
            # file, the rest is the path.  Note that this branch changes
            # the current directory.
            @path = split(m;/;,$file);
            $file = pop(@path);
            $path = join('/',@path);
            chdir($path);
            $path = &make_directory_absolute($path);
        }
        elsif (-f $file) {
            # A plain file in the current directory.
            $path = &getcwd;
        }
        # In any other case $path is left undefined.
        ($path, $file);
    }
    
    
    # Given a filename relative to the directory in which the original
    # latex document lives, it tries to expand it to the full pathname.
    sub fulltexpath {
        # Uses $texfilepath defined in sub driver
        local($file) = @_;
        $file =~ s/\s//g;
        # Only a name without any / in it gets the document directory
        # put in front of it.
        if (index($file, '/') < 0) {
            $file = join('/', $texfilepath, $file);
        }
        $file;
    }
    
    # This would not be needed if tokenization was done properly ...
    # Wherever $rx is directly followed by a delimiter, the text is rebuilt
    # as a backslash, the first capture, a space and the second capture.
    sub tokenize {
        local($rx) = @_;
        # Modifies $_; does nothing when no pattern is given.
        if ($rx) {
            $_ =~ s/$rx$delimiter_rx/\\$1 $2/g;
        }
    }
    
    # Text that is going to be used as a pattern may contain characters
    # which are special in a regular expression; this returns a copy in
    # which every non-word character has a backslash in front of it.
    sub escape_rx_chars {
        local($text) = @_;
        $text =~ s/\W/\\$&/g;
        $text;
    }
    
    # Does not do much but may need it later ...
    # The document environment has to be removed because it spans
    # more than one section (the translator can only deal with
    # environments wholly contained within sections).
    
    # (Does a little more now ... the end of the preamble is now marked
    # with an internally-generated command which causes all output
    # erroneously generated from unrecognized commands in the preamble
    # to vanish --- rst).
    
    sub remove_document_env {
        # Modifies $_
        # The opening of the document environment is turned into the
        # internal \latextohtml_ditch_preceding command ...
        $_ =~ s/\\begin$match_br_rx[d]ocument$match_br_rx/\\latextohtml_ditch_preceding /o;
        # ... and its closing is simply dropped.
        $_ =~ s/\\end$match_br_rx[d]ocument$match_br_rx//o;
    }
    
    # Handler for the marker command: empties $before and hands its first
    # argument back unchanged.
    sub do_cmd_latextohtml_ditch_preceding {
        local($rest) = @_;
        $before = '';
        $rest;
    }
    
    # Tidies the current directory after a run (skipped when $DEBUG is set):
    #   - the per-process image files <pid>_images.tex and <pid>_images.log
    #     are renamed to images.tex and images.log;
    #   - every remaining plain file whose name starts with the process id,
    #     and every *.pbm file, is deleted.
    sub cleanup {
        return if $DEBUG;
        # "$$_images" inside a string is read by Perl 5 as a dereference of
        # $_images rather than as the process id followed by "_images", so the
        # process id is copied into a named variable first.
        local($pid) = $$;
        local($leftover);
        rename("${pid}_images.tex", "images.tex");
        rename("${pid}_images.log", "images.log");
        # Expand the patterns in Perl and remove the files directly instead of
        # piping "echo" through "tr" and running "rm" once per file.
        foreach $leftover (<${pid}* *.pbm>) {
            unlink($leftover) if (-f $leftover);
        }
    }
    
    # Builds the name of the HTML file for a section.
    #   $sec_name           - base name used for the top level file
    #   $packed_curr_sec_id - section id with its components joined by '_'
    # Trailing "_0" components are dropped from the id; if nothing but a single
    # "0" is left the section is the top level one and "$sec_name.html" is
    # returned.  Any other section gets "node<N>.html", where N is the global
    # counter $OUT_NODE after incrementing it.
    sub make_name {
        local($sec_name, $packed_curr_sec_id) = @_;
        local($base);
        # Remove 0's from the end of $packed_curr_sec_id
        $packed_curr_sec_id =~ s/(_0)*$//;
        $packed_curr_sec_id =~ s/^0$//o; # Top level file
        if ($packed_curr_sec_id) {
            $base = "node" . ++$OUT_NODE;
        }
        else {
            $base = $sec_name;
        }
        $base . ".html";
    }
    
    # Returns a string of zeros separated by single spaces, one zero for each
    # entry in %section_commands (an empty string when the table is empty).
    sub make_first_key {
        local($levels) = scalar(keys %section_commands);
        join(' ', ('0') x $levels);
    }
     
    # Copies the preamble of the document held in $_ into the global
    # $preamble: the first group captured by $preamble_rx is stored there.
    # The stored text is then passed through revert_to_raw_tex, whose result
    # is the return value of this subroutine ($preamble itself keeps the
    # text exactly as it was matched).
    sub make_preamble {
        # Fresh bindings for the duration of the call; NOTE(review): presumably
        # this shields the caller's $cmd, $before and $after from the helpers
        # invoked below -- confirm.
        local($cmd, $before, $after);
        ($preamble) = ($_ =~ /$preamble_rx/o);
        &revert_to_raw_tex($preamble);
    }
    	
    # Some parts of the document have to be turned back into raw TeX before
    # they are passed to LaTeX for processing.  Given such a piece of text
    # this returns a copy in which
    #   - each numbered bracket marker ($O id $C, and the processed form
    #     $OP id $CP) is replaced by '{' at its first occurrence and by '}' at
    #     the next occurrence of the very same marker;
    #   - verbatim and verb markers are handled by revert_verbatim and
    #     revert_verb;
    #   - everything matching $tex2html_wrap_rx is deleted;
    #   - the placeholders listed in %html_specials_inv are mapped back.
    # A warning is appended to $warnings if the result still matches
    # $iso_latin1_character_rx.
    sub revert_to_raw_tex {
        local($_) = @_;
        local($rtrt_marker);
        while (s/$O\s*\d+\s*$C/\{/o) {
            # $& is the marker just replaced; its partner closes the group.
            $rtrt_marker = $&;
            s/$rtrt_marker/\}/;
        }
        # The same for processed markers ...
        while (s/$OP\s*\d+\s*$CP/\{/o) {
            $rtrt_marker = $&;
            s/$rtrt_marker/\}/;
        }
        # Replace any verbatim markers ...
        &revert_verbatim;
        &revert_verb;
        s/$tex2html_wrap_rx//go;
        s/($html_specials_inv_rx)/$html_specials_inv{$1}/geo;
        # Need an ISO-LATIN-1 inverse mapping here ...
        if (/$iso_latin1_character_rx/o) {
            $warnings .= "\nAn ISO-LATIN-1 character (&#$1) has crept in the source text.";
        }
        $_;
    }
    
    # Produces the next \begin or \end marker of a tex2html_wrap environment
    # for a math delimiter.  Successive calls alternate between the two: the
    # global $wrap_toggle is flipped and the global $max_id is incremented on
    # every call.  A \begin marker is returned with the delimiter $dollar
    # appended, an \end marker with the delimiter in front of it.
    sub next_wrapper {
        local($dollar) = @_;
        local($marker);
        $wrap_toggle = ($wrap_toggle eq 'end') ? 'begin' : 'end';
        ++$max_id;
        $marker = "\\$wrap_toggle$O$max_id$C" . "tex2html_wrap$O$max_id$C";
        ($wrap_toggle eq 'end') ? $dollar . $marker : $marker . $dollar;
    }
    
    # Shorthand for make_any_wrapper with the environment kind fixed to
    # "tex2html_wrap"; the single argument selects \begin (true) or \end.
    sub make_wrapper {
        &make_any_wrapper($_[0], "tex2html_wrap");
    }
    
    # Returns a \begin (when $toggle is true) or \end marker for an
    # environment of the given $kind.  The name is surrounded by bracket
    # markers carrying a fresh id: the global $max_id is incremented first.
    sub make_any_wrapper {
        local($toggle, $kind) = @_;
        local($which) = $toggle ? 'begin' : 'end';
        local($id) = "$O" . ++$max_id . "$C";
        "\\$which$id$kind$id";
    }
    
    # Returns the last word of a (possibly multi-line) string: the final run
    # of non-blank characters that is preceded by white space.  The text is
    # first passed through extract_pure_text (NOTE(review): presumably that
    # helper strips markup from $_ -- it is defined elsewhere).
    sub get_last_word {
        local($_) = @_;
        local($word);
        &extract_pure_text;
        # Every successful match overwrites $word, so the last one wins.
        $word = $1 while (/\s(\S+)\s*$/g);
        $word;
    }
    
    # Returns the first $min words of a string, each together with the white
    # space that follows it; the first comma found in the result (and the
    # white space around it) is removed.  The text is first passed through
    # extract_pure_text, which is defined elsewhere.
    sub get_first_words {
        local($_, $min) = @_;
        local($words, $i);
        local(@chunks, $chunk);
        &extract_pure_text;
        # Each chunk is a run of non-blanks plus its trailing white space.
        @chunks = /[\S]*\s*/g;
        foreach $chunk (@chunks) {
            $words .= $chunk;
            last if (++$i >= $min);
        }
        $words =~ s/\s*[,]\s*//;
        $words;
    }
    
    # Replaces the LAST occurrence of $old with $new in $str and returns the
    # result.  When $old does not occur at all, $str is returned unchanged.
    sub replace_word {
        local($str, $old, $new) = @_;
        local($at) = rindex($str, $old);
        # rindex yields -1 when $old is absent; substr would take that as an
        # offset from the end and overwrite the tail of $str.
        substr($str, $at, length($old)) = $new if ($at >= 0);
        $str;
    }
    
    # Returns the recognised sectioning commands (the keys of
    # %section_commands) as a string of alternatives for use in regular
    # expressions.  Keys containing "star" are placed in front as they are;
    # every other key is appended followed by "[*]?", i.e. an optional
    # literal star.  The alternatives are separated by "|".
    sub get_current_sections {
        local($_, $key);
        foreach $key (keys %section_commands) {
            if ($key =~ /star/) {
                $_ = $key . "|" . $_;
            }
            else {
                # Built by concatenation: inside a double-quoted string
                # "$key[*]?" is read as a subscript on the array @key.
                $_ .= $key . '[*]?' . "|";
            }
        }
        chop;			# Remove the last "|".
        $_;
    }
    
    # Comparison routine for sort.  $a and $b are keys made of numbers
    # separated by white space (e.g. "0 2 1"); they are compared number by
    # number from the left.  Returns -1, 0 or 1 like <=>.  A missing
    # component counts as 0.
    sub numerically {
        local(@x) = split(' ',$a);
        local(@y) = split(' ',$b);
        local($i, $result);
        local($top) = ($#x > $#y) ? $#x : $#y;
        # The result is kept in a variable and returned explicitly: the value
        # of a subroutine that ends in a loop is unspecified.  The loop also
        # runs up to and including the last component.
        $result = 0;
        for ($i = 0; $i <= $top; $i++) {
            last if ($result = ($x[$i] <=> $y[$i]));
        }
        $result;
    }
    
    # Maps a command name to the name used for its handler subroutine.
    #   - A name that starts with one of the escaped special LaTeX characters
    #     ($latex_specials_rx) is not a command at all: it is pushed back in
    #     front of $after (a variable of the caller) and "" is returned.
    #   - A name listed in %normalize is replaced by its entry.
    #   - Otherwise a trailing "*" becomes "star", or failing that a leading
    #     "@" becomes "tex"; any other name is returned as it is.
    sub normalize {
        local($cmd) = @_;
        local($ncmd);
        # Escaped special LaTeX characters
        if ($cmd =~ /^($latex_specials_rx)/) {
            $cmd = "&" if ($cmd =~ /&/);
            $after = $cmd . $after;
            return "";
        }
        return $ncmd if ($ncmd = $normalize{$cmd});
        # Only one of the two rewrites is applied.
        $cmd =~ s/[*]$/star/ || $cmd =~ s/^\@/tex/;
        $cmd;
    }
    
    # Rewrites every sectioning command found in $_ by $sections_rx: the
    # command name (first group) is passed through normalize and written
    # back after a backslash, followed by the second group unchanged.
    sub normalize_sections {
        $_ =~ s/$sections_rx/'\\' . &normalize($1) . $2/geo;
    }
    
    # Returns the text to embed for an image.
    #   $url       - location of the image
    #   $type      - description of the image; wrapped in parentheses unless
    #                it mentions tex2html_wrap (only when there is no thumbnail)
    #   $external  - true if the image is to be reached through a link
    #   $thumbnail - true if a thumbnail is used as the visible mark
    # A link to $url (built by make_href) is returned when $external,
    # $thumbnail or the global $EXTERNAL_IMAGES is set; otherwise the empty
    # string.
    # NOTE(review): extern_image_mark is called without an argument list, so
    # it receives this subroutine's own @_ and can see the local variables
    # above -- presumably that is how the adjusted $type reaches it; confirm.
    sub embed_image {
        local($url,$type,$external,$thumbnail) = @_;
        local($extern_image_mark);
        if (!$thumbnail) {
            $type = "($type)" unless ($type =~ /tex2html_wrap/);
            $extern_image_mark = &extern_image_mark;
        }
        else {
            $extern_image_mark = "\"\"";
        }
        if ($external || $thumbnail || $EXTERNAL_IMAGES) {
            return &make_href($url, $extern_image_mark);
        }
        "";
    }
    
    # Writes the table of symbolic labels (%ref_files: label -> HTML file) to
    # "labels.pl" in the current directory, as Perl code that fills
    # %external_labels with "$URL/<file>" for every label when it is loaded.
    # If the file cannot be opened a message is printed and nothing is
    # written.
    sub save_labels_in_file {
        local($label,$file);
        unless (open(LABELS,">labels.pl")) {
            print "\nCannot write labels.pl: $!\n";
            return;
        }
        print LABELS "# LaTeX2HTML $TEX2HTMLVERSION\n";
        print LABELS "# Associate symbolic labels with physical files.\n\n";
        while (($label,$file) = each %ref_files) {
            print LABELS "\$external_labels{\"$label\"} =" .
                         "\"\$URL/$file\"; \n";
        }
        print LABELS "\n1;\n\n";
        # Close explicitly so that the contents are on disk before anything
        # reads the file back.
        close(LABELS);
    }
    
    # Writes the image cache (%cached_env_img: scrambled original text ->
    # image file) to "images.pl" in the current directory, as Perl code that
    # fills %cached_env_img again when it is loaded.  If the file cannot be
    # opened a message is printed and nothing is written.
    # NOTE(review): the keys are written inside q/.../ without escaping, so a
    # key containing "/" or "\" would not load back unchanged -- check what
    # the code that builds the keys guarantees.
    sub save_image_cache_in_file {
        local($uutxt,$file);
        unless (open(FILE,">images.pl")) {
            print "\nCannot write images.pl: $!\n";
            return;
        }
        print FILE "# LaTeX2HTML $TEX2HTMLVERSION\n";
        print FILE "# Associate image original text (scrambled) with physical files.\n\n";
        while (($uutxt,$file) = each %cached_env_img) {
            print FILE "\$key = q/$uutxt/;\n";
            print FILE "\$cached_env_img{\$key} =" .
                       "\'$file\'; \n";
        }
        print FILE "\n1;\n\n";
        # Close explicitly so that the contents are on disk before anything
        # reads the file back.
        close(FILE);
    }
    
    # Returns true if $AUTO_NAVIGATION is on and the text in $_ has more
    # words than $WORDS_IN_PAGE.  Words are the pieces left after splitting
    # on white space together with any non-word characters around it.
    sub auto_navigation {
        # Uses $_;
        local(@words);
        return $AUTO_NAVIGATION unless $AUTO_NAVIGATION;
        @words = split(/\W*\s+\W*/, $_);
        (scalar(@words) > $WORDS_IN_PAGE);
    }
    
    # Returns true if the file $f1 was modified more recently than the file
    # $f2 (element 9 of stat is the modification time).  A file that cannot
    # be stat'ed has no time, which compares as 0.
    sub newer {
        # Localised so that the call does not overwrite variables named $f1
        # and $f2 elsewhere in the program.
        local($f1,$f2) = @_;
        local(@f1s) = stat($f1);
        local(@f2s) = stat($f2);
        ($f1s[9] > $f2s[9]);
    }
    
    # Looks up a character in %iso_latin1_character_map.  The key is the
    # character followed directly by the kind of modification (for example
    # "a" and "ring"); the stored entry is returned, or nothing if there is
    # no such entry.
    sub iso_map {
        local($char, $kind) = @_;
        local($entry) = $char . $kind;
        $iso_latin1_character_map{$entry};
    }
    
    ####################### Code Generation Subroutines ############################
    # This takes a string of commands followed by optional or compulsory
    # argument markers and generates a subroutine for each command that will
    # ignore the command and its arguments.
    # The commands are separated by newlines and have the format:
    ##      command_name # {} # [] # {} # [] etc.
    # {} marks a compulsory argument and [] an optional one.  Anything else
    # between two '#' is taken as Perl code and copied into the generated
    # subroutine.
    #
    # Every command is also entered in %ignore.  A subroutine do_cmd_<name>
    # is generated only for commands that are followed by at least one
    # marker; it removes the arguments from the text it is given and returns
    # what is left.  White space anywhere around the command name is
    # discarded, so the lines may be indented or padded.
    sub ignore_commands {
        local($_) = @_;
        foreach (/.*\n?/g) {
            s/\n//g;
            # For each line
            local($cmd, @args) = split('#',$_);
            $cmd =~ s/\s+//g;
            next unless $cmd;
            ++$ignore{$cmd};
            next unless @args;
            # Replace the argument markers with appropriate patterns
            foreach (@args) {
                if (/\{\}/) {
                    $_ = 's/$next_pair_pr_rx//o || print "\nCannot find argument for $cmd!\n";';
                }
                elsif (/\[\]/) {
                    $_ = '&get_next_optional_argument;';
                }
                else {
                    # We have arbitrary code - just add it
                    $_ .= ';';
                }
            }
            # Generate a new subroutine
            eval "sub do_cmd_$cmd {" . 'local($_) = @_;'  . join('',@args) . '$_}';
        }
    }
    
    # Removes the first length found in $_: an optional "=", an optional
    # sign, digits and/or dots, and one of the units cm, em, ex, in, pc, pt
    # or mm, together with the white space in front of it.
    sub ignore_numeric_argument {
        $_ =~ s/\s*=?\s*[+-]?[\d\.]+(cm|em|ex|in|pc|pt|mm)//o;
    }
      
    # Returns the replacement for a command that only LaTeX can evaluate.
    # In $ASCII_MODE this is the command text in square brackets; otherwise
    # the command, with its backslash restored, is handed to
    # process_in_latex and that result is returned.
    sub process_in_latex_helper {
        local($cmd) = @_;
        return "[$cmd]" if $ASCII_MODE;
        &process_in_latex("\\$cmd");
    }
    	
    # *Generates* subroutines to handle each of the declarations
    # like \em, \quote etc., in case they appear with the begin-end
    # syntax.
    # For every entry of %declarations that has a non-empty value a
    # subroutine do_env_<name> is defined; it returns its argument preceded
    # by "<value> ".
    sub generate_declaration_subs {
        local($key, $val);
        foreach $key (keys %declarations) {
            $val = $declarations{$key};
            next unless $val;
            eval "sub do_env_$key {"
                . 'local($_) = @_;'
                . "\"<$val> " . '$_' . "\"};";
        }
    }
    
    # *Generates* subroutines to handle each of the sectioning commands.
    # For every entry of %section_headings (command name -> heading tag) two
    # subroutines are defined.  Both bind their argument to $after and then
    # call do_cmd_section_helper:
    #   do_cmd_<name>      passes the heading tag and the command name;
    #   do_cmd_<name>star  (the *-form) passes the heading tag only.
    sub generate_sectioning_subs {
        local($key, $val);
        local($prologue) = 'local($after) = @_;';
        foreach $key (keys %section_headings) {
            $val = $section_headings{$key};
            eval "sub do_cmd_$key {"
                . $prologue
                . '&do_cmd_section_helper(' . $val . ',' . $key . ');}';
            # Now the *-form of the same command: the name is not passed on.
            eval "sub do_cmd_$key" . "star {"
                . $prologue
                . '&do_cmd_section_helper(' . $val . ');}';
        }
    }
    
    # Uses $after which is defined in the caller (the caller is a generated subroutine)
    # Also uses @curr_sec_id
    sub do_cmd_section_helper {
        local($H,$key) = @_;
        local($section_number, @tmp);
        # if we have a $key the current section is not of the *-form, so we need
        # to update the counters.
        do {
    	$latex_body .= "\\stepcounter{$key}\n";
        } if $key;
        # Ignore optional toc entry
        local($_) = $after; &get_next_optional_argument; $after = $_;
        $after =~ s/$next_pair_rx/do {$TITLE = $2; ''}/eo;
        $TITLE =~ s/\\(label|index)$any_next_pair_rx//;
        $TITLE = &translate_commands(&translate_environments($TITLE));
        $_ = $TITLE;  &text_cleanup;
        $TITLE = $_ ;
        # This is the LaTeX section number read from the $FILE.aux file
        @tmp = split(/$;/,$encoded_section_number{&encode_title($_)});
        $section_number = shift(@tmp);
        $section_number = "" if ($section_number eq "-1");
        $encoded_section_number{&encode_title($_)} = join($;, @tmp);
        $TITLE = "$section_number " . $_;
        $after = join('', &make_section_heading($TITLE, $H), $after);
        $TITLE =~ s/

    //g; # Remove newlines $after;} ############################ Environments ################################ # The following list environment subroutines still do not handle # correctly the case where the list counters are modified (e.g. \alph{enumi}) # and the cases where user defined bullets are mixed with the default ones. # e.g. \begin{enumerate} \item[(1)] one \item two \end{enumerate} will # not produce the same bullets as in the dvi output. sub do_env_itemize { local($_) = @_; if (/$item_description_rx/) { # Contains user defined optional labels &do_env_description($_, "COMPACT")} else { &list_helper($_,'UL'); } } sub do_env_enumerate { local($_) = @_; if (/$item_description_rx/) { # Contains user defined optional labels &do_env_description($_, "COMPACT")} else { &list_helper($_,'OL'); } } sub do_env_list { local ($_) = @_; local ($list_type) = 'UL'; s/$next_pair_rx//; # Ditch the label specifier s/$next_pair_rx//; # Ditto the length declarations ... # but we may want to switch to enumerated style # if they include a \usecounter. $list_type = 'OL' if $1 =~ /\\usecounter/; &list_helper($_, $list_type); } sub do_env_description { local($_, $compact) = @_; $compact = "" unless $compact; s/$item_description_rx/

    \1\n
    /g; # and just in case the description is empty ... s/\\item$delimiter_rx/
    \1/g; "
    $_ \n
    "; } sub list_helper { local($_, $tag) = @_; # This deals with \item[xxx] ... s/$item_description_rx/
    \1\n
    /g; s/\\item$delimiter_rx/
  • \1/g; "<$tag>$_"; } sub do_env_cente { local($_) = @_; "

    $_

    "; } sub do_env_verse { local($_) = @_; "

    \n$_

    "; } sub do_env_abstract { local($_) = @_; &make_abstract($_); } sub do_env_minipage { local($_) = @_; &get_next_optional_argument; s/$next_pair_rx//o; $_; } sub do_env_thebibliography { # Sets $citefile and $citations defined in translate local($_) = @_; $citefile = $CURRENT_FILE; s/$next_pair_rx//; s/^\s*$//g; # Remove empty lines (otherwise will have paragraphs!) $citations = join('',"

    ", &translate_commands(&translate_environments($_)),"
    "); join('','

    ' , "

    References

    \n$bbl_mark"); } # IGNORE the contents of this environment - We construct our own index sub do_env_theindex { ""; } # This is defined in html.sty sub do_env_comment { ""; } ################# Post Processing Latex Generated Images ################ # A subroutine of the form post_latex_do_env_ can be used to # format gifs that have come back from latex # Do nothing (avoid the paragraph breaks) sub post_latex_do_env_figure { @_[0]; } sub post_latex_do_env_table { @_[0]; } ############################ Commands ################################### # Capitalizes what follows the \sc declaration # *** POTENTIAL ERROR **** # (This is NOT the correct meaning of \sc in the cases when it # is followed by another declaration (e.g. \em). # The scope of \sc should be limited to the next occurence of a # declaration. sub do_cmd_sc { local($_) = @_; tr/a-z/A-Z/; $_; } # This is supposed to put the font back into roman. # Since there is no HTML equivalent for reverting # to roman we keep track of the open font tags in # the current context and close them. # *** POTENTIAL ERROR ****# # This will produce incorrect results in the exceptional # case where \rm is followed by another context # containing font tags of the type we are trying to close # e.g. {a \bf b \rm c {\bf d} e} will produce # a b c d e # i.e. it should move closing tags from the end sub do_cmd_rm { local($_, @open_font_tags) = @_; local($next); for $next (@open_font_tags) { $next = ($declarations{$next}); s/<\/$next>//; $_ = join('',"<\/$next>",$_); } $_; } sub do_cmd_title { local($_) = @_; &get_next_optional_argument; local($rest) = $_; $rest =~ s/$next_pair_pr_rx//o; $_ = $&; &text_cleanup; ($t_title) = $_; $TITLE = $t_title if ($TITLE eq $default_title); $TITLE =~ s/

    //g; # Remove Newlines $rest; } sub do_cmd_author { local($_) = @_; s/$next_pair_pr_rx//o; ($t_author) = $&; $_; } sub do_cmd_date { local($_) = @_; s/$next_pair_pr_rx//o; ($t_date) = $&; $_; } sub do_cmd_maketitle { join('', "

    $t_title

    \n", "

    $t_author

    \n", do {"

    $t_date

    \n" if $t_date;} , @_[0]); } sub do_cmd_abstract { local($_) = @_; s/$next_pair_pr_rx//o; local($abstract) = $&; join('',&make_abstract($abstract), $_); } sub make_abstract { local($_) = @_; join('',"

    Abstract:

    \n$_

    \n"); } sub do_cmd_today { local($today) = (`date`); join('',$today,@_[0]); } sub do_cmd_ldots { join('',"...",@_[0]); } sub do_cmd_hrule { join('',"


    ", @_[0]); } sub do_cmd_space { join(''," ",@_[0]); } sub do_cmd_par { join('',"

    ",@_[0]); } sub do_cmd_medskip { join('',"

    ",@_[0]); } sub do_cmd_smallskip { join('',"

    ",@_[0]); } sub do_cmd_bigskip { join('',"

    ",@_[0]); } sub do_cmd_slash { join('',"/",@_[0]); } sub do_cmd_esc_slash { @_[0]; } ################## Commands to be processed by Latex ################# # # The following commands are passed to Latex for processing. # They cannot be processed at the same time as normal commands # because their arguments must be left untouched by the translator. # (Normally the arguments of a command are translated before the # command itself). # # In fact, it's worse: it is not correct to process these # commands after we process environments, because some of them # (for instance, \parbox) may contain unknown or wrapped # environments. If math mode occurs in a parbox, the # translate_environments routine should *not* process it, lest # we encounter the lossage outlined above. # # On the other hand, it is not correct to process these commands # *before* we process environments, or figures containing # parboxes, etc., will be mishandled. # # So, the only way to handle these commands is to wrap them up # in null environments, as for math mode, and let translate_environments # (which can handle nesting) figure out which is the outermost. # # Incidentally, we might as well make these things easier to configure... sub process_commands_in_tex { local($_) = @_; foreach (/.*\n?/g) { chop; # For each line local($cmd, @args) = split('#',$_); next unless $cmd; $cmd =~ s/ //g; # Build routine body ... local ($body, $code) = ("", ""); foreach (@args) { if (/\{\}/) { $body .= '$args .= $& if s/$next_pair_rx//o;' . "\n" } elsif (/\[\]/) { $body .= '($dummy, $pat) = &get_next_optional_argument;' . "\n". '$args .= $pat;'; } else { $body .= $_ . "\n"; } } # Generate a new subroutine $code = "sub wrap_cmd_$cmd {" . "\n" .'local($cmd, $_) = @_; local ($args, $dummy, $pat) = "";' . "\n" .$body . "\n" .'(&make_wrapper(1) . $cmd.$args . &make_wrapper(0), $_)}' ."\n"; eval $code; # And make sure the main loop will catch it ... 
$raw_arg_cmds{$cmd} = 1; } } # Used in initialisation to build a regexp ... sub get_raw_arg_cmds { local($_, $key); foreach $cmd (keys %raw_arg_cmds) { $_ .= $cmd . "|"; } chop; # Remove the last "|". $_; } # Invoked before actual translation; wraps these commands in # tex2html_wrap environments, so that they are properly passed to # TeX in &translate_environments ... sub wrap_raw_arg_cmds { local ($processed_text, $delim, $cmd, $wrapper, $wrap); while (/$raw_arg_cmd_rx/) { $processed_text .= $`; ($cmd, $delim) = ($1, $2); $wrapper = "wrap_cmd_$1"; ($wrap, $_) = &$wrapper ("\\$cmd", "$delim$'"); $processed_text .= $wrap; } $processed_text . $_; } ######################################################################### # To make a table of contents, list of figures and list of tables commands # create a link to corresponding files which do not yet exist. # The binding of the file variable in each case acts as a flag # for creating the actual file at the end, after all the information # has been gathered. sub do_cmd_tableofcontents { local($_) = @_; $tocfile = $CURRENT_FILE; $TITLE = 'Contents'; join('', '

    ', &make_section_heading("Contents", "H2"), $toc_mark, $_); } sub do_cmd_listoffigures { local($_) = @_; $TITLE = 'List of Figures'; $loffile = $CURRENT_FILE; join('', '

    ' , &make_section_heading("List of Figures", "H2"), $lof_mark, $_); } sub do_cmd_listoftables { local($_) = @_; $TITLE = 'List of Tables'; $lotfile = $CURRENT_FILE; join('', '

    ' , &make_section_heading("List of Tables", "H2"), $lot_mark, $_); } # $idx_mark will be replaced with the real index at the end sub do_cmd_tex2htmlindex { local($_) = @_; $TITLE = 'Index'; $idxfile = $CURRENT_FILE; join('','

    ' , &make_section_heading("Index", "H2"), $idx_mark, $_); } sub do_cmd_footnote { local($_) = @_; s/$next_pair_pr_rx//o; local($br_id, $footnote) = ($1, $2); &process_footnote($footnote); join('',&make_href("$footfile#$br_id",$footnote_mark),$_); } sub do_cmd_thanks { &do_cmd_footnote(@_); } sub do_cmd_footnotemark { local($_) = @_; &get_next_optional_argument; # Don't use ()'s for the optional argument! s/\\footnotetext\[?[^]]*\]?\s*$any_next_pair_pr_rx//o; local($br_id, $footnote) = ($1, $2); if ($footnote) { &process_footnote($footnote); $_ = join('',&make_href("$footfile#$br_id",$footnote_mark),$_);} else { print "\nCannot find \\footnotetext";}; $_; } # Under normal circumstances this is never executed. Any commands \footnotetext # should have been processed when the corresponding \footnotemark was # encountered. It is possible however that when processing pieces of text # out of context (e.g. \footnotemarks in figure and table captions) # the pair of commands gets separated. Until this is fixed properly, # this command just puts the footnote in the footnote file in the hope # that its context will be obvious .... sub do_cmd_footnotetext { local($_) = @_; s/$next_pair_pr_rx//o; local($br_id, $footnote) = ($1, $2); &process_footnote($footnote) if $footnote; $_; } sub process_footnote { # Uses $before # Sets $footfile defined in translate # Modifies $footnotes defined in translate local($footnote) = @_; local($last_word) = &get_last_word($before); $footfile = "footnode.html"; $footnotes .= "

    ...$last_word
    " . &translate_commands($footnote) . "\n
    " . "

    \n" x 30 . "

    "; } # This just changes the depth of section so that an appendix is at the # outermost level. sub do_cmd_appendix { $latex_body .= "\\appendix\n"; $section_commands{'section'} = $section_commands{$outermost_level}; @_[0]; } sub do_cmd_ref { local($_) = @_; &process_ref($cross_ref_mark,$cross_ref_mark); } sub do_cmd_pageref { local($_) = @_; &process_ref($cross_ref_mark,$cross_ref_visible_mark); } # This is used by external style files ... sub process_ref { local($ref_mark, $visible_mark, $use_label) = @_; local($label,$id); s/$next_pair_pr_rx/($id, $label) = ($1, $2);''/eo; if ($label) { # if $use_label is 1 then $label is used as the cross_ref_mark # elseif $use_label is a string then $use_label is used # else the usual mark will be used $use_label = ( (($use_label == 1) && $label) || $use_label); $label =~ s/\W//g; # Remove non alphanumeric characters $symbolic_labels{"$label$id"} = $use_label; # The quotes around the HREF are inserted later join('',"$visible_mark<\/A>",$_); } else { print "Cannot find label argument after <$last_word>\n"; $_;} } # Uses $CURRENT_FILE defined in translate sub do_cmd_label { local($_) = @_; s/$next_pair_pr_rx//o; local($label) = $2; &anchor_label($label,$CURRENT_FILE,$_); } # This subroutine is also used to process labels in undefined environments sub anchor_label { # Modifies entries in %ref_files defined in translate local($label,$filename,$context) = @_; $label =~ s/\W//g; # Remove non alphanumeric characters # Associate the label with the current file $ref_files{$label} = $filename; join('',"$anchor_mark",$context); } # This just creates a link from a label (yet to be determined) to the # cite_key in the citation file. 
sub do_cmd_cite { local($_) = @_; s/$next_pair_pr_rx//o; local($cite_key) = $2; if ($cite_key) { foreach $cite_key (split(/,/,$cite_key)) { $cite_key =~ s/\W//g; # Remove non alphanumeric characters # The proper link $citefile#$cite_key will be substituted later # along with the quotes $_ = join('',"$cite_mark<\/A>",$_); }} else {print "Cannot find citation argument\n";} $_; } sub do_cmd_index { local($_) = @_; s/$next_pair_pr_rx//o; local($br_id, $str) = ($1, $2); join('',&make_index_entry($br_id,$str),$_); } # This command will only be encountered inside a thebibliography environment. sub do_cmd_bibitem { local($_) = @_; # The square brackets may contain the label to be printed local($label, $dummy) = &get_next_optional_argument; $label = ++$bibitem_counter unless $label; # Numerical labels s/$next_pair_pr_rx//o; $cite_key = $2; if ($cite_key) { $cite_key =~ s/\W//g; # Remove non alphanumeric characters # Associate the cite_key with the printed label. # The printed label will be substituted back into the document later. $cite_info{$cite_key} = $label; # Create an anchor around the citation join('',"
    $label
    ", $_); } else { print "Cannot find bibitem labels\n";} } # This just reads in the $FILE.bbl file if it is available and appends # it to the items that are still to be processed. # The $FILE.bbl should contain a thebibliography environment which will # cause its contents to be processed later in the appropriate way. # (Note that it might be possible for both the \bibliography command and # the thebibliography environment to be present as the former may have been # added by the translator as a sectioning command. In this case (both present) # the $citefile would have already been set by the thebibliography environment) sub do_cmd_bibliography { local($after) = @_; $after =~ s/$next_pair_rx//o; local($bibfile) = $2; $TITLE = 'References'; do { unless ($citefile) { $citefile = $CURRENT_FILE; if (&process_ext_file("bbl")) { # *** BINDS $_ as a side effect *** $after = join('',$_,$after);} else { print "\nCannot open $FILE.bbl $!\n"; $warnings .= "\nThe bibliography file was not found."; $after = join('',"

    No References!

    \n", $after); }} } if $bibfile; $after; } sub do_cmd_textohtml_info_page { local($_) = @_; ( ($INFO == 1) ? join('', "$t_title

    \nThis document was generated using the LaTeX2HTML translator Version $TEX2HTMLVERSION Copyright © 1993, 1994, Nikos Drakos, Computer Based Learning Unit, University of Leeds.

    The command line arguments were:
    latex2html $argv.

    The translation was initiated by $address_data[0] on $address_data[1]", $_) : join('',$INFO,$_)) } sub do_cmd_dbackslash { local($_) = @_; &get_next_optional_argument; join('',"
    ",$_); } ################## Commands used in the $FILE.aux file ####################### # This is used in $FILE.aux sub do_cmd_newlabel { local($_) = @_; local($label,$val,$tmp); s/$next_pair_pr_rx/$label = $2;''/eo; s/$next_pair_pr_rx/$tmp=$2;''/eo; $tmp =~ s/$next_pair_pr_rx/$val=$2/eo; $label =~ s/\W//g; # Remove non alphanumeric characters $latex_labels{$label} = $val; $_; } # Before normalizing this was \@writefile # Sets %encoded_(section|figure|table)_number which maps encoded section titles to LaTeX numbers sub do_cmd_texwritefile { local($after) = @_; local($type,$arg,$tmp,$title,$number,%hash,$stype); # The form of the expression is: # \@writefile{TOC} {... {SECTION} {... {SECTION_NUMBER} TITLE}{PAGE}} $after =~ s/$next_pair_pr_rx/$type = $2;''/eo; # Chop off {TOC} $after =~ s/$next_pair_pr_rx/$arg = $2;''/eo; # Get {... {SECTION} {... }} $hash{"toc"} = "section" if $SHOW_SECTION_NUMBERS; $hash{"lof"} = "figure"; $hash{"lot"} = "table"; if ($type) { $arg =~ s/$next_pair_pr_rx/$stype = $2; ''/eo; # This is section|subsection etc $arg =~ s/$next_pair_pr_rx/$tmp = $2; ''/eo; $tmp =~ s/$next_pair_pr_rx/$number = $2; ''/eo; if ($type eq "toc" && $stype eq "part") { while ($tmp =~ s/$next_pair_pr_rx//o) {}; $number =~ tr/a-z/A-Z/; $number = "Part $number:"} # This cause problem when picking figure numbers... # while ($tmp =~ s/$next_pair_pr_rx//o) {}; $_ = $tmp; &text_cleanup; $title = &encode_title($_); eval "\$encoded_$hash{$type}_number{\$title} = ((\$encoded_$hash{$type}_number{\$title} ne \"\") ? \$encoded_$hash{$type}_number{\$title} . \$; : \"\") . ((\$number ne \"\") ? 
\$number : \"-1\")"; } $after; } ########################### Counter Commands ################################# sub do_cmd_newcounter { local($_) = @_; local($ctr,$dummy,$pat); s/$next_pair_rx/$ctr=$2;''/eo; ($dummy,$pat) = &get_next_optional_argument; $latex_body .= &revert_to_raw_tex("\\newcounter{$ctr}$pat\n") unless ($preamble =~ /\\newcounter{$ctr}/); $_; } sub do_cmd_addtocounter { local($_) = @_; local($ctr,$num); s/$next_pair_rx/$ctr = $2;''/eo; s/$next_pair_rx/$num = $2;''/eo; $latex_body .= &revert_to_raw_tex("\\addtocounter{$ctr}{$num}\n"); $_; } sub do_cmd_setcounter { local($_) = @_; local($ctr,$num); s/$next_pair_rx/$ctr = $2;''/eo; s/$next_pair_rx/$num = $2;''/eo; $latex_body .= &revert_to_raw_tex("\\setcounter{$ctr}{$num}\n"); $_; } sub do_cmd_stepcounter { local($_) = @_; local($ctr); s/$next_pair_rx/$ctr = $2;''/eo; $latex_body .= &revert_to_raw_tex("\\stepcounter{$ctr}\n"); $_; } sub do_cmd_refstepcounter { local($_) = @_; local($ctr); s/$next_pair_rx/$ctr = $2;''/eo; $latex_body .= &revert_to_raw_tex("\\refstepcounter{$ctr}\n"); $_; } sub do_cmd_arabic { local($_) = @_; local($ctr); s/$next_pair_rx/$ctr = $2;''/eo; join('',&process_in_latex_helper("arabic{$ctr}"),$_); } sub do_cmd_roman { local($_) = @_; local($ctr); s/$next_pair_rx/$ctr = $2;''/eo; join('',&process_in_latex_helper("roman{$ctr}"),$_); } sub do_cmd_Roman { local($_) = @_; local($ctr); s/$next_pair_rx/$ctr = $2;''/eo; join('',&process_in_latex_helper("Roman{$ctr}"),$_); } sub do_cmd_alph { local($_) = @_; local($ctr); s/$next_pair_rx/$ctr = $2;''/eo; join('',&process_in_latex_helper("alph{$ctr}"),$_); } sub do_cmd_Alph { local($_) = @_; local($ctr); s/$next_pair_rx/$ctr = $2;''/eo; join('',&process_in_latex_helper("Alph{$ctr}"),$_); } sub do_cmd_fnsymbol { local($_) = @_; local($ctr); s/$next_pair_rx/$ctr = $2;''/eo; join('',&process_in_latex_helper("fnsymbol{$ctr}"),$_); } sub do_cmd_thecounter { # Uses $counter bound by the caller 
join('',&process_in_latex_helper("the$counter"),@_); } ################# Accent and Special Symbols ################################## # Generate code for all the accents handling commands sub generate_accent_commands { local($accent); foreach $accent ("grave", "acute", "circ", "uml", "tilde", "ring", "overscore", "u", "v", "H", "t", "d", "b") { eval "sub do_cmd_$accent {" . 'local($_) = @_;' . "&replace_next_char($accent);" . '$_}'; } } # This handles the \c command sub do_cmd_c { local($_) = @_; &replace_next_char("cedil"); $_; } sub do_cmd_i { join('',"i",@_[0]); } sub do_cmd_ae { join('', &iso_map("ae", "lig"), @_[0]);} sub do_cmd_AE { join('', &iso_map("AE", "lig"), @_[0]);} sub do_cmd_aa { join('', &iso_map("a", "ring"), @_[0]);} sub do_cmd_AA { join('', &iso_map("A", "ring"), @_[0]);} sub do_cmd_o { join('', &iso_map("o", "slash"), @_[0]);} sub do_cmd_O { join('', &iso_map("O", "slash"), @_[0]);} sub do_cmd_ss { join('', &iso_map("sz", "lig"), @_[0]);} sub do_cmd_pounds { join('', &iso_map("pounds", ""), @_[0]);} sub do_cmd_S { join('', &iso_map("S", ""), @_[0]);} sub do_cmd_copyright { join('', &iso_map("copyright", ""), @_[0]);} sub do_cmd_P { join('', &iso_map("P", ""), @_[0]);} # Modifies $_ in the caller sub replace_next_char { local($type) = @_; local($arg, $first_char); ($arg) = &get_next_pair_or_char_pr; $arg =~ s/(\w)/$first_char = $1; ''/eo; $_ = join('', (&iso_map($first_char, $type) || $first_char), $arg, $_) } sub brackets { ($OP, $CP); } sub address_data { local($user, $date, $_); # Get author email address and current date. $user = (`whoami`) || print "Can't get user name\n"; $date = (`date`) || print "Can't get current date\n"; chop($date); if ($user) { chop($user); open(FINGER, "finger $user |") || print "Can't find 'finger'\n"; while($_ = ) { last if ( ($user) = /real life:(.*)$/io); } } ($user, $date); } # Given a directory name in either relative or absolute form, returns # the absolute form. 
# Turns a directory name into an absolute path.
#   $path : directory name, absolute or relative to the current directory.
# Returns $path unchanged when it already starts with '/'.  Otherwise the
# directory is entered, its absolute name is read with &getcwd, and the
# original working directory is restored.
# If the directory cannot be entered, the name is resolved textually
# against the current directory instead of silently returning the current
# directory itself.
sub make_directory_absolute {
    local($path) = @_;
    local($orig_cwd);
    if (! ($path =~ /^\//)) {   # if $path doesn't start with '/'
        $orig_cwd = &getcwd;
        if (chdir $path) {
            $path = &getcwd;
            chdir $orig_cwd;
        }
        else {
            # chdir failed: we are still in $orig_cwd, so build the name
            # from it rather than reporting $orig_cwd as the answer.
            $path = "$orig_cwd/$path";
        }
    }
    $path;
}

########################## Input and Include commands #########################

# Handles \input.
#   $_[0] : the text following the command.
# The first processed-bracket argument is removed from the text; if it is
# non-empty it is rebuilt as "\input{...}", passed through
# &revert_to_raw_tex, and the result is handed to &add_to_preamble under
# the 'include' key.  Returns the remaining text.
sub do_cmd_input {
    local($_) = @_;
    local($file);
    s/$next_pair_pr_rx/$file=$2;''/eo;
    $file = &revert_to_raw_tex("\\input{$file}\n") if $file;
    &add_to_preamble('include',$file);
    $_;
}

# Handles \include.  Same as do_cmd_input except that the argument is
# rebuilt as "\include{...}".
sub do_cmd_include {
    local($_) = @_;
    local($file);
    s/$next_pair_pr_rx/$file=$2;''/eo;
    $file = &revert_to_raw_tex("\\include{$file}\n") if $file;
    &add_to_preamble('include',$file);
    $_;
}

############################ Initialization ####################################

# Sets up the global tables, markers and regular expressions used by the
# translator.  Takes no arguments.  Tables that already hold entries
# (%declarations, %ignore) keep them: the existing contents are appended
# to the defaults, so existing keys override the defaults.
sub initialise {
    ############################ Global variables ###############################
    $OUT_NODE = 0;              # Used in making filenames of HTML nodes unique
    # NOTE(review): $* is not supported by modern Perl (removed in 5.10);
    # this file targets the Perl 4 era (see the getcwd.pl require).
    $* = 1;                     # Enable multi-line patterns
    ($O , $C, $OP, $CP) = ('<<' , '>>', '<#', '#>'); # Open/Close Markers
    $name = 0;                  # Used in the HREF NAME= field
    $wrap_toggle = 'end';
    $delim = '%:%';             # Delimits items of sectioning information
                                # stored in a string

    # NOTE(review): every marker below is the empty string in this copy of
    # the file, and $icon_mark_rx at the end of this sub is empty as well.
    # The comments describe them as textual markers that are replaced
    # later, so distinct non-empty values were presumably lost (possibly
    # angle-bracketed text stripped in transit) -- confirm against a
    # pristine copy.  The values are left exactly as found.
    $cross_ref_mark = '';
    $external_ref_mark = '';
    $cite_mark = '';
    $bbl_mark = '';
    $toc_mark = '';
    $lof_mark = '';
    $lot_mark = '';
    $idx_mark = '';
    $verbatim_mark = '';
    $verb_mark = '';
    $image_mark = '';
    $bibitem_counter = 0;
    $undef_mark = '';

    # This defines textual markers for all the icons
    # e.g. $up_visible_mark = '';
    # They will be replaced with the real icons at the very end.
    foreach $icon (keys %icons) {eval "\$$icon = ''"};

    %declarations =
        ('em' , 'em', 'it' , 'i', 'bf' , 'b', 'tt' , 'tt',
         'sl' , 'i',            # Oops!
         'sf' , 'i',            # Oops!
         'boldmath' , 'b', 'quote', 'blockquote', 'quotation', 'blockquote',
         'large', 'H5', 'Large', 'H4', 'LARGE', 'H3', 'huge', 'H2',
         'Huge', 'H1',
         %declarations          # Just in case someone extends it in the init file
         );

    &generate_declaration_subs; # Generate code to handle declarations

    %section_commands =
        ('partstar' , '1' , 'chapterstar', '2', 'sectionstar', '3',
         'subsectionstar', '4', 'subsubsectionstar', '5',
         'paragraphstar', '6', 'subparagraphstar', '7',
         'part' , '1' , 'chapter', '2', 'section', '3','subsection', '4',
         'subsubsection', '5', 'paragraph', '6', 'subparagraph', '7' ,
         'slidehead', '3');
    # The tableofcontents, listoffigures, listoftables, bibliography and
    # tex2htmlindex are set after determining what is the outermost level
    # in sub set_depth_levels. Appendix is implemented as a command.

    %section_headings =
        ('part' , 'H1' , 'chapter' , 'H1', 'section', 'H1',
         'subsection', 'H2', 'subsubsection', 'H3', 'paragraph', 'H4',
         'subparagraph', 'H5');

    &generate_sectioning_subs;  # Generates code to handle sectioning commands

    # Star forms use the same heading levels as the plain forms above.
    # (The list used to read 'sectionstar', 'subsectionstar', 'H1', 'H2',
    # which paired sectionstar with 'subsectionstar' and 'H1' with 'H2'.)
    # NOTE(review): this assignment replaces the plain-form table built
    # above rather than extending it -- presumably fine because
    # &generate_sectioning_subs has already run; confirm.
    %section_headings =
        ('partstar' , 'H1' , 'chapterstar' , 'H1', 'sectionstar', 'H1',
         'subsectionstar', 'H2', 'subsubsectionstar', 'H3',
         'paragraphstar', 'H4', 'subparagraphstar', 'H5');

    # These need their own custom code but are treated as sectioning commands
    %section_headings =
        ('tableofcontents', 'H1', 'listoffigures', 'H1', 'listoftables', 'H1',
         'bibliography', 'H1', 'tex2htmlindex', 'H1',
         %section_headings);

    &generate_accent_commands;  # Code to handle accent commands

    # These are replaced as soon as the text is read in.
    %html_specials =
        ('<', ';tex2html_html_special_mark_lt;' ,
         '>', ';tex2html_html_special_mark_gt;',
         '&', ';tex2html_html_special_mark_amp;',
         '"', ';tex2html_html_special_mark_quot;');

    # This mapping is needed in sub revert_to_raw_tex
    # before passing stuff to latex for processing.
    %html_specials_inv =
        ( ';tex2html_html_special_mark_lt;' ,'<',
         ';tex2html_html_special_mark_gt;','>',
         ';tex2html_html_special_mark_amp;','&',
         ';tex2html_html_special_mark_quot;','"');

    # For some commands such as \\, \, etc it is not possible to define
    # perl subroutines because perl does not allow some non-ascii characters
    # in subroutine names. So we define a table and a subroutine to relate
    # such commands to ascii names.
    %normalize =
        ('\\', 'dbackslash', '/', 'esc_slash', "`", 'grave', "'", 'acute',
         "^", 'circ', '"', 'uml', '~', 'tilde', '.', 'ring',
         '=', 'overscore');

    # %language_translations holds for each known language the
    # appropriate translation function. The function is called in
    # slurp_input.
    # The translation functions substitute LaTeX macros
    # with ISO-LATIN-1 character references
    %language_translations =
        ('english', 'english_translation',
         'USenglish', 'english_translation',
         'original', 'english_translation',
         'german', 'german_translation',
         'austrian', 'german_translation',
         'french', 'french_translation'
         );

    # Inclusion in this list will cause a command or an environment to be ignored.
    # This is suitable for commands without arguments and for environments.
    # If however a do_env_ or do_cmd_ subroutine exists then it will be used.
    %ignore =
        ('sloppypar', 1, 'document', 1, 'mbox', 1, 'newblock', 1,
         ',', 1, '@', 1, ' ', 1, '-', 1, 'tiny', 1, 'scriptsize', 1,
         'footnotesize', 1, 'small' , 1, 'normalsize', 1, 'sloppy', 1,
         'hyphen', 1, 'titlepage', 1, 'htmlonly', 1,
         'flushleft', 1, 'flushright', 1, 'slide', 1,
         %ignore);

    # Specify commands with arguments that should be ignored.
    # Arbitrary code can be placed between the arguments
    # to be executed while processing the command.
    #
    # Note that some commands MAY HAVE ARGUMENTS WHICH SHOULD BE LEFT AS TEXT
    # EVEN THOUGH THE COMMAND IS IGNORED (e.g. mbox, center, etc)
    #
    # NOTE(review): the table is written one command per line (assumed
    # from the "name # arg # arg" layout; &ignore_commands is defined
    # elsewhere).  In the copy under review the string in the pagebreak
    # entry was broken by a blank line; it is restored here as "<P>",
    # presumably the paragraph tag that was lost -- confirm.
    &ignore_commands( <<_IGNORED_CMDS_);
documentclass # [] # {}
usepackage # [] # {}
documentstyle # [] # {}
pagestyle # {}
thispagestyle # {}
pagenumbering #{}
linebreak# []
nolinebreak# []
pagebreak# [] # \$_ = join('',"<P>",\$_)
nopagebreak #[]
mbox
makebox# []# []
newsavebox# {}
sbox# {}
savebox# {}# []# []
usebox # {}
newlength# {}
setlength# {}# {}
addtolength# {}# {}
settowidth# {}# {}
newlength # {}
hspace # {}# &ignore_numeric_argument
hspacestar # {}# &ignore_numeric_argument
vspace# {}# &ignore_numeric_argument
vspacestar # {}# &ignore_numeric_argument
newsavebox# {}
samepage
newpage
clearpage
hfill
vfill
vskip # &ignore_numeric_argument
fill
parindent # &ignore_numeric_argument
nocite # {}
bibliographystyle # {}
center
and
makeindex
marginpar
reversemarginpar
normalmarginpar
markboth # {} # {}
itemsep # &ignore_numeric_argument
marginparsep # &ignore_numeric_argument
marginparwidth # &ignore_numeric_argument
headheight # &ignore_numeric_argument
headsep # &ignore_numeric_argument
textwidth # &ignore_numeric_argument
textheight # &ignore_numeric_argument
topskip # &ignore_numeric_argument
footheight # &ignore_numeric_argument
footskip # &ignore_numeric_argument
parskip # &ignore_numeric_argument
baselineskip # &ignore_numeric_argument
raise # &ignore_numeric_argument
lower # &ignore_numeric_argument
kern # &ignore_numeric_argument
space
addcontentsline # {} # {} # {}
addtocontents # {}
rule # [] # {} # {}
centerline
indent
noindent
oddsidemargin # &ignore_numeric_argument
evensidemargin # &ignore_numeric_argument
topmargin # &ignore_numeric_argument
protect
typeout # {}
relax
numberline
string
ignorespaces
contentsline
bibdata
bibstyle
citation
bibcite
newfont # {} # {}
end # {}
_IGNORED_CMDS_

    # Commands which need to be passed, ALONG WITH THEIR ARGUMENTS, to TeX.
    # Note that this means that the arguments should *not* be translated,
    # This is handled by wrapping the commands in the dummy tex2html_wrap
    # environment before translation begins ...

    # Also it can be used to specify environments which may be defined
    # using do_env_* but whose contents will be passed to LaTeX and
    # therefore should not be translated.

    # Note that this code squeezes spaces out of the args of psfig;
    # that's what the last round did ...

    # The ugliness involving \special is there because a TeX file with
    # a \special and nothing else in it generates no .dvi file (arguably
    # a bug in my version of TeX).  So, we throw on a useless space ...
    &process_commands_in_tex (<<_RAW_ARG_CMDS_);
psfig # {} # \$args =~ s/ //g;
fbox # {}
framebox # [] # [] # {}
parbox # [] # {} # {}
special # {} # \$args .= '\\\\ ';
dag
ddag
l
L
oe
OE
LaTeX
TeX
_RAW_ARG_CMDS_

    # This maps the HTML mnemonic names for the ISO-LATIN-1 character references
    # to their numeric values. When converting latex specials characters to
    # ISO-LATIN-1 equivalents I use the numeric values because this makes any
    # conversion back to latex (using revert_raw_tex) more reliable (in case
    # the text contains "&mnemonic_name"). Errors may occur if an environment
    # passed to latex (e.g. a table) contains the numeric values of character
    # references.
    #
    # NOTE(review): the comment above and $iso_latin1_character_rx below
    # speak of numeric character references, yet the values in this copy
    # are literal characters (and 'amp' maps to 'amp;').  Presumably the
    # references were decoded in transit -- confirm against a pristine
    # copy.  The values are left exactly as found.
    %iso_latin1_character_map =
        (
         'AElig', 'Æ',          # capital AE diphthong (ligature)
         'Aacute', 'Á',         # capital A, acute accent
         'Acirc', 'Â',          # capital A, circumflex accent
         'Agrave', 'À',         # capital A, grave accent
         'Aring', 'Å',          # capital A, ring
         'Atilde', 'Ã',         # capital A, tilde
         'Auml', 'Ä',           # capital A, dieresis or umlaut mark
         'Ccedil', 'Ç',         # capital C, cedilla
         'ETH', 'Ð',            # capital Eth, Icelandic
         'Eacute', 'É',         # capital E, acute accent
         'Ecirc', 'Ê',          # capital E, circumflex accent
         'Egrave', 'È',         # capital E, grave accent
         'Euml', 'Ë',           # capital E, dieresis or umlaut mark
         'Iacute', 'Í',         # capital I, acute accent
         'Icirc', 'Î',          # capital I, circumflex accent
         'Igrave', 'Ì',         # capital I, grave accent
         'Iuml', 'Ï',           # capital I, dieresis or umlaut mark
         'Ntilde', 'Ñ',         # capital N, tilde
         'Oacute', 'Ó',         # capital O, acute accent
         'Ocirc', 'Ô',          # capital O, circumflex accent
         'Ograve', 'Ò',         # capital O, grave accent
         'Oslash', 'Ø',         # capital O, slash
         'Otilde', 'Õ',         # capital O, tilde
         'Ouml', 'Ö',           # capital O, dieresis or umlaut mark
         'THORN', 'Þ',          # capital THORN, Icelandic
         'Uacute', 'Ú',         # capital U, acute accent
         'Ucirc', 'Û',          # capital U, circumflex accent
         'Ugrave', 'Ù',         # capital U, grave accent
         'Uuml', 'Ü',           # capital U, dieresis or umlaut mark
         'Yacute', 'Ý',         # capital Y, acute accent
         'aacute', 'á',         # small a, acute accent
         'acirc', 'â',          # small a, circumflex accent
         'aelig', 'æ',          # small ae diphthong (ligature)
         'agrave', 'à',         # small a, grave accent
         'amp', 'amp;',         # ampersand
         'aring', 'å',          # small a, ring
         'atilde', 'ã',         # small a, tilde
         'auml', 'ä',           # small a, dieresis or umlaut mark
         'ccedil', 'ç',         # small c, cedilla
         'eacute', 'é',         # small e, acute accent
         'ecirc', 'ê',          # small e, circumflex accent
         'egrave', 'è',         # small e, grave accent
         'eth', 'ð',            # small eth, Icelandic
         'euml', 'ë',           # small e, dieresis or umlaut mark
         'gt', '>',             # greater than
         'iacute', 'í',         # small i, acute accent
         'icirc', 'î',          # small i, circumflex accent
         'igrave', 'ì',         # small i, grave accent
         'iuml', 'ï',           # small i, dieresis or umlaut mark
         'lt', '<',             # less than
         'ntilde', 'ñ',         # small n, tilde
         'oacute', 'ó',         # small o, acute accent
         'ocirc', 'ô',          # small o, circumflex accent
         'ograve', 'ò',         # small o, grave accent
         'oslash', 'ø',         # small o, slash
         'otilde', 'õ',         # small o, tilde
         'ouml', 'ö',           # small o, dieresis or umlaut mark
         'szlig', 'ß',          # small sharp s, German (sz ligature)
         'thorn', 'þ',          # small thorn, Icelandic
         'uacute', 'ú',         # small u, acute accent
         'ucirc', 'û',          # small u, circumflex accent
         'ugrave', 'ù',         # small u, grave accent
         'uuml', 'ü',           # small u, dieresis or umlaut mark
         'yacute', 'ý',         # small y, acute accent
         'yuml', 'ÿ',           # small y, dieresis or umlaut mark

         # These do not have HTML mnemonic names ...
         'pounds', '£',         # pound sign
         'S', '§',              # section mark
         'copyright', '©',      # copyright mark
         'P', '¶',              # paragraph mark
         'questacute', '¿',     # question mark - upside down
         'exclamacute', '¡',    # exclamation mark - upside down

         # These are character types without arguments ...
         'grave' , "`",
         'acute' , "´",
         'circ', '^',
         'tilde', '~',
         'ring', '·',
         'overscore' , '¯',
         'H', "´´",
         'cedil', "¸"
         );

    # Global variable $iso_latin1_character_map_inv

    ################### Frequently used regular expressions ###################
    # $1 : preamble
    $preamble_rx = "(^[\\s\\S]*)\\\\begin\\s*$O\\d+$C\\s*document\\s*$O\\d+$C";

    $delimiters = '\'\\s[\\]\\\\<>(=).,#;:~\/!-';

    $delimiter_rx = "([$delimiters])";

    # $1 : br_id
    # $2 : environment name
    $begin_env_rx = "[\\\\]begin\\s*$O(\\d+)$C\\s*([^$delimiters]+)\\s*$O\\1$C(\\s*\\n)?";

    $match_br_rx = "\\s*$O\\d+$C\\s*";

    $optional_arg_rx = "^\\s*\\[([^]]+)\\]"; # Cannot handle nested []s!

    # Matches a pair of matching brackets
    # $1 : br_id
    # $2 : contents
    $next_pair_rx = "^\\s*$O(\\d+)$C([\\s\\S]*)$O\\1$C";
    $any_next_pair_rx = "$O(\\d+)$C([\\s\\S]*)$O\\1$C";
    # Same as above, but the closing bracket refers to group 4; used
    # nested inside $item_description_rx below.
    $any_next_pair_rx4 = "$O(\\d+)$C([\\s\\S]*)$O\\4$C";

    # $1 : br_id
    $begin_cmd_rx = "$O(\\d+)$C";

    # $1 : largest argument number
    $tex_def_arg_rx = "^[#0-9]*#([0-9])$O";

    # $1 : declaration or command or newline (\\)
    $cmd_delims = q|-#,.~/\'`^"=|;      # Commands which are also delimiters!
    # The tex2html_dummy is an awful hack ....
    $single_cmd_rx = "\\\\([$cmd_delims]|[^$delimiters]+|\\\\|(tex2html_dummy))";

    # $1 : description in a list environment
    $item_description_rx =
        "\\\\item\\s*[[]\\s*((($any_next_pair_rx4)|([[][^]]*[]])|[^]])*)[]]";

    $fontchange_rx = 'rm|em|bf|it|sl|sf|tt';

    # Matches the \caption command
    # $1 : br_id
    # $2 : contents
    $caption_rx = "\\\\caption\\s*$O(\\d+)$C([\\s\\S]*)$O\\1$C";

    # Matches the \htmlimage command
    # $1 : br_id
    # $2 : contents
    $htmlimage_rx = "\\\\htmlimage\\s*$O(\\d+)$C([\\s\\S]*)$O\\1$C";

    # Matches a pair of matching brackets
    # USING PROCESSED DELIMITERS;
    # (the delimiters are processed during command translation)
    # $1 : br_id
    # $2 : contents
    $next_pair_pr_rx = "^\\s*$OP(\\d+)$CP([\\s\\S]*)$OP\\1$CP";
    $any_next_pair_pr_rx = "$OP(\\d+)$CP([\\s\\S]*)$OP\\1$CP";

    # This will be used to recognise escaped special characters as such
    # and not as commands
    $latex_specials_rx = '[\$]|&|%|#|{|}|_';

    # This is used in sub revert_to_raw_tex before handing text to be processed
    # by latex.
    $html_specials_inv_rx = join("|", keys %html_specials_inv);

    # This is also used in sub revert_to_raw_tex
    $iso_latin1_character_rx = '&#(\d+);';

    # Matches a \begin or \end {tex2html_wrap}. Also used by revert_to_raw_tex
    $tex2html_wrap_rx = '[\\\\](begin|end)\s*{\s*tex2html_wrap\s*}';

    $meta_cmd_rx = '[\\\\](renewcommand|renewenvironment|newcommand|newenvironment|newtheorem|def)';

    # Matches counter commands - these are caught early and are appended to the
    # file that is passed to latex.
    $counters_rx = "[\\\\](newcounter|addtocounter|setcounter|refstepcounter|stepcounter|arabic|roman|Roman|alph|Alph|fnsymbol)$delimiter_rx";

    # Matches a label command and its argument
    $labels_rx = "[\\\\]label\\s*$O(\\d+)$C([\\s\\S]*)$O\\1$C";

    # Matches environments that should not be touched during the translation
    $verbatim_env_rx = "\\s*{(verbatim|rawhtml)[*]?}";

    # Matches icon markers
    # NOTE(review): empty in this copy; see the note on the markers at the
    # top of this sub.
    $icon_mark_rx = "";
}

# Frequently used regular expressions with arguments

# Returns a pattern matching "\end" of environment $env, whose name is
# first passed through &escape_rx_chars.
#   $1 : br_id
# ${C} is written with braces so that the character class which follows
# cannot be read as a subscript of @C during string interpolation.
# NOTE(review): the trailing class [\s^\n] matches whitespace, '^' and
# newline; "whitespace except newline" may have been intended -- confirm.
sub make_end_env_rx {
    local($env) = @_;
    $env = &escape_rx_chars($env);
    "[\\\\]end\\s*$O(\\d+)$C\\s*$env\\s*$O\\1${C}[\\s^\n]*";
}

# Returns a pattern matching either "\begin" or "\end" of environment
# $env, whose name is first passed through &escape_rx_chars.
#   $1 : 'begin' or 'end'
#   $2 : br_id
sub make_begin_end_env_rx {
    local($env) = @_;
    $env = &escape_rx_chars($env);
    "[\\\\](begin|end)\\s*$O(\\d+)$C\\s*$env\\s*$O\\2$C";
}

# Returns the bracket marker carrying the given br_id.
sub make_end_cmd_rx {
    local($br_id) = @_;
    "$O$br_id$C";
}

# Returns a pattern matching a backslash followed by any key of
# %new_command; yields a false value when %new_command is empty.
#   $1 : command name
sub make_new_cmd_rx {
    "[\\\\](". join("|", keys %new_command) . ")" if %new_command;
}

# Returns a pattern matching "\$where" (passed through &escape_rx_chars)
# followed by a bracketed key of %new_environment; yields a false value
# when %new_environment is empty.
#   $1 : br_id
#   $2 : environment name
sub make_new_env_rx {
    local($where) = @_;
    $where = &escape_rx_chars($where);
    "[\\\\]$where\\s*$O(\\d+)$C\\s*(" .
        join("|", keys %new_environment) .
        ")\\s*$O\\1$C\\s*"
            if %new_environment;
}

# Sets $sections_no_delim_rx and $sections_rx from the alternatives
# returned by &get_current_sections.
sub make_sections_rx {
    local($section_alts) = &get_current_sections;
    # $section_alts includes the *-forms of sectioning commands
    $sections_no_delim_rx = "\\\\($section_alts)";
    $sections_rx = "\\\\($section_alts)$delimiter_rx"
}

# Sets $order_sensitive_rx to a fixed list of alternatives, extended with
# the names of any theorems declared via \newtheorem{...} in $preamble.
sub make_order_sensitive_rx {
    local(@theorem_alts, $theorem_alts);
    @theorem_alts = ($preamble =~ /\\newtheorem\s*{([^\s}]+)}/og);
    $theorem_alts = join('|',@theorem_alts);
    $order_sensitive_rx =
        "(equation|eqnarray|caption|ref|counter|\\\\the|\\\\stepcounter" .
        "|\\\\arabic|\\\\roman|\\\\Roman|\\\\alph|\\\\Alph|\\\\fnsymbol)";
    # Splice the theorem names in before the closing parenthesis.
    $order_sensitive_rx =~ s/\)/|$theorem_alts)/ if $theorem_alts;
}

# Sets $setlanguage_rx and $language_rx from the keys of
# %language_translations.
sub make_language_rx {
    local($language_alts) = join("|", keys %language_translations);
    $setlanguage_rx = "\\\\setlanguage{\\\\($language_alts)}";
    $language_rx = "\\\\($language_alts)TeX";
}

# Sets $raw_arg_cmd_rx from the alternatives returned by &get_raw_arg_cmds.
sub make_raw_arg_cmd_rx {
    # $1 : commands to be processed in latex (with arguments untouched)
    $raw_arg_cmd_rx = "\\\\(" . &get_raw_arg_cmds . ")([$delimiters]+|\\\\|#|\$)";
}

# Creates an anchor for its argument and saves the information in
# the array %index;
# In the index the word will use the beginning of the title of
# the current section (instead of the usual pagenumber).
# The argument to the \index command is IGNORED (as in latex)
#   $br_id : bracket id, used as the anchor name
#   $str   : key under which the reference is stored in %index
# Returns the anchor text.  The opening <A NAME=...> tag is emitted so
# that the "$CURRENT_FILE#$br_id" reference recorded in %index has a
# target and the closing </A> is balanced.
sub make_index_entry {
    local($br_id,$str) = @_;
    # If TITLE is not yet available (i.e the \index command is in the title of the
    # current section), use $before.
    $TITLE = $before unless $TITLE;
    # Save the reference
    $words = (&get_first_words($TITLE, 4) || 'no title');
    $index{$str} .= &make_href("$CURRENT_FILE#$br_id",$words) . ", ";
    "<A NAME=$br_id>$anchor_invisible_mark<\/A>";
}

# Prints a hint on where to look when image conversion fails.
sub image_message {
    print <<_EOM_
To resolve the image conversion problems please consult
the "Troubleshooting" section of your local User Manual
or follow the links to it at
http://cbl.leeds.ac.uk/nikos/tex2html/doc/latex2html/latex2html.html
_EOM_
}

###############################################################

    # These next few lines are legal in both Perl and nroff.

.00;                       # finish .ig

'di           \" finish diversion--previous line must be blank
.nr nl 0-1    \" fake up transition to first page again
.nr % 0         \" start at page 1
'; __END__ ##### From here on it's a standard manual page - VERSION #####
.TH LaTeX2HTML 1
.AT 3
.SH NAME
latex2html \- translate LaTeX files to HTML (HyperText Markup Language)
.SH SYNOPSIS
.B latex2html
[-split num] [-link num] [-nolatex] [-external_images]
[-ascii_mode] [-t top_page_title] [-dir output_directory]
[-address author_address] [-no_navigation] [-top_navigation]
[-bottom_navigation] [-auto_navigation] [-index_in_navigation]
[-contents_in_navigation] [-next_page_in_navigation]
[-previous_page_in_navigation] [-info string]
[-dont_include file(s)] [-reuse] [-show_section_numbers]
[-init_file Perl file] [-h ] file(s)
.SH DESCRIPTION
.I LaTeX2HTML
is a Perl program that translates LaTeX source files into HTML.
For each source file given as an argument the translator will create a
directory containing the corresponding HTML files.
See the WWW online documentation or the /doc/manual.ps file for more
detailed information and examples.
.SH PROBLEMS
For information on various problems and remedies see the WWW online
documentation or the documents available in the distribution.
An online bug reporting form and various archives are available at
http://cbl.leeds.ac.uk/nikos/tex2html/doc/latex2html/latex2html.html
.SH AUTHOR
Nikos Drakos, Computer Based Learning Unit, University of Leeds
<nikos@cbl.leeds.ac.uk>.
Several people have contributed suggestions, ideas, solutions, support and
encouragement.
The pstogif script uses the pstoppm.ps postscript program originally
written by Phillip Conrad (Perfect Byte, Inc.) and modified by L. Peter
Deutsch (Aladdin Enterprises).