#!/usr/bin/env perl

#    gladtex: Reads a 'htex' file (html with LaTeX maths embedded in <EQ></EQ>)
#             and produces html with equations substituted by images.
#    Project homepage at http://gladtex.sourceforge.net
#    Copyright (C) 1999-2010 Martin G. Gulbrandsen
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

# note: the utility 'eqn2img' should accompany this script,
#       and must be callable from this script.

use IPC::Open2;
use Cwd;
use Getopt::Std;
use Storable;
#require 'getopts.pl'; -- replaced by Getopt::Std module
#require 'getcwd.pl'; -- replaced by Cwd module

# --- Default settings ---
# These are the values shown as defaults in the usage text below.
($img_dir, $format, $verbose)       = (".", "png", 0);
($dpi, $supersample)                = (100, 4);
($foreground, $background)          = ("000000", "A0A0A0");
$transparency                       = 1;
$environment                        = "displaymath";
($blockMathClass, $inlineMathClass) = ("displaymath", "inlinemath");

# Default LaTeX preamble: one \usepackage line per package, with a leading
# newline in front of the first one.
$preamble = join "", "\n", map { "\\usepackage{$_}\n" } qw(amsmath amssymb);

# Help text. The defaults set above are interpolated, so this has to be
# built after they have been assigned.
$usage = <<"END_USAGE";
gladtex version 1.2, Copyright (C) 1999-2010 Martin G. Gulbrandsen

gladtex comes with ABSOLUTELY NO WARRANTY. This is free software,
and you are welcome to redistribute it under certain conditions;
see the file COPYING for details.

Project homepage at http://gladtex.sourceforge.net

Usage: gladtex [OPTION]... [FILE]...
Convert htex file (HTML with LaTeX equations) to html with images
  -v          print verbose information
  -f format   store images in 'format' (png by default)
  -r dpi      set resolution (size of images) to 'dpi' ($dpi by default)
  -s n        set oversampling factor for antialiasing to 'n' ($supersample by default)
  -d path     store image files in 'path' (current directory is default)
  -u url      url to image files above (relative links are default)
  -p string   add 'string' to LaTeX preamble (e.g. \\usepackage{...})
  -e env      embed LaTeX code in \\begin{env}..\\end{env} ($environment by default)
  -c colour   set foreground RGB colour ($foreground by default)
  -b colour   set background RGB colour ($background by default)
  -l class    CSS class to assign to block-level math (default: 'displaymath')
  -i class    CSS class to assign to inline math (default: 'inlinemath')
  -t          turn transparency OFF

Output files:
  *.html      copy of input FILE(s), with <EQ>..</EQ> tags substituted
              filename is same as input file, with the extension replaced
  eqn???.png  equation images (png extension is example only, see -f option)

Input files should not have .html extension, .htex is recommended.
END_USAGE

# Unbuffered output gives better feedback when -v is used.
$| = 1;

# Name (without extension) of the first equation image.
$img_name = "eqn000";


# --- sub: rel_name ---
# usage: rel_name $src, $dest;
# Both arguments are expected to be absolute paths.
# returns: the relative path leading from $src to $dest. Every directory
#          named in the result is followed by a "/"; the result is the
#          empty string when both paths are the same.
sub rel_name {
    my ($from, $to) = @_;
    my @from_parts = split /\//, $from;
    my @to_parts = split /\//, $to;

    # count the leading components shared by both paths
    my $shared = 0;
    while($shared < @from_parts
	  and $shared < @to_parts
	  and $from_parts[$shared] eq $to_parts[$shared]) {
	$shared++;
    }

    # climb out of what is left of $from ...
    my $climb = "../" x (@from_parts - $shared);
    # ... then descend into what is left of $to
    my $descend = join "", map { "$_/" } @to_parts[$shared .. $#to_parts];

    return $climb . $descend;
}

# --- sub: full_name ---
# usage: full_name $src, $dest
# $src should be some absolute path
# $dest may be relative (as seen from $src) or absolute
# returns absolute path to $dest, as seen from $src (without trailing /)
#
# The path is normalised component by component: empty components (from
# repeated slashes) and "." are dropped, and ".." removes the component
# before it. A ".." that would climb above the top of the path is ignored.
# The root directory itself is returned as the empty string, so that
# "$result/file" is still a usable path.
sub full_name {
    my $src = shift;
    my $dest = shift;

    # if not absolute path, add $src
    $dest = "$src/$dest" unless $dest =~ m{^/};

    # only a path starting with / gets a leading / back
    my $prefix = ($dest =~ m{^/}) ? "/" : "";

    my @parts;
    foreach my $part (split m{/+}, $dest) {
	# skip the empty field in front of a leading / and any "."
	next if $part eq "" or $part eq ".";

	if($part eq "..") {
	    # step back one level (no-op when already at the top)
	    pop @parts;
	    next;
	}
	push @parts, $part;
    }

    return "" unless @parts;
    return $prefix . join("/", @parts);
}

# --- Parse command line options ---

# Getopt::Std leaves the value of each switch in the package variable
# $opt_<letter>; the code below relies on these variables.
getopts('f:r:s:d:u:vtc:b:p:e:l:i:');

# without any input file there is nothing to do but to show the help text
unless(@ARGV) {
    print $usage;
    print `eqn2img -f?`; # this prints list of supported formats
    exit;
}

# let the switches given on the command line override the defaults
defined $opt_d and $img_dir = $opt_d;
defined $opt_f and $format = $opt_f;
defined $opt_r and $dpi = $opt_r;
defined $opt_s and $supersample = $opt_s;
defined $opt_v and $verbose = $opt_v;
defined $opt_t and $transparency = 0;
defined $opt_c and $foreground = $opt_c;
defined $opt_b and $background = $opt_b;
defined $opt_e and $environment = $opt_e;
defined $opt_l and $blockMathClass = $opt_l;
defined $opt_i and $inlineMathClass = $opt_i;
defined $opt_p and $preamble .= "$opt_p\n";

if(defined $opt_u) {
    # the url is used as a prefix for image names, so it must end in /
    $url = ($opt_u =~ /\/$/) ? $opt_u : "$opt_u/";
    print "\nWarning: -u option present, but no -d\n\n" unless defined $opt_d;
}
$opt_t = 0; # just do something with $opt_t to avoid 'possible typo' warning

# todo: add validation of options

if($opt_d) {
    # the image directory is made absolute, unless an url was given
    $img_dir = full_name(getcwd(), $img_dir) unless defined $url;

    # read in cached history
    if(-r "$img_dir/gladtex.cache") {
	print "Retrieving cache\n" if $verbose;
	%history = %{retrieve("$img_dir/gladtex.cache")};
    }
}

# --- Process input files ---
#
# Each input file is copied to <basename>.html, written to the directory of
# the input file. Every <eq ...>..</eq> element found on the way is handed
# to eqn2img and replaced by an <IMG> tag referring to the resulting image.
# An equation that has been rendered before with exactly the same options
# is looked up in %history instead of being rendered again.

print "Processing ", $#ARGV + 1, " files\n" if $verbose;

$startup_cwd = getcwd();

foreach $file (@ARGV) {
    # split the file name into directory, basename and extension
    ($directory, $basename, $extension) = $file =~ /(.*?)\/*([^\/]*?)\.([^\/]*)$/;
    $directory or $directory = ".";
    $basename or $basename = "noname";
    $extension or $extension = "htex";
    $extension eq "html" and die "Don't use .html extension, .htex is recommended.";

    $full_dir = full_name($startup_cwd, $directory);
    # full_name may yield the empty string for the root directory
    $full_dir = "/" if $full_dir eq "";

    if(getcwd() ne $full_dir) {
        # Without -d the cache lives in the current directory: store what
        # has been collected so far before leaving it.
        # (%history is tested for content; 'defined %hash' is a fatal
        # error in perl 5.22 and later.)
        if(!$opt_d and %history) {
            print "Storing cache\n" if $verbose;
            store(\%history, "gladtex.cache");
            %history = ();
        }

        chdir $full_dir or die "Cannot change directory to $full_dir: $!";
        $img_name = "eqn000" if !$opt_d;
    }
    if(!$opt_d and !%history) {
        if(-r "gladtex.cache") {
            print "Retrieving cache\n" if $verbose;
            %history = %{retrieve("gladtex.cache")};
        }
    }

    # three-argument open: the file name is never interpreted as a mode
    # or as a command
    my ($input, $output);
    open($input, "<", "$basename.$extension") or die "Cannot open $file: $!";
    open($output, ">", "$basename.html") or die "Cannot open $basename.html: $!";

    print "\n$file -> $basename.html\n" if $verbose;

    for($start_line = 1; not eof($input); $start_line++) {
        $line = <$input>;

        # search for <eq> tag (the s option is needed to avoid losing linebreak at end of line)
        while($line =~ /(.*?)<eq(.*?)>(.*)/is) {
            print {$output} $1; # everything before <eq> tag
            $options = $2;      # anything between '<eq' and '>'
            $line = $3;         # the rest

            # start from the global settings, then apply the options
            # given within this <eq> tag
            $this_preamble = $preamble;
            $this_foreground = $foreground;
            $this_background = $background;
            $this_environment = $environment;
            $this_dpi = $dpi;
            $this_css_class = $inlineMathClass;

            # scan options within <eq> tag: key="value", key='value' or key=value
            while($options =~ /\s*?(\S*?)=\s*(\"(.*?)\"|\'(.*?)\'|(\S*))/g) {
                $key = $1;

                # only one of these will be defined
                $value = $3 if defined $3; # "value"
                $value = $4 if defined $4; # 'value'
                $value = $5 if defined $5; # value (no quotation marks)

                foreach($key) { # may add more options here when needed..
                    /^preamble/i and $this_preamble .= "$value\n";
                    /^color/i and $this_foreground = $value;
                    /^bgcolor/i and $this_background = $value;
                    /^env/i and $this_environment = $value;
                    /^dpi/i and $this_dpi = $value;
                }
            }

            $equation = "";
            $end_line = $start_line;

            # the CSS class follows the environment; any environment other
            # than these two keeps the inline class assigned above
            if ($this_environment eq "displaymath") {
                $this_css_class = $blockMathClass;
            } elsif ($this_environment eq "math") {
                $this_css_class = $inlineMathClass;
            }

            # read equation until </eq> is found
            while(not (($before, $after) = ($line =~ /(.*?)<\/eq>(.*)/is)) ) {
                $equation .= "$line\n";
                if(eof($input)) {
                    print STDERR "Closing tag </eq> not found in equation started at line $start_line\n";
                    # todo: cleanup
                    exit 1;
                }
                $line = <$input>;
                $end_line++;
            }
            $equation .= $before; # everything before </eq>
            $line = $after;       # everything after </eq>

            # strip whitespace: this makes 'history' stronger and removes linebreak
            # trouble (a paragraph can't end within $$..$$ in latex)
            $equation =~ s/\s+/ /g;

            # Resolve entities in the equation, since the document
            # ought to be well-formed HTML.
            $equation =~ s/&gt;/>/g;
            $equation =~ s/&lt;/</g;

            print "Processing equation at line(s) $start_line to $end_line:\n" if $verbose;

            # prefix of the image name within the SRC attribute
            if($opt_u) {
                $img_src = $url;
            }
            elsif($opt_d) {
                $img_src = rel_name(getcwd(), $img_dir);
            }
            else {
                $img_src = "";
            }

            # This string is the cache key stored in gladtex.cache; its
            # format is left untouched so that existing caches stay valid.
            $eqn2img_opt = "-e $this_environment -r $this_dpi -p '$this_preamble' -c $this_foreground -b $this_background -f $format -s $supersample " . ($transparency ? "" : "-t ") . ($verbose ? "-v " : "");

            # The same options as an argument list. eqn2img is started
            # without a shell, so quotes or shell metacharacters coming from
            # the input file or the command line are passed on literally.
            my @eqn2img_args = (
                "-e", $this_environment,
                "-r", $this_dpi,
                "-p", $this_preamble,
                "-c", $this_foreground,
                "-b", $this_background,
                "-f", $format,
                "-s", $supersample,
            );
            push @eqn2img_args, "-t" unless $transparency;
            push @eqn2img_args, "-v" if $verbose;

            # --- process the latex code in $equation ---
            # recycle image if the same equation has appeared before with the
            # same options (colors etc.)
            if(defined $history{$equation} and $history{$equation}->{"opt"} eq $eqn2img_opt) {
                print "Reusing image\n" if $verbose;
            }
            else {
                while(-e "$img_dir/$img_name.$format") { $img_name++ }; # never overwrite an image
                print "$img_dir/$img_name: " if $verbose;

                # the equation goes to eqn2img on stdin; the first line it
                # writes to stdout is inserted into the <IMG> tag below
                my ($eqn2img_out, $eqn2img_in);
                $pid = open2($eqn2img_out, $eqn2img_in,
                             "eqn2img", @eqn2img_args,
                             "-o", "$img_dir/$img_name.$format");
                print {$eqn2img_in} $equation;
                close $eqn2img_in;
                $dimensions = <$eqn2img_out>;
                waitpid $pid, 0; # close does not set $? for open2, so wait explicitly
                if($?) {
                    print STDERR "Error processing equation starting at line $start_line:\n", $equation,"\n";
                    print STDERR "See ${img_dir}/${img_name}.log for details\n";
                    exit 1;
                }
                close $eqn2img_out;

                print ", done.\n" if $verbose;
                $history{$equation}->{"opt"} = $eqn2img_opt;
                $history{$equation}->{"img"} = "$img_name.$format";
                $history{$equation}->{"dim"} = $dimensions;
            }
            print {$output} "<IMG CLASS=\"".$this_css_class."\" SRC=\"".$img_src.$history{$equation}->{"img"}."\" "
                            .$history{$equation}->{"dim"}.">";

            # the outer loop counts on from the last line of the equation
            $start_line = $end_line;
        }

        print {$output} "$line";
    }
    close($input);
    # buffered write errors only show up when the file is closed
    close($output) or die "Cannot write $basename.html: $!";
}

# write history cache: into the image directory when -d was given,
# otherwise into the current directory
my $cache_path = $opt_d ? "$img_dir/gladtex.cache" : "gladtex.cache";
print "Storing cache\n" if $verbose;
store(\%history, $cache_path);

