#!/usr/local/bin/perl

#
#	Copyright (c) 2001,2002 Edwin Mons <info.to.html@edwinm.ik.nu>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#

use strict;

use File::Basename;
use File::Copy;
use Compress::Zlib;
use Getopt::Std;
use Sys::Hostname;
use POSIX qw(isalnum);


my $DATADIR = "/usr/local/share/info_to_html";
my $INFOPATH = ".:/usr/share/info:/usr/local/info";
my $OUTDIR  = ".";
my $STYLESHEET = "$DATADIR/default.css";

my $ME = "info_to_html";
my $URL = "http://www.mons.net/info_to_html";
my $VERSION = "0.9.6";

my %opts = (
	"d"	=>	$OUTDIR,								# top-level directory
	"i" =>  $INFOPATH,		          # infodir
	"s"	=>	$STYLESHEET,						# stylesheet to use
	"S"	=>	0,											# include stylesheet in file?
	"C"	=>	0,											# copy stylesheet?
	"v" =>  0,											# verbose?
);

my $host;

sub usage()
{
	print <<"EOF";
Usage: $ME [-CSv] [-i infodir] [-d outputdir] [-s cssfile] info

  -i infopath   Path to use as info-root 
                  (Default: $INFOPATH)
  -d outputdir  Directory to use as output-root 
                  (Default: $OUTDIR)
  -s cssfile    CSS file to use for the HTML
                  (Default: $STYLESHEET)
  -C            If set, the stylesheet will be copied to the output directory
  -S            If set, the stylesheet will be included in all files
  -v            Verbose

The infofile will be split by node and dumped in a subdirectory of outputdir,
e.g. if the infofile was named gcc, the output would go to outputdir/gcc.

EOF
	exit 1;
}


sub WARN
{
	print STDERR "WARNING: ", @_, "\n"; 
}


sub LOG
{
	print @_, "\n" if $opts{v};
}


sub html($)
{
	my ($t) = @_;

	$t =~ s/&/\&amp;/g;
	$t =~ s/</\&lt;/g;
	$t =~ s/>/\&gt;/g;

	return $t;
}


sub unhtml($)
{
	my ($t) = @_;

	$t =~ s/&lt;/</g;
	$t =~ s/&gt;/>/g;
	$t =~ s/&amp;/\&/g;

	return $t;
}


sub check_dir($)
{
	my ($dir) = @_;

	if (! -d $dir)
	{
		LOG "Creating $dir";
		mkdir($dir, 0777) || die "Cannot create directory $dir";
	}
}


sub chompstart($)
{
  my ($var) = @_;

  $var =~ s/^[  ]+//;

  return $var;
}


sub readfile($$)
{
	my ($file, $buffer) = @_;

	my $b;
	open(FILE, $file) || die "Cannot open $file";
	binmode(FILE);
	while (read(FILE, $b, 131072) > 0)
	{
		$$buffer .= $b;
	}
	close(FILE);

	return $buffer;
}


sub getinfofile($$)
{
	my ($infofile, $buffer) = @_;
	my @consider = (
		[ "%s", "plain" ],
		[ "%s.info", "plain" ],
		[ "%s.info.gz", "gzip" ],
	);
	my ($file, $it, $base, $type);
	my $done = 0;

CONSIDERATION:
	foreach $base (split(':', $opts{i}))
	{
		foreach $it (@consider)
		{
			$file = sprintf($base . "/" . $it->[0], $infofile);
			$type = $it->[1];
			LOG "Considering file $file";
			if ( -f $file )
			{
				LOG "$file exists.  Using it.  The type is $type";
				$done = 1;
				last CONSIDERATION;
			}
		}
	}

	if (!$done)
	{
		print "Cannot find an infofile for $infofile\n";
		exit 2;
	}

	$$buffer = "";
	if ( $type eq "gzip" )
	{
		my ($b, $gz);

		$gz = gzopen($file, "rb") || die "Cannot open $file: $gzerrno\n";
		while ($gz->gzread($b, 131072) > 0)
		{
			$$buffer .= $b;
		}
		if ($gzerrno != Z_STREAM_END)
		{
			die "Error reading from $file: $gzerrno\n";
		}
		$gz->gzclose();
	}
	else
	{
		readfile($file, $buffer);
	}

	return $buffer;
}


sub convert_nodename($)
{
	my ($node) = @_;

	$node =~ s/[^A-Za-z0-9+]/_/g;

	return $node;
}


sub convert_nodename_long($$)
{
	my ($infoname, $node) = @_;
	my ($dir) = "";

	if ($node =~ m!(DIR)!i)
	{
		return "../";
	}
	if ($node =~ m!^\(([^)]+)\)!)
	{
		($infoname) = split(/\./, $1);
		$node = substr($node, length($1) + 2);
		$dir = "../$infoname/";
	}

	return $dir . "$infoname.info." . convert_nodename($node) . ".html";
}


sub condense_nodedata($)
{
	my ($ndref) = @_;
	my @ret = ();
	my $lasttype = "";
	my $node;
	
	foreach $node (@$ndref)
	{
#		if ($node->{'type'} eq $lasttype && $lasttype eq "para")
#		{
#			$ret[scalar(@ret) - 1]->{'data'} .= "\n" . $node->{'data'};
#		}
#		elsif ($node->{'type'} eq $lasttype && $lasttype eq "empty")
#		{
#		}
#		elsif ($node->{'type'} eq 'para' && ($lasttype eq 'menu' || $lasttype eq 'index') && ($node->{'data'} =~ m/^\s+/))
		if ($node->{'type'} eq 'para' && ($lasttype eq 'menu' || $lasttype eq 'index') && ($node->{'data'} =~ m/^\s+/))
		{
			$ret[scalar(@ret) - 1]->{'data'}->{'desc'} .= "\n" . $node->{'data'};
		}
		elsif ($node->{'type'} eq 'empty' && ($lasttype eq 'menu' || $lasttype eq 'index'))
		{
			push(@ret, {
				'type' => $lasttype
			});
		}
		else
		{
			push(@ret, $node);
			$lasttype = $node->{'type'};
		}
	}

	return @ret;
}


sub count_nonword($)
{
		my ($s) = @_;
		my $it;
		my $wcnt = 0;
		my $l = length($s) - 1;

		foreach $it (0..$l)
		{
			$wcnt++ if (!isalnum(substr($s, $it, 1)));
		}

		return $wcnt;
}


sub convert_para($$)
{
	my ($item, $infoname) = @_;
  my $html = "";
	my ($line, $last, $wcnt, $it, $s);

	# First, check for special para types.
	my @lines = split("\n", $item->{'data'});
	
	# Check if it's a title...
	if (@lines == 2 && 
			length($lines[0]) == length($lines[1]) &&
			($lines[1] =~ m/^[\*,=\-\.]+$/))
	{
		$html .= "<h1>$lines[0]</h1>\n";
	}
	else
	{
		$wcnt = 0;
		$wcnt = count_nonword($item->{'data'});

		# if the ratio is less than 0.4, treat it as preformatted text
		if (($wcnt / length($item->{'data'})) < 0.6)
		{
			$html .= "<pre>" . html($item->{'data'}) . "</pre>\n";
		}
		else
		{
			my $last = 0;
			my $first = -1;
			$wcnt = 0;
			foreach $line (@lines)
			{
				if ($line =~ m/^([\s`]+)/)
				{
					if (length($1) > $last)
					{
						$last = length($1);
						$wcnt++;
					}
					$first = length($1) if ($first < 0);
				}
				last if ($wcnt > 1);
			}
			$item->{'data'} = html($item->{'data'});
			$item->{'data'} =~ s/^(\s+)(\*[Nn]ote)/"$1" . ($2 eq "*Note"? "See ": "see ") . "$2"/sge;
			$item->{'data'} =~ s/\*[Nn]ote(\s+)([^:]+)::[,.]/"<a href='" . convert_nodename_long($infoname, unhtml($2)) . "'>" . chompstart("$1$2") . "<\/a>"/sge;
			$item->{'data'} =~ s/\*[Nn]ote(\s+)([^:]+): (\([^)]+\))?([^.,]+)[.,]/"<a href='" . convert_nodename_long($infoname, unhtml((defined($3)?$3:"")."$4")) . "'>" . chompstart("$1$2: " . (defined($3)?$3:"") . "$4") . "<\/a>"/sge;
			$item->{'data'} =~ s!(\s)_([^ ]*)_([.,\s])!$1<u>$2</u>$3!sg;

			if ($wcnt > 1)
			{
				$item->{'data'} =~ s/\n/<br>\n/gs;
				$item->{'data'} =~ s/^(\s+)/"<span style='font-family: courier; whitespace: pre;'>" . " " x (length($1) - $first) . "<\/span>"/egm;
			}
			if ($last == 0)
			{
				$html .= "<p>";
			}
			else
			{
				$html .= "<p style='padding-left: " . (8*($first)) . "px;'>";
			}
			$html .= $item->{'data'} . "</p>\n";
		}
	}

	return $html;
}


sub node_to_html($$)
{
	my ($nodedata, $nodeinfo) = @_;
	my $lasttype = "";
	my $item;
	my $html = "";
	my ($nodename, $infoname);
	my $wcnt;
	my $it;
	my $s;
	my $i = 0;
	my $line;

	($infoname) = split(/\./, $nodeinfo->{'File'});
	$nodename = convert_nodename($nodeinfo->{'Node'});

	$html .= "<html>\n\n<head>\n  <title>$infoname.info: $nodeinfo->{'Node'}</title>\n";
	if (-e $opts{s})
	{
		if ($opts{S})
		{
			$html .= "	<style type='text/css'><!--\n";
			readfile($opts{s}, \$html);
			$html .= "	// --></style>\n";
		}
		elsif ($opts{C})
		{
			$html .= "  <link rel='stylesheet' href='" . basename($opts{s}) . "' type='text/css'>\n";
		}
		else
		{
			$html .= "  <link rel='stylesheet' href='$opts{s}' type='text/css'>\n";
		}
	}
	$html .= "</head>\n\n<body>\n<h1 class='title'>$infoname.info: $nodeinfo->{'Node'}</h1>\n";

	if (exists($nodeinfo->{'Next'}))
	{
		$html .= "<b class='next'>Go forward to <a href='" . convert_nodename_long($infoname, $nodeinfo->{'Next'}) . "'>$nodeinfo->{'Next'}</a></b><br>\n";
	}
	
	if (exists($nodeinfo->{'Prev'}))
	{
		$html .= "<b class='back'>Go backward to <a href='" . convert_nodename_long($infoname, $nodeinfo->{'Prev'}) . "'>$nodeinfo->{'Prev'}</a></b><br>\n";
	}
	
	if (exists($nodeinfo->{'Up'}))
	{
		$html .= "<b class='up'>Go up to <a href='" . convert_nodename_long($infoname, $nodeinfo->{'Up'}) . "'>$nodeinfo->{'Up'}</a></b><br>\n";
	}

	if ($nodeinfo->{'Node'} ne 'Top')
	{
		$html .= "<b class='top'>Go to the top op <a href='" . convert_nodename_long($infoname, "Top") . "'>$infoname</a></b><br>\n";
	}
	
	foreach $item (@$nodedata)
	{
		if ($item->{'type'} ne $lasttype)
		{
			if ($lasttype eq 'menu' || $lasttype eq 'index')
			{
				$html .= "</table>\n";
			}
		}

		if ($item->{'type'} eq "para")
		{
			$html .= convert_para($item, $infoname);
		}
		elsif ($item->{'type'} eq "menuhead")
		{
			$html .= "<h3 class='menu'>Menu</h3>\n";
		}
		elsif ($item->{'type'} eq "menu")
		{
			if ($lasttype ne $item->{'type'})
			{
				$html .= "<table border='0' class='" . $item->{'type'} . "'>\n";
			}
			if (!exists($item->{'data'}->{'node'}))
			{
				$html .= "<tr><td>&nbsp;</td><td>&nbsp;</td></tr>\n";
			}
			else
			{
				$html .= "<tr><td><a href='" . convert_nodename_long($infoname, $item->{'data'}->{'node'}) . "'>" . html($item->{'data'}->{'name'}) . "</a></td>";
				if (exists($item->{'data'}->{'desc'}))
				{
					$html .= "<td>" . html($item->{'data'}->{'desc'}) . "</td>";
				}
				$html .= "</tr>\n";
			}
		}
		elsif ($item->{'type'} eq "index")
		{
			if ($lasttype ne $item->{'type'})
			{
				$html .= "<table border='0' class='index'>\n";
			}
			if (!exists($item->{'data'}->{'node'}))
			{
				$html .= "<tr><td>&nbsp;</td><td>&nbsp;</td></tr>\n";
			}
			else
			{
				$html .= "<tr><td>" . html($item->{'data'}->{'desc'}) . "</td><td><a href='" . convert_nodename_long($infoname, $item->{'data'}->{'node'}) . "'>" . html($item->{'data'}->{'node'}) . "</a></td></tr>\n";
			}
		}

		$lasttype = $item->{'type'};
	}

	if ($lasttype eq 'menu' || $lasttype eq 'index')
	{
		$html .= "</table>\n";
	}
	$html .= "<span class='advert'>Created " . localtime() . " on $host with <a href='$URL'>$ME</a> version $VERSION.</span>\n</body>\n</html>\n";

	return $html;
}


sub write_html($$)
{
	my ($html, $nodeinfo) = @_;
	my $fname;
	my ($infoname, $nodename);

	($infoname) = split(/\./, $nodeinfo->{'File'});
	$nodename = convert_nodename($nodeinfo->{'Node'});
	$fname = "$opts{d}/$infoname/$infoname.info.$nodename.html";

	check_dir("$opts{d}/$infoname");

	print "Writing to $fname\n" if ($opts{'v'});
	open(FILE, ">$fname");
	print FILE $$html;
	close(FILE);
}


sub split_node($$$$)
{
	my ($inforef, $node, $nodedata, $nodeinfo) = @_;
	my ($nodename);
	my ($line, @lines);
	my ($item, $lasttype);

	# @nodedata will contain an array with the following data:
	# [ "type" => one of 'menu', 'menuhead', 'index', 'para' or 'empty',
	#   "data" => the data of that field.  In case of index or menu:
	#							[ "node" => link to the node,
	#								"desc" => Description
	#							]
	#	]

	# First, test if it's a node
	if (!($$node =~ m/^File:/))
	{
		# if it doesn't start with 'File:', it's not a node.  Discard it.
		print "Node doesn't start with File:\n" if ($opts{'v'});
		return 0;
	}

	@lines = split("\n", $$node);
	$_ = shift @lines;
	foreach $item (split(/,\s+/))
	{
		my ($key, $val) = split(/:\s+/, $item);
		$nodeinfo->{$key} = $val;
	}

	$lasttype = "";
	foreach $line (@lines)
	{
		$_ = $line;

		# Check for empty lines
		if (m/^\s*$/)
		{
			if ($lasttype ne 'empty')
			{
				push(@$nodedata, { "type" => "empty" });
				$lasttype = 'empty';
			}
		}
		# Check for Menu heads
		elsif (m/^\* Menu:$/)
		{
			push(@$nodedata, { "type" => "menuhead" });
			$lasttype = 'menuhead';
		}
		# Check for menu items.  A menu cannot follow directly on an indexitem
		elsif (@$nodedata > 0 && ($nodedata->[@$nodedata - 1]->{'type'} ne 'index') && 
						m/^\* ([^:]*)::(\s*(.*))?$/)
		{
			push(@$nodedata, { 
				"type" => "menu",
				"data" => {
										"node" => "$1",
										"name" => "$1",
										"desc" => "$3",
									}
			});
			$lasttype = 'menu';
		}
		# this basically supercedes index type
		elsif (m/^\* (.*):\s*([^\.,]*)[\.,](\s+(.*))?$/)
		{
			push(@$nodedata, { 
				"type" => "menu",
				"data" => {
										"node" => "$2",
										"name" => "$1",
									}
			});
			if (defined($4))
			{
				$nodedata->[@$nodedata - 1]->{'data'}->{'desc'} = $4;
			}
			$lasttype = 'menu';
		}
		# Check for index items
		elsif (m/^\* (.*):\s*(.*)[\.,]$/)
		{
			push(@$nodedata, { 
				"type" => "index",
				"data" => {
										"node" => "$2",
										"desc" => "$1",
									}
			});
			$lasttype = 'index';
		}
		else
		{
			if ($lasttype eq 'para')
			{
				$nodedata->[@$nodedata - 1]->{'data'} .= "\n" . $_;
			}
			else
			{
				push(@$nodedata, {
					"type" => "para",
					"data" => $_,
				});
				$lasttype = 'para';
			}
		}
	}

	return 1;
}


sub info_to_html($$)
{
	my ($infoname, $inforef) = @_;
	my ($node);

	my @firstnodes = split("\037\n?", $$inforef);
	my @nodes;
	my $temp;
	my $indirect;

	# First, find indirects
	foreach $node (@firstnodes)
	{
		if ($node =~ m/^Indirect:/)
		{
			print "Found indirect node table\n" if ($opts{'v'});
			foreach $indirect (split("\n", $node))
			{
				my ($file, $pos);
				if (($file, $pos) = $indirect =~ m/^([^:]*):\s+(\d+)$/)
				{
					print "Reading indirect `$file' ($pos)\n" if ($opts{'v'});
					if (getinfofile($file, \$temp) <= 0)
					{
						print "Something went wrong while reading the infofile...\n";
					}
					my @nextnodes = split("\037\n?", $temp);
					push(@nodes, @nextnodes);
				}
			}
		}
		else
		{
			push(@nodes, $node);
		}
	}

	if ($opts{C})
	{
		LOG "Copying `$opts{s}' to $opts{d}/$infoname";
		check_dir("$opts{d}/$infoname");
		copy($opts{s}, "$opts{d}/$infoname");
	}

	foreach $node (@nodes)
	{
		my (@nodedata, %nodeinfo);
		my ($html);

		@nodedata = ();
		%nodeinfo = ();

		next if (!split_node($inforef, \$node, \@nodedata, \%nodeinfo));

		# We scanned the node, now we convert it to HTML
		@nodedata = condense_nodedata(\@nodedata);
		$html = node_to_html(\@nodedata, \%nodeinfo);
		write_html(\$html, \%nodeinfo);
	}
}


sub get_hostname()
{
	$host = hostname();
}


sub main()
{
	get_hostname();

	getopts('vi:d:s:SC?', \%opts);

	if (@ARGV < 1 || $opts{'h'} || $opts{'?'})
	{
		usage();
	}

	if ( ! -d $opts{d} )
	{
		die "The directory-root ($opts{d}) does not exist.";
	}

	if ( ! -e $opts{s} )
	{
		if ( $opts{S} || $opts{C} )
		{
			die "`$opts{s}' doesn't exist";
		}
		else
		{
			WARN "`$opts{s}' doesn't exist (there will be no CSS definitions)";
		}
	}
	if ( $opts{s} =~ '^[^/]' && ! $opts{S} && ! $opts{C} )
	{
		WARN "You're using a relative path for the CSS file";
	}

	LOG "Using CSS file `$opts{s}'";

	my $infofile = $ARGV[0];

	my $info;

	if (getinfofile($infofile, \$info) <= 0)
	{
		die "Something went wrong while reading the infofile...\n";
	}

	info_to_html($infofile, \$info);
}


main();

# ex:set ts=2 sw=2 ai:
