#!/usr/local/bin/perl -w
#
# Copyright (c) 2008, 2009 Mikolaj Golub
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# $Id$
#

#
# Modules
#

use strict;
use Getopt::Std;
use Time::Local qw(timelocal);
use POSIX qw(strftime);
use Fcntl qw(:flock);

#
# Global variables with some defaults
#

# configuration file: Perl code evaluated at startup (path can be changed
# with -C); it may override the defaults below
my $confile = '/usr/local/etc/gather/gather.cfg';

# verbose mode (set by -v): progress messages are printed to STDERR
my $verbose = 0;

# map file: Perl code evaluated at startup, expected to fill in %map
my $mapfile = '/usr/local/etc/gather/gather.map';

# directory where collected data will be stored
my $datadir =  '/var/db/gather';

# how old data in days are considered to be expired
# (default for the expire command)
my $expiration = 30;

# whether collected data is stored compressed (set by -c)
my $compression = 0;

# compression filter: shell command the output is piped through when
# compression is on
my $compress = 'gzip -c';

# decompression filter: shell command used to read compressed files back
my $uncompress = 'zcat';

# suffix that will be added to compressed files
my $compsuffix = '.gz';

# run commands in parallel (set by the -c option of collect/script)
my $concurrent = 0;

# filter that will be used before storing (and compressing if it is turned on) sysutils output
my $filter = '';

# time granularity: hour, min, sec
my $granularity = 'min';

# time multipliers: number of seconds in one unit of each granularity
my %multipliers = ( 'sec' => 1, 'min' => 60, 'hour' => 3600);

# dir formats: strftime(3) patterns giving the directory (relative to
# $datadir) that holds one collection for each granularity
my %dirs = ('sec' => '%F/%H/%M/%S', 'min' => '%F/%H/%M', 'hour' => '%F/%H');

# period to display collected data
# (empty string means the date of the last collection)
my $period = '';

# time format used when outputting data
# NOTE: this expression is evaluated once, right here, using the default
# granularity assigned above.
my $timefmt = ($granularity eq 'sec') ? '%Y-%m-%d/%H/%M/%S:' : '%Y-%m-%d/%H/%M:';

# lockfile to prevent run of next gather when previous is still running
# (empty string means we don't want to use locking)
my $lockfile = '';

# Gnuplot settings (used by `show plot'):
#   cmd     - shell command the plot commands are piped to
#   term    - argument of `set term'
#   yrange  - placed inside the brackets of `set yrange [...]'
#   style   - argument of `set style data'
#   options - extra line sent to gnuplot verbatim
my %gnuplot = (
    'cmd'     => 'gnuplot -p',
    'term'    => 'x11',
    'yrange'  => ':',
    'style'   => 'linespoints',
    'options' => '',
);

# map: name => { 'cmd' => command to run, 'desc' => description }
my %map;

# command to run (first non-option argument, set by parse_options)
my $cmd;

#
# Functions
#

#
# print version and usage info

# version banner printed by VERSION_MESSAGE
my $VERSION = 'gather 0.4.1 2012 May 26';
# name the program was invoked as; used in usage messages
my $progname = $0;

# Getopt::Std setting; see its documentation for the effect on --help and
# --version handling
$Getopt::Std::STANDARD_HELP_VERSION = 1;

# Print the version banner to the currently selected output handle.
sub VERSION_MESSAGE()
{
    print 'version: ' . $VERSION . "\n";
}

# Print the usage screen to the currently selected output handle
# (callers select(STDERR) first when reporting a usage error).
# Defaults shown in the text are the current values of the corresponding
# global variables.
sub HELP_MESSAGE()
{
    print <<"END_HELP";

Usage: $progname [options] <command> [args]

  Collect and display system statistics

Options:

  -h             display this help and exit
  -V             display version info and exit
  -v             verbose mode
  -C <config>    path to configuration file
                 (default is `$confile')
  -d <datadir>   path to directory with statistics
                 (default is `$datadir')
  -c             compression is used
  -g <gran>      time granularity (hour, min, sec)

Commands:

  collect [options]     - collect statistics

    -l <lockfile>  use lockfile to prevent running next gather
                   when previous one is still running
    -c <1|0>       run programs in parallel (concurrently) or not (default is $concurrent)

  script [options]      - generate sh script that can be used instead of `gather collect'
                          (options are the same as for collect command)

  show [options] <what> - display statistics

    -t <period>    show results for this timeperiod (default is `$period')
                   examples: 2008-08-01, 2008-08-02/23, 2008-08-03/03/04--2008-08-03/20
    -f <timefmt>   display time in given strftime(3) format
                   (default is `$timefmt')

  archive [opts] <name> - archive data to archive

    -t <period>    archive data for this timeperiod (default is `${expiration}d')

  expire [<days>]       - remove data older than specified days
                          (default is $expiration days)

Run `$progname show help' to read minihelp about show command

END_HELP
}

#
# Evaluate Perl code from a file (used for the configuration and map files).
#
# The whole file is read and compiled with string eval inside this sub, so
# the code can assign to the file-scoped variables of this program.
#
#   $file - path to the file with Perl code
#
# Dies if the file can't be opened or closed, or if its code fails to
# compile or run; in the latter case the message from eval is included.
#
# NOTE(review): the file content is executed with the privileges of the
# caller, so it must come from a trusted location.

sub eval_file($)
{

    my $file = shift;

    # Explicit read mode: with two-argument open a path like '>file' or
    # 'cmd |' would be taken as a mode/command instead of a file name.
    open my $fh, '<', $file
	or die "Can't open file $file: $!";

    # Undefined record separator makes <$fh> return the whole file at once.
    local $/;
    eval <$fh>;
    die "Can't parse $file: $@" if $@;
    close $fh
	or die "Can't close file $file: $!";
}

#
# Parse global command line options and load the configuration file.
#
# Order of precedence: built-in defaults, then the configuration file
# (evaluated here), then command line options.  On return $cmd holds the
# command name and @ARGV holds only the command's own arguments.
#
# Prints usage to STDERR and exits with status 1 on a bad option or a
# missing command; -h and -V print to STDOUT and exit with status 0.

sub parse_options()
{

    my %opts;

    if (!getopts('hVvC:d:cg:', \%opts)) {
	select(STDERR);
	HELP_MESSAGE();
	exit 1;
    }

    if (defined $opts{'h'}) {
	VERSION_MESSAGE();
	HELP_MESSAGE();
	exit 0;
    }
    if (defined $opts{'V'}) {
	VERSION_MESSAGE();
	exit 0;
    }

    $confile = $opts{'C'} if defined $opts{'C'};

    # Remember the built-in time format so that we can tell below whether
    # the configuration file has replaced it.
    my $builtin_timefmt = $timefmt;

    eval_file($confile) if $confile;

    # Command line options override the configuration file.
    $verbose     = 1          if defined $opts{'v'};
    $compression = 1          if defined $opts{'c'};
    $datadir     = $opts{'d'} if defined $opts{'d'};
    $granularity = $opts{'g'} if defined $opts{'g'};

    # The built-in $timefmt was chosen before the granularity was known.
    # With one-second granularity it has to include seconds, otherwise all
    # samples taken within the same minute get identical timestamps.
    if ($granularity eq 'sec' and $timefmt eq $builtin_timefmt) {
	$timefmt = '%Y-%m-%d/%H/%M/%S:';
    }

    if (!defined $ARGV[0]) {
	select(STDERR);
	HELP_MESSAGE();
	exit 1;
    }

    $cmd = shift @ARGV;
}

#
# Sanity check of critical settings after options and configuration have
# been processed.  Dies when the data directory is empty or when the
# granularity is not one of the keys of %multipliers.

sub check_vars()
{
    if ($datadir eq '') {
	die "Path to directory with statistics is not specified.";
    }

    unless (defined $multipliers{$granularity}) {
	my $known = join(', ', keys %multipliers);
	die "wrong granularity: ${granularity};" . " possible values: " . $known;
    }
}

#
# Get lock (check if previous gather is still running).
#
# Does nothing when no lockfile is configured.  Otherwise opens (creating
# or truncating) the lockfile on the LOCKFH handle and takes a non-blocking
# exclusive flock on it; dies if the file can't be opened or locked.

sub get_lock()
{
    # An empty $lockfile means locking was not requested.
    return if $lockfile eq '';

    if ($verbose) {
	print STDERR "getting lock\n";
    }

    unless (open(LOCKFH, '+>', $lockfile)) {
	die "Can't open lockfile $lockfile: $!";
    }
    unless (flock(LOCKFH, LOCK_EX | LOCK_NB)) {
	die "Can't lock file \`$lockfile': $!";
    }

    # NOTE(review): the handler only releases the lock, it does not exit,
    # so the program keeps running after SIGINT -- confirm this is intended.
    $SIG{INT} = \&release_lock;
}

#
# Release lock.
#
# Does nothing when no lockfile is configured.  Otherwise removes the
# lockfile and closes LOCKFH (which drops the flock).  Dies if either step
# fails.

sub release_lock()
{
    return unless $lockfile ne '';

    print STDERR "releasing lock\n" if $verbose;

    # Remove the name while the lock is still held.  If the handle were
    # closed first, another process could lock the file in the gap and then
    # have it unlinked from under it, letting a third process create and
    # lock a fresh file at the same time.
    unlink($lockfile)
	or die "Can't unlink lockfile $lockfile: $!";
    close(LOCKFH)
	or die "Can't close lockfile $lockfile: $!";
}

#
# collect command
#
# Options: -l <lockfile>, -c <1|0> (run the commands concurrently).
#
# Runs every command from %map through the shell and stores its output
# (optionally filtered and compressed) in
# $datadir/<directory for the current time>/<name>[<compsuffix>].
# Dies if the directory can't be created or, in sequential mode, if a
# command fails.

sub collect()
{
    my %opts;

    if (getopts('l:c:', \%opts)) {
	$lockfile   = $opts{'l'} if defined $opts{'l'};
	$concurrent = $opts{'c'} if defined $opts{'c'};
    } else {
	select(STDERR);
	HELP_MESSAGE();
	exit 1;
    }

    get_lock();

    my $statdir =  "${datadir}/" . strftime($dirs{$granularity}, localtime(time));

    if (! -d $statdir) {
	# List form: no shell is involved, so quotes or other special
	# characters in the path can't break the command.
	system('mkdir', '-p', $statdir) == 0
	    or die "Can't create directory $statdir for statistics: $!";
    }

    for my $name (keys %map) {
	# The pipeline is assembled as a shell command line on purpose: the
	# map commands, filter and compressor are shell snippets.
	my $command = "$map{$name}{'cmd'}";
	$command .= " | $filter"   if $filter;
	$command .= " | $compress" if $compression;
	$command .= " > $statdir/$name";
	$command .= "$compsuffix"  if $compression;

	print STDERR "run: $command\n" if $verbose;

	if ($concurrent) { # we run the programs in parallel
	    my $pid = fork();
	    if (!defined $pid) {
		# fork returns undef (not a negative number) on failure.
		# Testing for `< 0' would fall through to the `== 0' branch
		# and exec the command in place of this process.
		print STDERR "fork failed: $!\n";
	    } elsif  ($pid == 0) {
		exec("$command")
		    or die "Can't run \`$command': $!";
	    }
	} else {
	    system("$command") == 0
		or die "Can't run \`$command': $!";
	}
    }

    if ($concurrent) { # wait for all parallel commands to terminate
	while ((my $pid = wait) > 0) {
	    print STDERR "child process $pid exited\n" if $verbose;
	}
    }

    release_lock();
}

#
# script command
#
# Options are the same as for collect: -l <lockfile>, -c <1|0>.
#
# Prints to STDOUT an sh script that does the same work as `gather collect'
# with the current settings (data directory, granularity, map, filter,
# compression, locking, concurrency, verbosity).

sub script()
{
    my %opts;

    if (getopts('l:c:', \%opts)) {
	$lockfile   = $opts{'l'} if defined $opts{'l'};
	$concurrent = $opts{'c'} if defined $opts{'c'};
    } else {
	select(STDERR);
	HELP_MESSAGE();
	exit 1;
    }

    print "#!/bin/sh\n";
    print "#\n";
    print "# Script is generated by gather utility.\n";
    print "#\n";
    print "\n";
    if ($lockfile) {
	print "lockf -t 0 -k '$lockfile' true || exit 1\n";
	print "\n";
	print "trap \"rm -f '$lockfile'\" INT QUIT EXIT\n";
	print "\n";
    }
    # Use the directory layout of the configured granularity, the same one
    # collect uses, so that data written by the script is found by `show'.
    print "statdir='$datadir/'\$(date +$dirs{$granularity})\n";
    print "test -d \"\$statdir\" || mkdir -p \"\$statdir\"\n";
    print "\n";

    for my $name (keys %map) {
	my $command = "$map{$name}{'cmd'}";
	$command .= " | $filter"   if $filter;
	$command .= " | $compress" if $compression;
	$command .= " > \$statdir/$name";
	$command .= "$compsuffix"  if $compression;
	print "echo \"run: $command\" >&2\n" if $verbose;
	print "$command", ($concurrent ? ' &' : ''), "\n";
    }
    print "\nwait\n" if $concurrent;
    print "rm -f '$lockfile'\n" if $lockfile;
}

#
# return function that will generate date of the last collection.
#
# The returned generator yields exactly one item and then the empty list.
# The item is the list ('dir' => <directory relative to $datadir>,
# 'fmt' => <time formatted with $timefmt>) for the newest collection found
# by descending $datadir: newest YYYY-MM-DD directory, then newest hour,
# then (depending on $granularity) newest minute and second.  The empty
# list is returned right away when no suitable directory exists.
# Dies if a directory can't be opened.

sub make_lastdate_generator()
{
    my $made = 0;

    # Return the string-wise greatest two-digit entry of a directory, or
    # undef if there is none.
    my $newest_in = sub {
	my $path = shift;

	opendir(my $dh, $path)
	    or die "can't opendir $path: $!";

	my $newest;

	# defined() is required: a bare truth test would end the scan at an
	# entry named '0'.
	while (defined(my $entry = readdir($dh))) {
	    next if $entry !~ /^\d{2}\z/;
	    $newest = $entry if !defined $newest or $entry gt $newest;
	}

	closedir($dh);

	return $newest;
    };

    return sub {

	if ($made) {
	    return ();
	}

	$made = 1;

	opendir(my $dh, $datadir)
	    or die "can't opendir $datadir: $!";

	my $daydir = '0000-00-00';
	my ($year, $mon, $day);

	while (defined(my $entry = readdir($dh))) {
	    next if $entry !~ /^(\d{4})-(\d{2})-(\d{2})\z/;

	    # Fixed-width names compare correctly as strings; names with an
	    # impossible month or day are ignored.
	    if ($entry gt $daydir and $2 le '12' and $3 le '31') {
		($daydir, $year, $mon, $day) = ($entry, $1, $2, $3);
	    }
	}

	closedir($dh);

	return () if $daydir eq '0000-00-00';

	# How many levels below the day directory hold one collection.
	my $depth = $granularity eq 'hour' ? 1
		  : $granularity eq 'min'  ? 2
		  :                          3;

	my $dir = $daydir;
	my @hms = ('00', '00', '00');	# hour, minute, second

	for my $level (0 .. $depth - 1) {
	    my $newest = $newest_in->("$datadir/$dir");

	    return () if !defined $newest;

	    $hms[$level] = $newest;
	    $dir .= "/$newest";
	}

	my ($hour, $min, $sec) = @hms;
	my $time = timelocal($sec, $min, $hour, $day, $mon - 1, $year - 1900);

	return ('dir' => $dir,
		'fmt' => strftime($timefmt, localtime($time)));
    }
}

#
# return function that will generate date directories
#
#   $pstr - timeperiod string (see TIMEPERIODS in the manual below);
#           '' or '$' means the date of the last collection
#
# Each call of the returned generator yields the list
# ('dir' => <directory relative to $datadir>, 'fmt' => <time formatted
# with $timefmt>) for the next step of the period, one step being one unit
# of $granularity, and the empty list when the period is exhausted.
# If the period can't be parsed, a message is printed to STDERR and a
# generator that yields nothing is returned.

sub make_date_generator($)
{
    my $pstr = shift;

    # parse periods

    # Special case: date of the last collection.
    return make_lastdate_generator() if $pstr eq '$' or $pstr eq '';

    # Start from specific cases, replacing shortcuts by general expression

    my ($sec, $min, $hour, $mday, $mon, $year ) = localtime();

    my %now = ('day'  => "@{[$year+1900]}-@{[$mon+1]}-${mday}",
	       'hour' => $hour,
	       'min'  => $min);

    my %mult = ('d' => 24 * 60 * 60, 'h' => 60 * 60, 'm' => 60);

    # Nd  range N days ago -- now
    # Nh  range N hours ago -- now
    # Nm  range N minutes ago -- now

    if ($pstr =~ /^(\d+)([dhm])$/) {
	my $N    = $1;
	my $unit = $2;
	my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time() - $N * $mult{$unit});

	$pstr = "@{[$year+1900]}-@{[$mon+1]}-${mday}/${hour}/${min}--$now{'day'}/$now{'hour'}/$now{'min'}";
    } else {

	# Nd--${rest}
	# Nh--${rest}
	# Nm--${rest}

	if ($pstr =~ /^(\d+)([dhm])(--.+)$/) {
	    my $N    = $1;
	    my $unit = $2;
	    my $rest = $3;
	    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time() - $N * $mult{$unit});

	    $pstr = "@{[$year+1900]}-@{[$mon+1]}-${mday}/${hour}/${min}${rest}";
	}

	# ${beg}--Nd
	# ${beg}--Nh
	# ${beg}--Nm

	if ($pstr =~ /^(.+--)(\d+)([dhm])$/) {
	    my $beg  = $1;
	    my $N    = $2;
	    my $unit = $3;
	    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time() - $N * $mult{$unit});

	    $pstr = "$beg@{[$year+1900]}-@{[$mon+1]}-${mday}/${hour}/${min}";
	}

	# ././.  current day/current hour/current minute
	$pstr =~ s|^([^/]{1,10}/[^/-]{1,2}/)\.|${1}$now{'min'}|;
	$pstr =~ s|(--[^/]{1,10}/[^/-]{1,2}/)\.|${1}$now{'min'}|;

	# ./.    current day/current hour
	$pstr =~ s|^([^/]{1,10}/)\.|${1}$now{'hour'}|;
	$pstr =~ s|(--[^/]{1,10}/)\.|${1}$now{'hour'}|;

	# .      current day
	$pstr =~ s|\.|$now{'day'}|g;

    }

    # YYYY-MM-DD/HH/MM--/hh/mm (interpreted as YYYY-MM-DD/HH/MM--YYYY-MM-DD/hh/mm)
    # YYYY-MM-DD/HH/MM--//mm (interpreted as YYYY-MM-DD/HH/MM--YYYY-MM-DD/HH/mm)
    # YYYY-MM-DD/HH/MM--yyyy-mm-dd// (interpreted as YYYY-MM-DD/HH/MM--yyyy-mm-dd/HH/MM)
    # and so on.

    # some preparations: complete the start part to day/hour/minute
    $pstr =~ s|^([^/]+)--|$1/00/00--|;
    $pstr =~ s|^([^/]+)/([^/]+)--|$1/$2/00--|;

    if ($pstr =~ m|--([^/]*)(?:/([^/]*)(?:/([^/]*))?)?|) {

	my $day  = defined $1 ? $1 : '';
	my $hour = defined $2 ? $2 : '';
	my $min  = defined $3 ? $3 : '';

	# copy components omitted in the end part from the start part
	$pstr =~ s|^([^/]+)(.*--)/|$1$2$1/|                   if $day  eq '';
	$pstr =~ s|^([^/]+)/([^/]+)(.*--[^/]+)//|$1/$2$3/$2/| if $hour eq '';
	$pstr =~ s|^(.+)/(.+)--(.+)/$|$1/$2--$3/$2|           if $min  eq '';
    }

    print STDERR "Using timeperiod: $pstr\n" if $verbose;

    my (%beg, %end);

    # cut the start part off; what is left is either empty or "--<end part>"
    $pstr =~ s|^(\d{4})-(\d{1,2})-(\d{1,2})(?:/(\d{1,2})(?:/(\d{1,2}))?)?||;

    if (defined $3) {

	%beg = ('year' => $1,
		'mon'  => $2,
		'day'  => $3,
		'hour' => defined $4 ? $4 : '00',
		'min'  => defined $5 ? $5 : '00');

	if ($pstr eq '') {

	    # only the start part is given: the period covers the whole
	    # day, hour or minute that was named
	    %end = ('year' => $1,
		    'mon'  => $2,
		    'day'  => $3,
		    'hour' => defined $4 ? $4 : '23',
		    'min'  => defined $5 ? $5 : '59');

	} elsif ($pstr =~ m|^--(\d{4})-(\d{1,2})-(\d{1,2})(?:/(\d{1,2})(?:/(\d{1,2}))?)?$|) {

	    %end = ('year' => $1,
		    'mon'  => $2,
		    'day'  => $3,
		    'hour' => defined $4 ? $4 : '23',
		    'min'  => defined $5 ? $5 : '59');
	} else {
	    print STDERR "Can't parse timeperiod\n";
	    return sub {()};
	}

	my $step = $multipliers{$granularity};

	# With hourly granularity iterate on hour boundaries, so that the
	# hour containing the start is not skipped whatever its minute is.
	my $begmin = $granularity eq 'hour' ? 0 : $beg{'min'};

	# The end minute is inclusive; with one-second granularity that
	# means up to its last second.
	my $endsec = $granularity eq 'sec' ? 59 : 0;

	# Start one step early because the generator advances before it
	# yields.  (A fixed 60 seconds here is right for 'min' only.)
	my $begtime = timelocal(0, $begmin, $beg{'hour'}, $beg{'day'}, $beg{'mon'}-1, $beg{'year'}-1900) - $step;
	my $endtime = timelocal($endsec, $end{'min'}, $end{'hour'}, $end{'day'}, $end{'mon'}-1, $end{'year'}-1900);

	return sub {
	    $begtime += $step;
	    if ($begtime <= $endtime) {
		return ('dir' => strftime($dirs{$granularity}, localtime($begtime)),
			'fmt' => strftime($timefmt,            localtime($begtime)));
	    } else {
		return ();
	    }
	}

    } else {
	print STDERR "Can't parse timeperiod\n";
	return sub {()};
    }

}

#
# show command
#
# Options: -t <period> (timeperiod to show), -f <timefmt> (strftime(3)
# format of the timestamp put in front of every output line).
#
# The first remaining argument is a subcommand:
#   help      print the list of subcommands
#   utils     print a table of the entries of %map
#   grep      print lines (or, with -c, count of lines) matching a regexp
#   plot      feed matching data to gnuplot
#   filter    pipe stored output through an external filter program
#   assemble  build per-key time series from key/value matches
# An unknown subcommand is silently ignored.
#
# For every utility and every date produced by make_date_generator the file
# $datadir/<date dir>/<util>[<compsuffix>] is read if it exists; missing or
# unreadable files are skipped.

sub show()
{
    my %opts;

    if (getopts('t:f:', \%opts)) {

	$period  = $opts{'t'} if defined $opts{'t'};
	$timefmt = $opts{'f'} if defined $opts{'f'};

    } else {
	select(STDERR);
	HELP_MESSAGE();
	exit 1;
    }

    # subcommand (shadows the global $cmd on purpose)
    my $cmd  = shift @ARGV;

    if ((!$cmd) || ($cmd eq 'help')) {
	print "\n";
	print "Display gathered data\n";
	print "\n";
	print "usage: $progname [options] show <cmd> <args>\n";
	print "\n";
	print "Commands:\n";
	print "\n";
	print "  help                               - display this help screen\n";
	print "  \n";
	print "  utils                              - display names of utils from map file\n";
	print "  \n";
	print "  grep [options] <regexp> <util>     - display entries matched by regexp\n";
	print "    -c            - display count of matches instead of matched entries\n";
	print "  \n";
	print "  filter <filterprg> <util>          - filter output using specified filter program\n";
	print "  \n";
	print "  assemble [option] <regexp> <util>  - assemble entries by regexp with key/value pair\n";
	print "                                       an example of regexp: '", '^\s+(?v:\d+)\s+(?k:.+)$', "'\n";
	print "    -d <dir>      - store data in this directory\n";
	print "    -t <timestr>  - look for singularity at this time\n";
	print "  \n";
	print "  plot [options] <regexp> <util>     - plot data matched by regexp\n";
	print "    -c            - plot count of matches\n";
	print "    -t <term>     - set gnuplot term\n";
	print "    -o <output>   - set gnuplot output file\n";
	print "    -p <cmd>      - set gnuplot command\n";
	print "    -r <range>    - set gnuplot yrange\n";
	print "    -s <style>    - set gnuplot style\n";
	print "    -O <options>  - set additional gnuplot options\n";
	print "\n";

    } elsif ($cmd eq 'utils') {

	# widths of the name and cmd columns
	my $n = 0;
	my $m = 0;

	for my $name (keys %map) {
	    my $len = length($name);
	    $n = $len if $len > $n;
	    $len = length($map{$name}{'cmd'});
	    $m = $len if $len > $m;
	}

	my $out = '';
	my $max = 0;	# length of the longest row, used for the rulers
	for my $name (sort keys %map) {
	    my $str = sprintf ("%-${n}s %-${m}s  %s",
			       $name, $map{$name}{'cmd'}, $map{$name}{'desc'});
	    $out .= "$str\n";
	    my $len = length($str);
	    $max = $len if $len > $max;
	}
	print '-' x $max, "\n",
	      sprintf ("%-${n}s %-${m}s  %s\n", 'name', 'cmd', 'desc'),
	      '-' x $max, "\n",
	      $out, "\n";

    } elsif ($cmd eq 'grep') {
	my $count = 0;

	if (defined $ARGV[0] and $ARGV[0] eq '-c') {
	    shift @ARGV;
	    $count = 1;
	}

	die "grep command needs at least two arguments" if $#ARGV < 1;

	my $regexp = shift @ARGV;

	for my $util (@ARGV) {
	    my $date_generator = make_date_generator($period);

	    while (my %date = &$date_generator()) {
		my $file = "$datadir/$date{'dir'}/$util" . ($compression ? $compsuffix : '');

		if (-f $file) {
		    # compressed data is read through the decompression filter
		    $file = "$uncompress $file |" if $compression;

		    open(my $fh, $file) or next;

		    my $n = 0;

		    while (<$fh>) {
			if (/$regexp/) {
			    print "$date{'fmt'} $_" if ! $count;
			    $n++;
			}
		    }

		    close($fh);

		    print "$date{'fmt'} $n\n" if $count;
		}
	    }
	}
    } elsif ($cmd eq 'plot') {

	my $count  = 0;
	my %opts;

	if (getopts('cO:o:p:r:s:t:', \%opts)) {

	    $count  = 1          if defined $opts{'c'};

	    $gnuplot{'options'} = $opts{'O'} if defined $opts{'O'};
	    $gnuplot{'output'}  = $opts{'o'} if defined $opts{'o'};
	    $gnuplot{'cmd'}     = $opts{'p'} if defined $opts{'p'};
	    $gnuplot{'yrange'}  = $opts{'r'} if defined $opts{'r'};
	    $gnuplot{'style'}   = $opts{'s'} if defined $opts{'s'};
	    $gnuplot{'term'}    = $opts{'t'} if defined $opts{'t'};

	} else {
	    die "can't parse 'show plot' command options"
	}

	die "plot command needs at least two arguments" if $#ARGV < 1;

	my $regexp = shift @ARGV;

	# the regexp is used as a label inside a single-quoted gnuplot
	# string, where a quote is written as two quotes
	my $ylabel = $regexp; $ylabel =~ s/'/''/g;

	# The value follows the timestamp, which takes as many
	# whitespace-separated columns as the time format has words.
	my @f = split(/\s+/, $timefmt);
	my $col = @f + 1;

	for my $util (@ARGV) {
	    open(my $plot, "| $gnuplot{'cmd'}")
		or die "can't run plotting program $gnuplot{'cmd'}: $!";

	    print $plot "set term $gnuplot{'term'}\n" if defined $gnuplot{'term'};
	    print $plot "set timefmt '$timefmt'\n";
	    print $plot "set xdata time\n";
	    print $plot "set xlabel 'time'\n";
	    print $plot "set ylabel '$ylabel'\n";
	    print $plot "set title '$util: $ylabel'\n";
	    print $plot "set yrange [$gnuplot{'yrange'}]\n" if defined $gnuplot{'yrange'};
	    print $plot "set style data $gnuplot{'style'}\n" if defined $gnuplot{'style'};
	    print $plot "set output '" . $gnuplot{'output'} . "'\n" if defined $gnuplot{'output'};
	    print $plot "$gnuplot{'options'}\n" if defined $gnuplot{'options'};
	    print $plot "plot '-' using 1:$col title ''\n";

	    my $date_generator = make_date_generator($period);

	    while (my %date = &$date_generator()) {
		my $file = "$datadir/$date{'dir'}/$util" . ($compression ? $compsuffix : '');
		if (-f $file) {
		    $file = "$uncompress $file |" if $compression;

		    open(my $fh, $file) or next;

		    my $n = 0;

		    while (<$fh>) {
			if (/$regexp/) {
			    # plot the first capture group if the regexp has
			    # one, the whole line otherwise
			    print $plot "$date{'fmt'} ", (defined $1) ? "$1\n" : $_
				if !$count;
			    $n++;
			}
		    }

		    close($fh);

		    print $plot "$date{'fmt'} $n\n" if $count;
		}
	    }
	    # 'e' ends the inline data of "plot '-'"
	    print $plot "e\n";
	    close($plot);
	}
    } elsif ($cmd eq 'filter') {

	die "filter command needs at least two arguments" if $#ARGV < 1;

	my $filter = shift @ARGV;

	for my $util (@ARGV) {
	    my $date_generator = make_date_generator($period);

	    while (my %date = &$date_generator()) {
		my $file = "$datadir/$date{'dir'}/$util" . ($compression ? $compsuffix : '');

		if (-f $file) {
		    my $pipe = ($compression ? $uncompress : 'cat') . " $file | $filter |";

		    open(my $fh, $pipe) or next;

		    while (<$fh>) {
			print "$date{'fmt'} $_";
		    }

		    close($fh);
		}
	    }
	}
    } elsif ($cmd eq 'assemble') {

	my $dir   = '';
	my $ttime = '';

	my %opts;
	if (getopts('d:t:', \%opts)) {

	    $dir   = $opts{'d'} if defined $opts{'d'};
	    $ttime = $opts{'t'} if defined $opts{'t'};

	} else {
	    die "can't parse 'show assemble' command options"
	}

	die "assemble command needs at least two arguments" if $#ARGV < 1;

	my $regexp = shift @ARGV;

	# numbers of the capture groups holding the value and the key
	my ($v, $k);

	# regexp should contain key/value marks specified as (?k:pattern) and (?v:pattern).
	# Example: '^\s+(?v:\d+)\s+(?k:.+)$'

	# find key/value marks in regexp
	if ($regexp =~ /\(\?v:.*\).*\(\?k:.*\)/) {
	    $v = 1;
	    $k = 2;
	} elsif ($regexp =~ /\(\?k:.*\).*\(\?v:.*\)/) {
	    $k = 1;
	    $v = 2;
	} else {
	    die "Can't find key and value in regular experession: $regexp"
	}

	# remove key/value marks in regexp, which turns the marked groups
	# into ordinary capture groups
	$regexp =~ s/^(.*\()\?v:(.*\).*)$/$1$2/;
	$regexp =~ s/^(.*\()\?k:(.*\).*)$/$1$2/;

	for my $util (@ARGV) {

	    # $data{key}{timestamp} = value
	    my %data;
	    my $date_generator = make_date_generator($period);

	    while (my %date = &$date_generator()) {
		my $file = "$datadir/$date{'dir'}/$util" . ($compression ? $compsuffix : '');

		if (-f $file) {
		    $file = "$uncompress $file |" if $compression;

		    open(my $fh, $file) or next;

		    while (<$fh>) {
			if (/$regexp/) {
			    # $$k and $$v are symbolic references to the
			    # capture variables $1 and $2
			    no strict 'refs';
			    my $key = $$k;
			    my $val = $$v;
			    $data{$key} = {} if !defined $data{$key};
			    $data{$key}{$date{'fmt'}} = $val;
			}
		    }

		    close($fh);
		}
	    }

	    if ($dir and ! -d $dir) {
		system("mkdir -p '$dir'") == 0
		    or die "Can't create directory $dir for assembled data: $!";
	    }

	    # series whose change at $ttime exceeds all their earlier changes
	    my @worth = ();

	    for my $key (sort keys %data) {

		my $out;
		my $file;
		my $name;

		# $1 is the key without trailing whitespace
		$key =~ /^(.*[^\s])\s*\n*$/;

		if ($dir) {
		    $file = "${util}_${1}.dat";
		    $file =~ s|/|_|g;
		    open $out, '>', "$dir/$file"
			or die "Can't open file $dir/$file: $!";
		} else {
		    $out = \*STDOUT;
		    $name = "${util}: ${1}";
		    print "$name:\n\n";
		}

		my $old;
		my $max = 0;

		for my $date (sort keys %{$data{$key}}) {

		    my $delta = defined $old ? $data{$key}{$date} - $old : '-';

		    print $out "$date $data{$key}{$date}\t$delta\n";

		    if ($ttime and defined $old) {
			if ($date eq $ttime) {
			    push @worth, $dir ? $file : $name if abs($delta) > $max;
			}
			$max = abs($delta) if abs($delta) > $max;
		    }

		    $old = $data{$key}{$date};
		}

		if ($dir) {
		    close $out
			or die "Can't close file: $!";
		} else {
		    print "\n";
		}
	    }

	    if ($ttime) {
		if (@worth) {
		    print "Data with singularity at '$ttime':\n\n";
		    for my $d (@worth) {
			print "$d\n";
		    }
		} else {
		    print "Haven't found data with singularity at '$ttime'\n";
		}
	    }
	}
    }
}

#
# archive command
#
# Options: -t <period> (timeperiod to archive).
#
# Creates tar archive <name> (first argument) with the data directories of
# the period that exist under $datadir, then appends the configuration and
# map files to it.  Prints usage to STDERR and exits with status 1 if the
# archive name is missing; dies if tar can't be run or reports an error.

sub archive()
{
    my %opts;
    if (getopts('t:', \%opts)) {

	$period  = $opts{'t'} if defined $opts{'t'};

    } else {
	select(STDERR);
	HELP_MESSAGE();
	exit 1;
    }

    my $arch  = shift @ARGV;

    if (!$arch) {
	select(STDERR);
	HELP_MESSAGE();
	exit 1;
    }

    # List form of open/system: tar is run without a shell, so quotes and
    # other special characters in paths can't break the command.
    # tar reads the list of directories to archive from its stdin (-T -).
    open(my $fh, '|-', 'tar', '-C', $datadir, '-cf', $arch, '-T', '-')
	or die "Can't run tar: $!";

    my $date_generator = make_date_generator($period);

    while (my %date = &$date_generator()) {
	print $fh "$date{'dir'}\n" if -d "$datadir/$date{'dir'}";
    }

    # When closing a pipe fails only because the program exited non-zero,
    # $! is 0 and the status is in $?.
    close($fh)
	or die "tar returned error: " . ($! ? $! : 'exit status ' . ($? >> 8));

    for my $f (($confile, $mapfile)) {
	# an empty path means the file is not used
	next unless $f;

	my($dir, $file) = ($f =~ m|^(.*/)([^/]+)|) ? ($1, $2) : ('.', $f);

	# $? is -1 when tar could not be started at all (reason in $!)
	system('tar', '-C', $dir, '-rf', $arch, $file) == 0
	    or die "tar returned error: " . ($? == -1 ? $! : 'exit status ' . ($? >> 8));
    }
}

#
# expire command
#
# Takes the number of days from the first argument if there is one,
# otherwise uses $expiration.  Removes every YYYY-MM-DD directory under
# $datadir whose date is more than that many days before the start of the
# current day.  Zero days means nothing is removed.  Dies if $datadir
# can't be read or a directory can't be removed.

sub expire()
{
    $expiration = shift @ARGV if $ARGV[0];

    if ($expiration !~ /^\d+$/) {
	# Don't stay silent: otherwise a typo means data is never expired.
	print STDERR "expire: invalid number of days \`$expiration', nothing is removed\n";
	return;
    }

    return if $expiration <= 0;

    print STDERR "expiring data older then $expiration days\n" if $verbose;

    my $now = time;
    my ($sec, $min, $hour) = localtime $now;

    # move back to the start of the current day
    $now -= $hour * 3600 + $min * 60 + $sec;

    opendir(my $dir, $datadir)
	or die "Can't open $datadir: $!";

    for my $d (readdir($dir)) {
	if (-d "$datadir/$d" and $d =~ /^(\d{4})-(\d{2})-(\d{2})$/) {
	    my $year = $1;
	    my $mon  = $2;
	    my $day  = $3;

	    # timelocal dies on an impossible date (e.g. 2008-02-31); such
	    # a directory must not abort expiring of the others.
	    my $daystart = eval { timelocal(0, 0, 0, $day, $mon-1, $year-1900) };

	    if (!defined $daystart) {
		print STDERR "skip: $datadir/$d (not a valid date)\n" if $verbose;
		next;
	    }

	    if (($now - $daystart) > ($expiration * 3600 * 24)) {
		print STDERR "delete: $datadir/$d\n" if $verbose;
		# list form: no shell, no quoting problems; `--' protects
		# against a path starting with a dash
		system ('rm', '-Rf', '--', "$datadir/$d") == 0
		    or die "Can't remove directory $datadir/$d: $!";
	    }
	}
    }

    closedir($dir)
	or die "Can't close $datadir: $!";
}

#
# Main
#

parse_options ();
check_vars ();
eval_file($mapfile) if $mapfile;

# Commands and their handlers.  A command may be abbreviated to any prefix
# of at least two letters (e.g. `arch' for `archive').
my @commands = (
    [ qr/^co(?:l(?:l(?:e(?:c(?:t)?)?)?)?)?$/    => \&collect ],
    [ qr/^sc(?:r(?:i(?:p(?:t)?)?)?)?$/          => \&script  ],
    [ qr/^sh(?:o(?:w)?)?$/                      => \&show    ],
    [ qr/^ar(?:c(?:h(?:i(?:v(?:e)?)?)?)?)?$/    => \&archive ],
    [ qr/^ex(?:p(?:i(?:r(?:e)?)?)?)?$/          => \&expire  ],
);

my $handler;

for my $entry (@commands) {
    my ($pattern, $code) = @$entry;

    if ($cmd =~ $pattern) {
	$handler = $code;
	last;
    }
}

if (!defined $handler) {
    # unknown command: usage goes to STDERR like for other usage errors
    select(STDERR);
    HELP_MESSAGE();
    exit 1;
}

$handler->();

__END__

=pod

=head1 NAME

gather -- collect and display system statistics

=head1 DESCRIPTION

gather can be used to store system statistics provided by a variety of
system utilities and to display them. It is just a wrapper around these
utilities that makes working with system statistics more convenient.

Commands we want to use to get statistics should be described in
gather.map file. This file contains a map -- Perl hash structure
initialized to something like this:

  %map = ('uptime'   => {'desc' => 'system uptime',
                         'cmd'  => '/usr/bin/uptime'},
          'sysctl'   => {'desc' => 'sysctl variables',
                         'cmd'  => '/sbin/sysctl -a'},
          'sockstat' => {'desc' => 'sockstat output',
                         'cmd'  => '/usr/bin/sockstat'});

Location of gather.map and other gather configuration variables are
set in gather.cfg file.

When you have configured gather.cfg and the map you can run gather to
collect data:

  gather collect

gather will run all commands specified in map and store output in
$datadir directory. It is supposed that you will set up cron to run
this command with desired periodicity.

If you don't have Perl on the host where you want to collect
statistics you can generate shell script on another host (e.g. your
workstation) using the command:

  gather script

and use it. Collected statistics can be copied to your workstation and
analyzed with gather.

Also you will probably need to setup command in crontab to run daily
and expire old data:

  gather expire <days>

otherwise you risk running out of free space. Data older than
<days> days will be deleted.

The data are stored in $datadir directory (compressed if this is
specified in config or by command line switch). You can browse and
look at them as is, but in many cases it is more convenient to use gather:

  gather show <subcommand>

Run:

  gather show help

to see minihelp about available subcommands.

Running the command:

  gather arch -t <timeperiod> <archname>

you can archive data for specified time period.

=head1 OPTIONS

There are some options that can be used to modify behavior of gather
utility. Run

  gather help

to read their description.

=head1 TIMEPERIODS

When running the show command to display statistics, specify the
timeperiod with the -t option. By default (empty timeperiod) only the
last collection is shown.

The most general form of timeperiod is:

  YYYY-MM-DD/HH/MM--yyyy-mm-dd/hh/mm

where YYYY-MM-DD/HH/MM is start of timeperiod and yyyy-mm-dd/hh/mm is
its end. You can skip MM and HH in start or end part of range. E.g:

  2008-11-16/14--2008-11-17

This is interpreted as:

  2008-11-16/14/00--2008-11-17/23/59

It is also possible to specify only the first part of a timeperiod. E.g:

  2008-11-16/14 (interpreted as 2008-11-16/14/00--2008-11-16/14/59)

or

  2008-11-16 (interpreted as 2008-11-16/00/00--2008-11-16/23/59)

If day, hour or minute in end part of timeperiod is the same as in the
start one, you can skip it:

  YYYY-MM-DD/HH/MM--/hh/mm (interpreted as YYYY-MM-DD/HH/MM--YYYY-MM-DD/hh/mm)

  YYYY-MM-DD/HH/MM--//mm (interpreted as YYYY-MM-DD/HH/MM--YYYY-MM-DD/HH/mm)

  YYYY-MM-DD/HH/MM--yyyy-mm-dd// (interpreted as YYYY-MM-DD/HH/MM--yyyy-mm-dd/HH/MM)

  and so on.

Here are some shortcuts you can use to reduce typing:

  .         current day

  ./.       current day/current hour

  ././.     current day/current hour/current minute

  $         date of the last collection

  Nd        N days ago

  Nh        N hours ago

  Nm        N minutes ago

If N{d,h,m} is used alone (there is only a start part) then it is
replaced by the timeperiod "from that time until now". I.e. timeperiod
"Nd" is the same as "Nd--././.".

=head1 SEE ALSO

L<sysctl(8)>,
L<netstat(1)>,
L<vmstat(8)>,
L<ps(1)>,
L<iostat(8)>

=head1 AUTHOR

Mikolaj Golub <to.my.trociny@gmail.com>

=cut
