#!/usr/bin/env perl

# "THE BEER-WARE LICENSE":
# Aldis Berjoza <aldis@berjoza.eu> wrote this file. As long as you retain this
# notice you can do whatever you want with this stuff. If we meet some day, and
# you think this stuff is worth it, you can buy me a beer in return.

# This program generates a word list for Diceware.
# More info about Diceware: http://world.std.com/~reinhold/diceware.html

use strict;
use warnings;

use Encode qw(decode_utf8 encode_utf8);
use List::Util qw(shuffle);
use List::MoreUtils qw(uniq);
use POSIX qw{floor};

# --- Settings controlled by command line switches ---------------------------

# Accepted word length range (-min / -max).
my ($MIN_LEN, $MAX_LEN) = (5, 12);

# Output shaping: -s sorts the list, -n hides the dice numbers.
my ($SORTED_LIST, $PRINT_NUMBERS) = (0, 1);

# Optional dicewarekit entries (-d enables them, -f names the file).
my $USE_DICEWAREKIT  = 0;
my $DICEWAREKIT_FILE = '/usr/local/share/diceware/dicewarekit.txt';

# Input word list (-w); an empty string means "not given".
my $WORDLIST = '';

# LaTeX output (-x), its paper size (-letter) and column count (-c).
my ($LATEX, $PAPER, $COLUMNS) = (0, 'a4paper', 5);

# Program version shown in the help text.
my $version = '1.4';

# --- Working storage ---------------------------------------------------------

# Word lists in their various processing stages, plus the dicewarekit lines.
my (@alist, @blist, @dicewarekit);

# Scratch variable for the word or line currently being processed.
my $word;

# Stays -1 until a switch other than -l has been seen; -l checks it.
my $lswitch = -1;

# Command line parsing.
#
# Each switch is removed from the front of @ARGV as it is handled; switches
# that take a value remove the following argument as well.  When a switch is
# repeated, the last occurrence wins.  -l and -h/--help terminate the program
# after doing their work.

# Return the argument that follows $switch, or die when there is none.
my $next_value = sub {
  my ($switch) = @_;
  die "ERR: Option '$switch' requires a value\n" unless @ARGV;
  return shift @ARGV;
};

# Like $next_value, but the argument must be a non-negative decimal integer.
my $next_number = sub {
  my ($switch) = @_;
  my $value = $next_value->($switch);
  # Anchored on both ends: the whole argument has to consist of digits, so
  # input such as "12abc" is rejected instead of being accepted.
  die "ERR: Invalid number: $value\n" unless $value =~ m/\A[0-9]+\z/;
  return $value;
};

# Loop on the number of remaining arguments rather than on the truth of the
# next one, so an argument of "0" or "" is reported as invalid instead of
# silently ending option processing.
while (@ARGV) {
  my $switch = shift @ARGV;

  if ($switch eq '-w') {
    $lswitch = 0;
    $WORDLIST = $next_value->($switch);
  } elsif ($switch eq '-d') {
    $lswitch = 0;
    $USE_DICEWAREKIT = 1;
  } elsif ($switch eq '-f') {
    $lswitch = 0;
    $DICEWAREKIT_FILE = $next_value->($switch);
    $USE_DICEWAREKIT = 1;
  } elsif ($switch eq '-min') {
    $MIN_LEN = $next_number->($switch);
    $lswitch = 0;
  } elsif ($switch eq '-max') {
    $MAX_LEN = $next_number->($switch);
    $lswitch = 0;
  } elsif ($switch eq '-n') {
    $lswitch = 0;
    $PRINT_NUMBERS = 0;
  } elsif ($switch eq '-s') {
    $lswitch = 0;
    $SORTED_LIST = 1;
  } elsif ($switch eq '-c') {
    $COLUMNS = $next_number->($switch);
    $lswitch = 0;
  } elsif ($switch eq '-x') {
    $lswitch = 0;
    $LATEX = 1;
  } elsif ($switch eq '-letter') {
    $lswitch = 0;
    $PAPER = 'letterpaper';
  } elsif ($switch eq '-l') {
    # -l is only valid when no other switch preceded it.
    die "ERR: Can't use -l switch with other switches\n" if $lswitch != -1;
    my $lang = $next_value->($switch);

    # List-form pipe open: aspell is executed directly, without a shell, so
    # the language argument can never be interpreted as shell syntax.
    open(my $aspell, '-|', 'aspell', '--clean-affixes', '--clean-words',
         '-l', $lang, 'dump', 'master')
      or die "ERR: Can't run aspell\n     $!\n";

    my @words;
    while (defined(my $entry = <$aspell>)) {
      # Drop everything from the first '/' up to the line end, keeping the
      # newline itself.
      $entry =~ s#/.*(\n)$#$1#;
      push @words, encode_utf8(lc(decode_utf8($entry)));
    }
    # close() on a pipe reports a non-zero exit status of the child.
    close($aspell) or die "ERR: aspell failed for language '$lang'\n";

    my @unique = uniq(@words);
    print sort @unique;
    exit;
  } elsif ($switch eq '-h' or $switch eq '--help') {
    print <<"EOF";
dicewaregen v$version by Aldis Berjoza <aldis\@berjoza.eu>

USAGE:
  dicewaregen [OPTION]

Where OPTIONS are:
  -w file      - wordlist
  -d           - use dicewarekit.txt
  -f file      - path to dicewarekit.txt file (implies -d)
  -min number  - minimal word length (Default: $MIN_LEN)
  -max number  - maximal word length (Default: $MAX_LEN)
  -n           - hide dice number
  -s           - sort list (default not sorted)
  -x           - output LaTeX file
  -letter      - use letter paper instead of A4 (for LaTeX)
  -c number    - how many columns to output, when writing LaTeX file?
                 (Default: $COLUMNS)

  -l lang      - generate word list for lang language, where lang is
                 any language installed for aspell. This option is usefull
		 to generate wordlist files for different languages.
		 HINT: you can cat these lists and make huge multilanguage
		 list. dicewaregen will exit after processing this switch.
		 -l switch shouldn't be used with pther switches.

NOTE:
  dicewaregen will not generate same dictionary over and over.
  Each time you run dicewaregen it will generate different dictionary.
  It is not a bug, it's a feature.

  If used multiple times, switches will overwride previous switches.

  More info about Diceware: http://world.std.com/~reinhold/diceware.html

EXAMPLE USAGE:
  To generate Latvian Diceware dictionary:

    dicewaregen -l lv > lv.words
    dicewaregen -w lv.words > dice.txt

EOF
    exit;
  } else {
    die "ERR: Invalid command line option '$switch'\n";
  }
}

# Build the final word selection in @blist.
#
# Steps: read the optional dicewarekit file and the word list, keep the words
# whose length is within $MIN_LEN .. $MAX_LEN, lowercase and de-duplicate
# them, shuffle, put the dicewarekit entries in front, and keep as many
# entries as a five-dice table needs.  Every entry left in @blist is a decoded
# string terminated by exactly one "\n".

die "ERR: No wordlist specified! See --help\n"
  if !defined $WORDLIST or $WORDLIST eq '';

# A table addressed by five dice has 6**5 = 7776 rows.
my $needed = 6 ** 5;

# Read a UTF-8 text file and return its non-empty lines as decoded strings.
# Line endings are normalised to a single "\n", so CRLF files and a last line
# without a terminator are handled like any other line.
my $read_lines = sub {
  my ($path) = @_;
  open(my $fh, '<', $path) or die "ERR: Can't open '$path'\n     $!\n";
  my @lines;
  # Explicit defined(): a final line consisting of "0" must not end the loop.
  while (defined(my $raw = <$fh>)) {
    my $line = decode_utf8($raw);
    $line =~ s/\r?\n\z//;
    push @lines, "$line\n" if length $line;
  }
  close $fh;
  return @lines;
};

# Dicewarekit lines are decoded like the words so that both are encoded
# exactly once when they are printed.
@dicewarekit = $USE_DICEWAREKIT ? $read_lines->($DICEWAREKIT_FILE) : ();

@alist = ();
for my $line ($read_lines->($WORDLIST)) {
  my $len = length($line) - 1;    # the trailing newline does not count
  push @alist, lc($line) if $len >= $MIN_LEN and $len <= $MAX_LEN;
}
@blist = uniq(@alist);

# De-duplicate across both sources first and count afterwards: a word that is
# also a dicewarekit entry must not be counted twice.
@alist = uniq(@dicewarekit, shuffle(@blist));
die "ERR: Not enough words\n" if @alist < $needed;

my @chosen = @alist[0 .. $needed - 1];
if ($SORTED_LIST) {
  @blist = sort @chosen;
} else {
  @blist = @chosen;
}

# Write the first 7776 entries of @blist to STDOUT, either as a LaTeX document
# ($LATEX set) or as plain text with one entry per line, optionally prefixed
# by the dice roll and a tab ($PRINT_NUMBERS).

# A table addressed by five dice has 6**5 = 7776 rows.
my $entry_total = 7776;

# Work on a copy limited to the rows that are printed.  Slots that are missing
# or undefined are dropped, so a short list is reported instead of printed.
my @entries = grep { defined } @blist[0 .. $entry_total - 1];
die "ERR: Not enough words\n" if @entries < $entry_total;

# Strip the line ending here and add it back explicitly when printing, so the
# output does not depend on whether an entry carried its own newline.
s/[\r\n]+\z// for @entries;

# Characters with a special meaning in LaTeX and their printable forms.
my %latex_escape = (
  '\\' => '\\textbackslash{}',
  '{'  => '\\{',
  '}'  => '\\}',
  '$'  => '\\$',
  '&'  => '\\&',
  '#'  => '\\#',
  '%'  => '\\%',
  '_'  => '\\_',
  '~'  => '\\textasciitilde{}',
  '^'  => '\\textasciicircum{}',
);
my $latex_special = join '', map { quotemeta } keys %latex_escape;

# Return $text with every LaTeX special character escaped.  A single pass is
# used so that the backslashes introduced here are not escaped again.
my $latex_safe = sub {
  my ($text) = @_;
  $text =~ s/([$latex_special])/$latex_escape{$1}/g;
  return $text;
};

# decTo5dices is called with a leading & so that the call is valid whatever
# prototype the sub is declared with.
if ($LATEX) {
  print <<"EOF";
\\documentclass[10pt,twoside,onecolumn,final]{article}
\\usepackage{geometry,polyglossia,multicol}
\\geometry{$PAPER,left=3cm,right=1cm,top=1cm,bottom=2cm}
\\begin{document}
    \\begin{multicols}{3}
        \\begin{tabular}{c||c|c|c|c|c|c|}
            ~ & 1 & 2 & 3 & 4 & 5 & 6 \\\\\\hline\\hline
            1 & A & B & C & D & E & F \\\\\\hline
            2 & G & H & I & J & K & L \\\\\\hline
            3 & M & N & O & P & Q & R \\\\\\hline
            4 & S & T & U & V & W & X \\\\\\hline
            5 & Y & Z & 1 & 2 & 3 & 4 \\\\\\hline
            6 & 5 & 6 & 7 & 8 & 9 & 0 \\\\\\hline
        \\end{tabular}

        \\begin{tabular}{c||c|c|c|c|c|c|}
            ~ & 1 & 2 & 3 & 4 & 5 & 6 \\\\\\hline\\hline
            1 & a & b & c & d & e & f \\\\\\hline
            2 & g & h & i & j & k & l \\\\\\hline
            3 & m & n & o & p & q & r \\\\\\hline
            4 & s & t & u & v & w & x \\\\\\hline
            5 & y & z & \\textvisiblespace{} & \\_{} & \\textasciitilde{} & @ \\\\\\hline
            6 & \\# & \\\$ & \\% & \\textasciicircum{} & \\&{} & * \\\\\\hline
        \\end{tabular}

        \\begin{tabular}{c||c|c|c|c|c|c|}
            ~ & 1 & 2 & 3 & 4 & 5 & 6 \\\\\\hline\\hline
            1 & ! & @ & \\# & \\\$ & \\% & \\textasciicircum \\\\\\hline
            2 & \\&{} & * & ( & ) & - & = \\\\\\hline
            3 & + & [ & ] & \\{ & \\} & \\textbackslash{} \\\\\\hline
            4 & | & ` & ; & : & ' & " \\\\\\hline
            5 & < & > & / & ? & . & , \\\\\\hline
            6 & y & z & w & \\_{} & \\textvisiblespace{} & q \\\\\\hline
        \\end{tabular}

    \\end{multicols}
    \\begin{multicols}{$COLUMNS}
        \\setlength{\\linewidth}{5pt}
        \\setlength{\\columnseprule}{.4pt}
        \\ttfamily
        \\small
        \\par\\noindent
EOF

  for my $i (0 .. $#entries) {
    print "\t\t", &decTo5dices($i), '~~',
          encode_utf8($latex_safe->($entries[$i])), "\\\\\n";
  }

  print "\t\\end{multicols}\n\\end{document}\n";
} else {
  for my $i (0 .. $#entries) {
    print &decTo5dices($i), "\t" if $PRINT_NUMBERS;
    print encode_utf8($entries[$i]), "\n";
  }
}

exit;

# Convert a zero-based list index into its dice roll string.
#
# The index is written in base 6 and every digit is shifted up by one, so the
# digits 0..5 become the die faces 1..6.  The result is left-padded with '1'
# (the face that stands for digit 0) to a width of five characters.
#
#   $_[0]   - non-negative integer index; 0 .. 7775 covers five dice
#   returns - string of die faces, e.g. 0 -> '11111', 7775 -> '66666'
#
# Indexes of 7776 and above are not truncated; they yield more than five
# characters.
#
# The sub is declared without a prototype: it takes one argument, and an
# empty prototype would make a plain decTo5dices($i) call a compile error
# wherever the declaration is already known.
sub decTo5dices {
  my ($number) = @_;
  my $dice = '';

  while ($number > 0) {
    $dice = ($number % 6 + 1) . $dice;
    # $number is positive here, so int() truncation equals rounding down.
    $number = int($number / 6);
  }

  # Pad only when needed; a negative repeat count triggers a warning on
  # newer perls.
  my $missing = 5 - length $dice;
  $dice = ('1' x $missing) . $dice if $missing > 0;
  return $dice;
}
