#!/usr/local/bin/perl -w
# ianb@nessie.mcc.ac.uk
# $Id: cddbtofilename,v 1.8 2003/12/12 06:30:10 ianb Exp $
# v1 20030105 reimplemented after loss of other version
# v2 20030929 sigh, changed page format again
# v3 20031028 rewrite to use MP3::Archive and accept html

use strict;
use Cwd ();
use File::Temp ();
use MP3::Archive;

# Script-wide globals:
#   $me      - basename of $0, used as the prefix of every message
#   $verbose - when true each rename is echoed (set by -v, cleared by -q)
our ($me,$verbose);
$me=($0=~/(?:.*\/)?(.*)/)[0];
$verbose=0;

# MP3::Archive builds the final filenames. If it cannot be loaded,
# $archive stays undef and the rename loop falls back to a plain
# "NN - artist - title - track.ext" name.
# A block eval is used (rather than a string eval) so nothing is
# compiled at runtime; the trailing 1 makes success unambiguous.
# NOTE(review): the compile-time "use MP3::Archive;" near the top of the
# file aborts the script before this fallback can run when the module
# is missing.
my $archive=undef;
if(eval { require MP3::Archive; 1 })
{
	$archive=MP3::Archive->new;
}
else
{
	warn("$me: cannot find MP3::Archive. Carrying on anyway...\n");
}

# Track-type selectors, copied from MP3::Archive's package variables.
our ($ALBUM,$TRACK,$GUESS);
$ALBUM=$MP3::Archive::ALBUM;
$TRACK=$MP3::Archive::TRACK;
$GUESS=$MP3::Archive::GUESS;

# Settings controlled by the command line.
my $force=0;            # -f: carry on despite a track-count mismatch
my $dryrun=0;           # -d: only show what would be renamed
my $tracktype=$GUESS;   # -a / -t / -g

# Exact option string => action. Anything not listed here (including
# -h) prints the usage message and exits.
my %setopt=(
	'-f' => sub { $force=1; },
	'-d' => sub { $dryrun=1; },
	'-q' => sub { $verbose=0; },
	'-v' => sub { $verbose=1; },
	'-a' => sub { $tracktype=$ALBUM; },
	'-t' => sub { $tracktype=$TRACK; },
	'-g' => sub { $tracktype=$GUESS; },
);

# Consume leading options. Options are matched exactly, so e.g.
# "--force" is no longer silently treated as "-f". A lone "-" is not an
# option: it is left in place so it can name stdin as the cddb file.
# "--" ends option processing.
while(@ARGV && ($ARGV[0]=~/^-./))
{
	my $opt=shift(@ARGV);
	last if($opt eq '--');

	my $action=$setopt{$opt};
	usage() unless($action);
	$action->();
}

# Need the cddb file plus at least one file to rename.
if($#ARGV<1) { usage(); }

my $cddbfile=shift;
my @files=@ARGV;

# Slurp the cddb page into $cddbdata. "-" means standard input; any
# other name is opened strictly as a file for reading (three-argument
# open, so characters such as "|" or ">" in the name are not
# interpreted as pipes or modes).
my $cddbdata;
if($cddbfile eq '-')
{
	$cddbdata=join('',<STDIN>);
}
else
{
	open(my $cddbfh,'<',$cddbfile) or die("$me: cannot open $cddbfile: $!\n");
	$cddbdata=join('',<$cddbfh>);
	close($cddbfh) or die("$me: cannot close $cddbfile: $!\n");
}

# The doctype declaration is case-insensitive in html.
if($cddbdata=~/\!DOCTYPE/i)
{
	# html, run through lynx
	# first write to tmp file, this lets input from pipe work
	my $tmpdir="/tmp";
	if(exists($ENV{TMPDIR})) { $tmpdir = $ENV{TMPDIR}; }

	# File::Temp picks an unpredictable name and creates the file
	# exclusively, avoiding the symlink race of a "$me.$$" name in a
	# shared directory. UNLINK removes the file at exit even if we
	# die below.
	my ($tmpfh,$tmpfile)=eval
	{
		File::Temp::tempfile("$me.XXXXXX",
							 DIR    => $tmpdir,
							 SUFFIX => ".html",
							 UNLINK => 1);
	};
	unless($tmpfh)
	{
		die("$me: cannot open tmp file in $tmpdir: $@");
	}

	print {$tmpfh} $cddbdata
		or die("$me: cannot write tmp file $tmpfile: $!\n");
	close($tmpfh) or die("$me: cannot close tmp file $tmpfile: $!\n");

	# List-form pipe open: lynx is run directly, without a shell, so
	# nothing in the temp file name can be interpreted as shell syntax.
	open(my $lynx,'-|','lynx','-dump',$tmpfile)
		or die("$me: cannot run lynx: $!\n");
	$cddbdata=join('',<$lynx>);
	unless(close($lynx))
	{
		# $! is only set for I/O errors; a failing lynx is reported
		# through the wait status in $?.
		my $why=$! ? "$!" : "wait status $?";
		die("$me: error running lynx: $why\n");
	}

	unlink($tmpfile) or warn("$me: warning: cannot unlink $tmpfile: $!\n");
}


# parse cddb data
#
# A small line-oriented state machine walks the lynx-formatted page:
#   PREAMBLE       - skip lines until one mentions "Disc Info"
#   TITLE          - the first "Artist / Title" line sets $artist, $title
#   TRACKSONTHISCD - skip lines until the "Track Title [Downloads]" header
#   INTRACKS       - collect consecutively numbered track lines in @tracks;
#                    the first line that is not a track ends parsing

my ($PREAMBLE,$TITLE,$TRACKSONTHISCD,$INTRACKS)=(0,1,2,3);

my ($artist,$title,@tracks);
my $tracknum=0;
my $state=$PREAMBLE;
my $expectdownloads=0;

# the 2 re's here are mostly to make [] work in filenames
# unfortunately, with downloads, not every track has downloads.

# "   1. Track Name     "
my $plainre=qr/^\s*(\d+)\.\s+(.*\S+)/;

# "   1. Track Name     [pressplay..."	or
# "   1. Track Name     [29][pressplay..." or just
# "   1. Track Name     "
# This relies on there being at least 2 spaces
# before the download stuff which may not always be true.
my $downloadsre=qr/^\s*(\d+)\.\s+((?:\S+\s)+)(?:\s+(?:\[\d+\])?\[p)?/;

while($cddbdata=~/^(.*)$/gm)
{
	my $line=$1;

	if($state == $PREAMBLE)
	{
		$state=$TITLE if($line=~/Disc\s+Info/);
		next;
	}

	if($state == $TITLE)
	{
		if($line=~/\s*(.*)\s+\/\s+(.*)/)
		{
			($artist,$title)=($1,$2);
			$state=$TRACKSONTHISCD;
		}
		next;
	}

	if($state == $TRACKSONTHISCD)
	{
		if($line=~/^\s*Track\s+Title/)
		{
			# the header may or may not carry a "Downloads" column
			$expectdownloads=($line=~/^\s*Track\s+Title\s+Downloads/) ? 1 : 0;
			$state=$INTRACKS;
		}
		next;
	}

	if($state != $INTRACKS) # oops
	{
		die("$me: Internal error - unknown state $state\n");
	}

	# in the track listing: anything that is not a track line ends it
	my $trackre=$expectdownloads ? $downloadsre : $plainre;
	last unless($line=~$trackre);

	my ($number,$thistrack)=($1,$2);
	$tracknum++;
	if($number != $tracknum)
	{
		die("$me: error, expecting track $tracknum, got $number\n");
	}
	push(@tracks,$thistrack);
}

# The number of files given must match the number of tracks parsed
# from the cddb page, unless -f was given.
if(scalar(@files) != $tracknum)
{
	my $msg="$me: track mismatch, cddb has $tracknum, I have " . scalar(@files);
	if($force)
	{
		warn("${msg}, attempting to carry on anyway...\n");
	}
	else { die("$msg\n") }
}

# Pair files with tracks by the number embedded in each file name.
@files=sort byfirstnum @files;

for my $i (0..$#files)
{
	# With -f there may be more files than parsed tracks; there is no
	# name to give the surplus files, so leave them alone.
	if($i > $#tracks)
	{
		warn("$me: no cddb track for $files[$i], skipping\n");
		next;
	}

	my $tnum=sprintf("%02d",$i+1);

	# Split into directory and base name; the new name is always
	# created in the same directory as the original file.
	my ($dir,$base)=($files[$i]=~/^(.*\/)(.*)/s) ? ($1,$2) : (undef,$files[$i]);
	my $path=defined($dir) ? Cwd::abs_path($dir) : Cwd::getcwd();
	unless(defined($path))
	{
		warn("$me: warning: cannot resolve directory of $files[$i], skipping\n");
		next;
	}
	my $fullpath=$path . "/" . $base;

	# Extension is taken from the base name only, so a dot in a
	# directory name cannot be mistaken for one.
	my $ext=($base=~/\.([^.]*)\z/s) ? $1 : "mp3"; # default

	# default if MP3::Archive not installed; components are sanitised
	# so that e.g. a "/" in a track title cannot end up in the name
	my $outfile=join(" - ",$tnum,san($artist," - "),san($title," - "),
					 (san($tracks[$i]," - ").".$ext"));

	if(defined($archive))
	{
		my $delim=$archive->delim($fullpath,$tracktype);
		$outfile=$archive->makefilename($fullpath,$tnum,san($artist,$delim),
										san($title,$delim),
										san($tracks[$i],$delim),
										san($ext,$delim),$tracktype);
	}

	# Check for a clash at the place the file will really be written,
	# not relative to the current directory.
	my $dest="$path/$outfile";
	if(-e $dest) { warn("$me: $outfile exists, skipping\n"); next; }
	print "$files[$i] -> $outfile\n" if($dryrun || $verbose);
	unless($dryrun)
	{
		rename($files[$i],$dest) or warn("$me: warning: cannot rename $files[$i] to $outfile: $!\n");
	}
}

# sort function
#
# Comparator for sort(): orders two paths ($a and $b) numerically by
# the first run of digits in their base names. The directory part is
# ignored so that digits in a directory name (e.g. "cd2/10.mp3") do not
# hide the track number. A name without any digits counts as 0.
sub byfirstnum
{
	my @num;
	for my $file ($a,$b)
	{
		(my $base=$file)=~s/^.*\///s; # drop leading directories
		push(@num,($base=~/(\d+)/) ? $1 : 0);
	}
	return ($num[0] <=> $num[1]);
}

# sanitize filename
#
# san($name,$delim) returns a copy of $name that is safe to use as one
# component of a generated filename:
#   - trailing whitespace is removed
#   - ":" and "/" are removed
#   - every literal occurrence of $delim (the separator placed between
#     filename components) is replaced by "-", or by "_" when the
#     delimiter itself is "-"
# $delim is matched as a literal string, never as a regex. If it is
# undefined or empty, no delimiter replacement is done.
sub san
{
	my ($name,$delim)=@_;

	$name=~s/\s+$//; # delete trailing spaces

	# if we decide to strip 8th bit.
	# $name=~tr/\200-\377/\000-\177/;

	# Remove dodgy chars ([:/]) from name
	$name=~s/[:\/]//g;

	# An empty pattern would make s/// reuse the last successful
	# regex, so bail out before the substitution.
	return $name unless(defined($delim) && length($delim));

	# replace delimiters with "-" unless
	# that is the delimiter, in which case "_"
	my $replace=($delim eq "-") ? "_" : "-";
	$name=~s/\Q$delim\E/$replace/g;

	return $name;
}


# Print the command-line synopsis and the meaning of every option on
# stderr, then terminate the program (via die, so the exit status is
# non-zero). Takes no arguments and never returns.
sub usage
{
	my @help=(
		"Usage: $me [-v] [-q] [-d] [-a] [-t] [-g] [-f] [-h]  <cddbfile> <files...>",
		" -v\tVerbose.",
		" -q\tQuiet (default).",
		" -d\tDry run (don't rename, show what would be done).",
		" -a\tTreat as album tracks.",
		" -t\tTreat as non-album tracks.",
		" -g\tGuess track type from file location (default).",
		" -f\tForce renaming if wrong number of tracks.",
		" -h\tThis help.",
	);
	die(join("\n",@help) . "\n");
}

__END__

=head1 NAME

cddbtofilename - rename files according to cddb web page

=head1 SYNOPSIS

B<cddbtofilename> [I<-v>] [I<-q>] [I<-d>] [I<-a>] [I<-t>] [I<-g>] [I<-f>] [I<-h>] [I<cddbfile>|I<->] [I<file.mp3>...]

=head1 DESCRIPTION

cddbtofilename takes data from the cddb (B<http://www.gracenote.com>)
web page and renames files accordingly.

As cddbtofilename works by scraping the web page, it can easily break
if gracenote redesign the web site. If that happens, check the address
below for an update.

As cddbtofilename uses data from the web page, unlike many other
freedb/cddb based tools, it has no need for the original CD.

Input from cddbfile can be either html or formatted output from
L<lynx(1)> (either from C<lynx -dump page.html> or from pressing B<p>
then selecting B<Save to a local file> from within lynx once you have
found the page you want). If raw html is passed to cddbtofilename, it
runs lynx to format it before parsing.

L<MP3::Archive(3)> is used for creating filenames and deciding whether
a particular track should be treated as an album or non-album track
(unless overridden by B<-a> or B<-t>), see L<MP3::Archive::Config> for
details on configuring F<.mp3archiverc> to specify your filename
styles.

=head1 OPTIONS

=over 4

=item B<-v>

Verbose output.

=item B<-q>

Quiet (no output). This is the default.

=item B<-d>

Dry run. Just prints the changes it would make without actually doing
them.

=item B<-a>

Treat files as album tracks.

=item B<-t>

Treat files as non-album tracks.

=item B<-g>

Guess whether to treat files as album tracks or not, based on the
B<current> directory of the file.

=item B<-f>

If the number of files specified does not agree with the number listed
in the cddbfile, normally cddbtofilename will abort. With this option
it will warn you then carry on, renaming as many files as it can.
Use with care.

=item B<-h>

Show a short help message.

=back

=head1 BUGS

None known. Please report any found to ianb@nessie.mcc.ac.uk	

=head1 SEE ALSO    

L<MP3::Archive(3)>, L<MP3::Archive::Config(3)>,
L<freedbtofilename(1)>, L<mp3-archive-tools(1)>, L<mp3lint(1)>,
L<lynx(1)>

=head1 AUTHOR

Ian Beckwith <ianb@nessie.mcc.ac.uk>

=head1 AVAILABILITY

cddbtofilename is part of the mp3-archive-tools package.

The latest version can be found at:

B<http://nessie.mcc.ac.uk/~ianb/projects/mp3-archive-tools/>

=head1 COPYRIGHT

Copyright 2003 Ian Beckwith <ianb@nessie.mcc.ac.uk>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

=cut


