#!/usr/local/bin/perl
#
# Git commit: 3872dac107037394bdd4f6f90bf82388b5836547
#
# Convert Cyrus folders to Dovecot.
#
# Written by Holger Weiss <holger@ZEDAT.FU-Berlin.DE> at Freie Universitaet
# Berlin, Germany, Zentraleinrichtung fuer Datenverarbeitung (ZEDAT).
#
# ------------------------------------------------------------------------------
# Copyright (c) 2008 Freie Universitaet Berlin.
# All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it under
# the same terms as Perl itself.  See perlartistic(1).  This program is
# distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
# ------------------------------------------------------------------------------
#
require 5.006;	# We need Perl >= 5.6.0 for our open() calls.

use strict;
use IO::Zlib;
use warnings;
use Data::Dumper;
use File::Basename;
use Fcntl qw(:seek);
use Getopt::Long qw(:config gnu_getopt auto_help auto_version);
use Sys::Hostname;

#
# Default settings (can be overridden on the command line).
#
# Within pathnames, any occurrence of "%u" will be replaced by the current user
# name, any occurrence of "%<n>u" will be replaced by the <n>'th character of
# that user name, any occurrence of "%h" will be replaced by Cyrus' directory
# "hash" character for that user name (i.e., "%h" is equivalent to "%1u" if the
# first character of the user name is a lowercase letter), and any occurrence of
# "%x" will be replaced by Cyrus' "fulldirhash" character for that user name.
#
# See "perldoc cyrus2dovecot" for details.
#
my %DEFAULT = (
	dovecot_inbox => '/tmp/dovecot/%u/Maildir',	# Maildir++ INBOX to create.
	cyrus_inbox => '/var/spool/imap/user/%u',	# Cyrus INBOX directory.
	cyrus_seen => '/var/imap/user/%h/%u.seen',	# Cyrus seen database.
	cyrus_sub => '/var/imap/user/%h/%u.sub',	# Cyrus subscription database.
	cyrus_quota => undef,           # Cyrus quota file (undef: none).
	cyrus_quota_format => 1,        # Cyrus quota format (1: legacy).
	dovecot_uidlist_format => 3,    # Create this dovecot-uidlist format.
	dovecot_host => hostname,       # The host name for Maildir++ filenames.
	dovecot_crlf => 0,              # Use CR+LF instead of LF in Dovecot?
	dovecot_zlib => 0,              # Use zlib compressed mails in Dovecot?
	default_quota => 0,             # Use this quota as fallback (0: none).
	dump_meta => 0,                 # Print the metadata structure?
	debug => 0,                     # Print debug output?
	quiet => 0,                     # Be quiet (0: no, 1: somewhat, 2: yes)?
	edit_foldernames => []          # List of folder name substitutions.
);

#
# Plan of attack: Basically, we convert the folders of a user in two steps.
#
# 1) We call c_read_mailbox() which reads Cyrus' metadata for all folders of the
#    user and creates a data structure such as the following.  In this example,
#    the user has the folder "ac" and the subfolder "ac/dc" next to his INBOX,
#    he is subscribed to these folders, and there are two e-mails per folder.
#    The user also defined a few IMAP keywords (a.k.a. user flags).  Both the
#    system flags and user keywords are saved as bitmasks for each e-mail.
#
#    	$meta->{subscriptions} = [ 'ac', 'ac.dc' ]
#    	     ->{quota} = 2147483648
#    	     ->{box}->{'&INBOX'}->{uidvalidity} = 1107601073
#    	                        ->{uidnext} = 3
#    	                        ->{nonrecent} = 2	# Last non-recent UID.
#    	                        ->{keywords}  = [ 'Junk', '$Label1' ]
#    	                        ->{mail}->{1}->{internaldate} = 1107601472
#    	                                     ->{sysflags} = $sysmask
#    	                                     ->{usrflags} = $usrmask
#    	                                  {2}->{internaldate} = 1107601543
#    	                                     ->{sysflags} = $sysmask
#    	                                     ->{usrflags} = $usrmask
#    	            ->{'ac'}    ->{uidvalidity} = 1108639232
#    	                        ->{uidnext} = 3
#    	                        ->{nonrecent} = 1	# Last non-recent UID.
#    	                        ->{keywords}  = [ 'Private', 'Work' ]
#    	                        ->{mail}->{1}->{internaldate} = 1108639290
#    	                                     ->{sysflags} = $sysmask
#    	                                     ->{usrflags} = $usrmask
#    	                                  {2}->{internaldate} = 1108639299
#    	                                     ->{sysflags} = $sysmask
#    	                                     ->{usrflags} = $usrmask
#    	            ->{'ac/dc'} ->{uidvalidity} = 1109821442
#    	                        ->{uidnext} = 3
#    	                        ->{nonrecent} = 1	# Last non-recent UID.
#    	                        ->{keywords}  = [ 'Rock', 'Pop' ]
#    	                        ->{mail}->{1}->{internaldate} = 1109821455
#    	                                     ->{sysflags} = $sysmask
#    	                                     ->{usrflags} = $usrmask
#    	                                  {2}->{internaldate} = 1109821500
#    	                                     ->{sysflags} = $sysmask
#    	                                     ->{usrflags} = $usrmask
#
# 2) We call d_write_mailbox() which creates a Dovecot Maildir++ directory
#    including all subfolders, writes the metadata, and converts the actual
#    e-mails.
#

#
# Forward declarations of the generic and the public Cyrus/Dovecot subroutines.
# Declaring the prototypes up front allows for calling these subroutines before
# they are defined (and without parentheses, as in "main;" or "... or usage;").
#
sub main ();
sub usage ();
sub debug (@);
sub info (@);
sub warning (@);
sub error (@);
sub fatal (@);
sub message ($@);
sub makedir ($);
sub readint ($$);
sub xread ($$$);
sub slurp ($);
sub fixpath ($$@);
sub c_fulldirhash ($);
sub c_read_skiplist ($);
sub c_read_mailbox ($$$$$);
sub d_write_mailbox ($$$);

#
# IMAP flags.  Each flag is a single bit, so that the flags of an e-mail can be
# combined into (and tested against) a bitmask.
#
use constant FLAG_ANSWERED => (1 << 0);	# As stored in Cyrus' index database.
use constant FLAG_FLAGGED  => (1 << 1);	# As stored in Cyrus' index database.
use constant FLAG_DELETED  => (1 << 2);	# As stored in Cyrus' index database.
use constant FLAG_DRAFT    => (1 << 3);	# As stored in Cyrus' index database.
use constant FLAG_SEEN     => (1 << 4);	# Stored in Cyrus' seen database.

#
# Cyrus skiplist database constants.  The first five are the record types, the
# others describe the file header (see the format description quoted within
# c_read_skiplist()).
#
use constant INORDER           => 1;
use constant ADD               => 2;
use constant DELETE            => 4;
use constant COMMIT            => 255;
use constant DUMMY             => 257;
use constant HEADER_SIZE       => 28;	# Seven 4-byte fields after the magic.
use constant HEADER_MAGIC_SIZE => 20;	# length(HEADER_MAGIC)
use constant HEADER_MAGIC      => "\241\002\213\015skiplist file\0\0\0";

#
# Cyrus mailbox header constants.
#
use constant MAILBOX_HEADER_MAGIC_SIZE => 115;	# length(MAILBOX_HEADER_MAGIC)
use constant MAILBOX_HEADER_MAGIC      =>
    "\241\002\213\015Cyrus mailbox header\n" .
    "\"The best thing about this system was that it had lots of goals.\"\n" .
    "\t--Jim Morris on Andrew\n";	# Gesundheit!

#
# Miscellaneous constants.
#
use constant UINT32_MAX => 4294967295;	# 2^32 - 1

# The version string is derived from the RCS "Revision" and "Date" keywords.
our $VERSION = sprintf('%d.%d (%04d-%02d-%02d)', q$Revision: 1.3 $ =~ /(\d+)/g,
    q$Date: 2008/10/05 17:45:06 $ =~ /(\d{4})\/(\d{2})\/(\d{2})/);

my $MYSELF = basename($0);	# Our own name, used as prefix of all messages.
my ($FOLDERS, $MAILS, $SIZE, $USER, $FROM_STDIN, $QUOTADATA, %CONF);

#
# Parse the command line into %CONF.  Most options are accepted spelled with
# underscores, spelled with hyphens, and as a single letter.
#
GetOptions(
	\%CONF,
	'edit_foldernames|edit-foldernames|E=s@',
	'cyrus_inbox|cyrus-inbox|C=s',
	'cyrus_quota|cyrus-quota|Q=s',
	'cyrus_quota_format|cyrus-quota-format|O=i',
	'cyrus_quota_dir|cyrus-quota-dir|B=s',
	'cyrus_database_dir|cyrus-database-dir|A=s',
	'cyrus_seen|cyrus-seen|S=s',
	'cyrus_sub|cyrus-sub|U=s',
	'default_quota|default-quota|N=s',
	'dovecot_inbox|dovecot-inbox|D=s',
	'dovecot_host|dovecot-host|H=s',
	'dovecot_uidlist_format|dovecot-uidlist-format|F=i',
	'dovecot_crlf|dovecot-crlf|c',
        'dovecot_zlib|dovecot-zlib|z',
	'dump_meta|dump-meta|m',
	'quiet|q+',
	'debug|d',
	'bail|b',
	'h',
	'v'
) or usage;

#
# Map "-h" and "-v" to "--help" and "--version" by re-executing ourselves.  The
# long options are presumably handled by Getopt::Long itself, as "auto_help"
# and "auto_version" are enabled where the module is loaded.
#
if ($CONF{h}) {
	exec($0, '--help') or die;
}
if ($CONF{v}) {
	exec($0, '--version') or die;
}

#
# The directory options merely change the defaults, so explicitly specified
# seen, subscription, and quota files take precedence over them.
#
if (defined($CONF{cyrus_database_dir})) {
	$DEFAULT{cyrus_seen} = $CONF{cyrus_database_dir} . '/%u.seen';
	$DEFAULT{cyrus_sub} = $CONF{cyrus_database_dir} . '/%u.sub';
}
if (defined($CONF{cyrus_quota_dir})) {
	$DEFAULT{cyrus_quota} = $CONF{cyrus_quota_dir} . '/user.%u';
	$DEFAULT{cyrus_quota_format} = 1;
}

# Use the default for each setting which wasn't specified on the command line.
foreach my $opt (keys %DEFAULT) {
	$CONF{$opt} = $DEFAULT{$opt} unless exists($CONF{$opt});
}

fatal('Option dovecot-uidlist-format must be set to: 1 or 3')
    if ($CONF{dovecot_uidlist_format} != 1 and
        $CONF{dovecot_uidlist_format} != 3);

$SIG{__WARN__} = sub { fatal('Caught exception:', @_) };	# Perl warnings.

# Read the user names from the standard input if none were specified.
$FROM_STDIN = (@ARGV == 0) ? 1 : 0;

#
# A quota file which isn't in the legacy format is read only once, right here,
# and then consulted for each user (see c_read_mailbox()).
#
$QUOTADATA = c_read_skiplist($CONF{cyrus_quota})
    if ($CONF{cyrus_quota} and $CONF{cyrus_quota_format} != 1);

main;
exit 0;

# ----- Generic subroutines. ---------------------------------------------------

#
# Loop over the specified users and convert their e-mails.  This is done within
# a subroutine in order to make it callable from error().
#
# The user names are read from the standard input (one per line) if $FROM_STDIN
# is set, and taken from @ARGV otherwise.  As both sources are global, a nested
# call from error() simply continues with the next user.  Empty user names
# (e.g., blank input lines) are skipped.
#
sub main () {
	#
	# Test for definedness (not for truth), so that a user name such as "0"
	# won't silently terminate the loop.
	#
	while (defined(my $user = $FROM_STDIN ? <STDIN> : shift(@ARGV))) {
		chomp($user);
		next if $user eq '';

		my $start = time;
		my $meta = {};
		my $dovecot_inbox = $CONF{dovecot_inbox};
		my $cyrus_inbox = $CONF{cyrus_inbox};
		my $cyrus_seen = $CONF{cyrus_seen};
		my $cyrus_sub = $CONF{cyrus_sub};
		my $cyrus_quota = $CONF{cyrus_quota};

		debug("Converting the e-mail folders of $user.");

		# (Re)set "global" variables.
		$USER = $user;
		$FOLDERS = $MAILS = $SIZE = 0;

		#
		# Resolve "%u", "%h", "%x", and "%<n>u" within pathnames.  The
		# quota pathname is per-user only if it's in the legacy format.
		#
		fixpath(1, $user, $dovecot_inbox);
		fixpath(0, $user, $cyrus_inbox, $cyrus_seen, $cyrus_sub);
		fixpath(0, $user, $cyrus_quota)
		    if ($CONF{cyrus_quota} and $CONF{cyrus_quota_format} == 1);

		# Do the actual conversion.
		c_read_mailbox($meta, $cyrus_inbox, $cyrus_seen, $cyrus_sub,
		    $cyrus_quota);
		d_write_mailbox($meta, $cyrus_inbox, $dovecot_inbox);

		# Give some feedback.
		print Dumper($meta) if $CONF{dump_meta};
		info(sprintf('%u messages in %u folders (%.1f MiB, %u s)',
		    $MAILS, $FOLDERS, $SIZE / 1024 / 1024, time - $start))
		    unless $CONF{quiet} >= 2;

		debug("Done converting the e-mail folders of $user.");
	}
}

#
# Print usage information to the standard error output and exit >0.
#
sub usage () {
	#
	# Point our standard output to the standard error output, so that the
	# help text printed by the child process ends up there.  The child is
	# started without a shell in order to cope with pathnames containing
	# whitespace or shell metacharacters.
	#
	open(STDOUT, '>&STDERR')
	    or fatal("Cannot redirect standard output: $!");
	system($0, '--help');
	exit 1;
}

#
# Hand the message over to info() if debug output was requested (using
# "--debug"), and silently discard it otherwise.
#
sub debug (@) {
	my @text = @_;

	return $CONF{debug} ? info(@text) : ();
}

#
# Write an informational message to the standard output.
#
sub info (@) {
	message(\*STDOUT, @_);
}

#
# Write a message tagged as "(warning)" to the standard error output.  If we
# were asked to "--bail", a warning terminates the program with exit status 1.
#
sub warning (@) {
	message(\*STDERR, '(warning)', @_);
	exit 1 if $CONF{bail};
}

#
# Write a message tagged as "(error)" to the standard error output and give up
# on the current user.  Unless we were asked to "--bail", the remaining users
# (if any) are processed by re-entering main() before we exit >0.
#
sub error (@) {
	message(\*STDERR, '(error)', @_);
	if (not $CONF{bail}) {
		main();	# Continue with the next user (if any).
	}
	exit 1;
}

#
# Write a message tagged as "(fatal)" to the standard error output and exit >0
# right away, without looking at any remaining users.
#
sub fatal (@) {
	message(\*STDERR, '(fatal)', @_);
	exit 1;
}

#
# Print a message to the given file handle.  The message is prefixed with our
# program name and, if known, with the name of the current user.  The message
# parts are joined using spaces, and trailing newlines are stripped from them.
#
sub message ($@) {
	my ($out, @parts) = @_;
	my $tag = defined($USER) ? "$MYSELF [$USER]" : $MYSELF;

	chomp(@parts);
	print $out "$tag: @parts\n";
}

#
# Make sure the specified directory exists.  Missing parent directories are
# created first (recursively).  Bail out if a directory cannot be created.
#
sub makedir ($) {
	my $path = shift;

	return if -d $path;	# Nothing to do.

	my $above = dirname($path);

	makedir($above) unless -d $above;
	mkdir($path) or error("Cannot create directory $path: $!");
	debug('Created directory:', $path);
}

#
# Read four bytes from the given file handle and return them as an unsigned
# 32-bit integer, assuming "network" (big-endian) byte order.  The file name is
# used for error messages only.  Bail out on error.
#
sub readint ($$) {
	my ($file, $handle) = @_;

	return unpack('N', xread($file, $handle, 4));
}

#
# Read exactly the specified number of bytes from the given file handle and
# return them.  Short reads are retried until either all bytes were read or
# the end of the file is reached.  Bail out on read errors and on premature
# EOF.  The file name is used for error messages only.
#
sub xread ($$$) {
	my ($file, $handle, $size) = @_;
	my $data;
	my $have = 0;

	while (1) {
		# Append to what we've got so far.
		my $got = read($handle, $data, $size - $have, $have);

		error("Cannot read $file: $!") if not defined($got);
		$have += $got;
		last if $have == $size or $got <= 0;
	}
	error("EOF after reading $have instead of $size bytes from $file.")
	    if $have != $size;
	return $data;
}

#
# Return everything which can be read from the given file handle, starting at
# the current position.
#
sub slurp ($) {
	my ($handle) = @_;
	local $/ = undef;	# No record separator, i.e., read all at once.

	return <$handle>;
}

#
# Expand the placeholders "%u" (user name), "%h" (Cyrus' directory "hash"
# character), "%x" (Cyrus' "fulldirhash" character), and "%<n>u" (the <n>'th
# character of the user name) within pathnames.  The first argument tells
# whether each "^" within the user name is to be replaced with a "." when
# expanding "%u", the second argument is the user name, and all remaining
# arguments are the pathnames.  These are modified in place, nothing is
# returned.
#
sub fixpath ($$@) {
	my $fixdot = shift;
	my $user = shift;
	my @letter = split(//, $user);
	my $fullhash = c_fulldirhash($user);
	my $hash = $letter[0];

	# This is how Cyrus "hashes" non-[a-z]-characters.
	if ($hash =~ /^[A-Z]$/) {
		$hash = lc($hash);
	} elsif ($hash !~ /^[a-z]$/) {
		$hash = 'q';
	}

	# Note that "%<n>u" still refers to the unmodified user name.
	$user =~ s/\^/./g if $fixdot;

	# The loop variable is an alias, so we modify the caller's variables.
	foreach my $path (@_) {
		$path =~ s/%u/$user/g;
		$path =~ s/%h/$hash/g;
		$path =~ s/%x/$fullhash/g;
		$path =~ s/%(\d+)u/$letter[$1-1]/g;
	}
}

# ----- Cyrus subroutines. -----------------------------------------------------

#
# Forward declarations of the internal Cyrus helper subroutines, so that their
# prototypes are known before the subroutines are defined.
#
sub _c_read_folder ($$$$);
sub _c_read_header ($);
sub _c_read_index ($);
sub _c_read_old_seen ($);
sub _c_read_legacy_quota ($);
sub _c_read_skiplist_item ($$);
sub _c_parse_seendata ($);
sub _c_seen ($$);
sub _c_make_uid ($$);

#
# Return Cyrus' "fulldirhash" character for the given user name, i.e., one of
# the 23 letters "A" to "W".  See also:
#
# tools/rehash:dir_hash_c()
#
# Every character of the user name (including spaces) goes into the hash.  The
# intermediate value is truncated to 32 bits after each step, as the Cyrus
# server computes it using a 32-bit unsigned integer.  Without the truncation,
# a Perl built with 64-bit integers would yield a different character for user
# names of about ten or more characters.
#
sub c_fulldirhash ($) {
	my $user = shift;
	my $n = 0;

	foreach my $char (split(//, $user)) {
		$n = (($n << 3) ^ ($n >> 5)) ^ ord($char);
		$n &= 0xFFFFFFFF;	# Emulate 32-bit unsigned arithmetic.
	}
	return chr(ord('A') + ($n % 23));
}

#
# Read a "skiplist" database file (or a flat text file) and return a reference
# to a hash containing the records.  Records which were removed from the
# database by a DELETE log record are not returned.  A file which doesn't exist
# is treated as an empty database.  Bail out on error.
#
sub c_read_skiplist ($) {
#
# | /*
# |  * disk format; all numbers in network byte order
# |  *
# |  * there's the data file, consisting of the multiple records of "key",
# |  * "data", and "skip pointers", where skip pointers are the record number of
# |  * the data pointer.  [...]
# |  */
# |
# | /*
# |    header "skiplist file\0\0\0"
# |    version (4 bytes)
# |    version_minor (4 bytes)
# |    maxlevel (4 bytes)
# |    curlevel (4 bytes)
# |    listsize (4 bytes)
# |      in active items
# |    log start (4 bytes)
# |      offset where log records start, used mainly to tell when to compress
# |    last recovery (4 bytes)
# |      seconds since unix epoch
# |
# |    1 or more skipnodes, one of:
# |
# |      record type (4 bytes) [DUMMY, INORDER, ADD]
# |      key size (4 bytes)
# |      key string (bit string, rounded to up to 4 byte multiples w/ 0s)
# |      data size (4 bytes)
# |      data string (bit string, rounded to up to 4 byte multiples w/ 0s)
# |      skip pointers (4 bytes each)
# |        least to most
# |      padding (4 bytes, must be -1)
# |
# |      record type (4 bytes) [DELETE]
# |      record ptr (4 bytes; record to be deleted)
# |
# |      record type (4 bytes) [COMMIT]
# |
# |    record type is either
# |      DUMMY (first node is of this type)
# |      INORDER
# |      ADD
# |      DELETE
# |      COMMIT (commit the previous records)
# | */
# |
# | enum {
# |     INORDER = 1,
# |     ADD = 2,
# |     DELETE = 4,
# |     COMMIT = 255,
# |     DUMMY = 257
# | };
# |
# | #define HEADER_MAGIC ("\241\002\213\015skiplist file\0\0\0")
# | #define HEADER_MAGIC_SIZE (20)
#
# [ lib/cyrusdb_skiplist.c ]
#
	my $file = shift;
	my $skiplist = {};
	my %key_at;	# File offset of a record => key of that record.
	my %offset_of;	# Key => file offset of the current record for that key.
	my ($buf, $n);

	if (not -e $file) {
		my $message = "File not found, pretending it's empty: $file";

		#
		# The seen or subscription database will not be created until
		# the user saw an e-mail or subscribed a folder.  So, we just
		# return an empty $skiplist.
		#
		$CONF{quiet} ? debug($message) : info($message);
		return $skiplist;
	}

	debug('Reading:', $file);
	open(my $handle, '<', $file) or error("Cannot open $file: $!");

	# Read the header magic.
	defined($n = read($handle, $buf, HEADER_MAGIC_SIZE))
	    or error("Cannot read $file: $!");

	#
	# Assume it's a skiplist file if the header magic is okay.  Otherwise,
	# assume it's a flat text file.
	#
	if ($n == HEADER_MAGIC_SIZE and $buf eq HEADER_MAGIC) {
		# Read the actual header.
		$buf = xread($file, $handle, HEADER_SIZE);
		my @header = unpack('N7', $buf);
		warning('Unknown skiplist database version:', $header[0])
		    if $header[0] != 1;
		debug('Minor skiplist database version:', $header[1]);

		# Read the records.
		while (1) {
			#
			# Remember where this record starts, as DELETE records
			# refer to the record to be deleted by its file offset.
			#
			my $offset = tell($handle);

			# Don't mistake a read error for the end of the file.
			defined($n = read($handle, $buf, 4))
			    or error("Cannot read $file: $!");
			last if $n == 0;
			error("Read $n instead of 4 bytes from $file.")
			    if $n != 4;
			my $rectype = unpack('N', $buf);

			# Parse the record type.
			if ($rectype == COMMIT) {
				debug('Record type: COMMIT');
				next;
			} elsif ($rectype == DELETE) {
				debug('Record type: DELETE');
				my $target = readint($file, $handle);
				my $gone = $key_at{$target};

				#
				# Forget the key, unless it was re-added by a
				# record other than the one to be deleted.
				#
				if (defined($gone)
				    and defined($offset_of{$gone})
				    and $offset_of{$gone} == $target) {
					delete($skiplist->{$gone});
					delete($offset_of{$gone});
					debug("Deleted skiplist record: $gone");
				}
				next;
			} elsif ($rectype == INORDER) {
				debug('Record type: INORDER');
			} elsif ($rectype == ADD) {
				debug('Record type: ADD');
			} elsif ($rectype == DUMMY) {
				debug('Record type: DUMMY');
			} else {
				error('Unknown record type:', $rectype);
			}

			# Read and save the key and the data, if any.
			my $key = _c_read_skiplist_item($file, $handle);
			my $data = _c_read_skiplist_item($file, $handle);

			if (defined($key)) {
				$skiplist->{$key} = $data;
				$key_at{$offset} = $key;
				$offset_of{$key} = $offset;
				$data = '(undef)' if not defined($data);
				debug("Saved skiplist record: $key = $data");
			}

			#
			# Skip the "skip pointers" (4 bytes each), terminated by
			# 4 bytes of -1 padding.
			#
			1 while readint($file, $handle) != 0xFFFFFFFF;
		}
	} else {
		debug('Parsing as a flat text file:', $file);
		seek($handle, 0, SEEK_SET) or error('Cannot seek in:', $file);
		while (<$handle>) {
			# One record per line: the key, a tab, and the data.
			my ($key, $data) = /(.*)\t(.*)/;

			error("Cannot parse $file.")
			    if not (defined($key) and defined($data));
			$skiplist->{$key} = $data;
			debug("Saved flat file record: $key = $data");
		}
	}
	close($handle) or error("Cannot close $file: $!");
	debug('Done reading:', $file);
	return $skiplist;
}

#
# Collect everything we need to know about a user's mailbox in the given data
# structure: the metadata of all folders (found by walking the Cyrus directory
# tree), the list of subscribed folders, and the quota limit in bytes.  Bail
# out on error.
#
sub c_read_mailbox ($$$$$) {
	my ($meta, $rootdir, $seenfile, $subfile, $quotafile) = @_;
	my (%known, @subscribed, $seendata);

	#
	# If the seen file is specified as "cyrus.seen", the seen state is read
	# from a file of that name within each folder, see _c_read_folder().
	#
	$seendata = c_read_skiplist($seenfile) if $seenfile ne 'cyrus.seen';

	debug('Reading Cyrus metadata.');
	_c_read_folder($meta, $rootdir, $rootdir, $seendata);

	#
	# The keys of Cyrus' subscription database look like this:
	#
	# 	user.<name>.<box>
	#
	# The <name> is the user name and the <box> is the name of the
	# subscribed folder.  We save only the <box>, and "&INBOX" if the key
	# is just "user.<name>".  The values of the subscription records are
	# empty.
	#
	# As Cyrus keeps the subscriptions of folders which don't exist anymore
	# (as per RFC 3501, 6.3.6.), we straighten up the subscriptions by
	# saving only those which refer to an existing folder.
	#
	foreach my $box (keys %{ $meta->{box} }) {
		(my $dotted = $box) =~ tr{/}{.};
		$known{$dotted} = 1;
	}
	foreach my $name (keys %{ c_read_skiplist($subfile) }) {
		$name =~ s/^(?:.*!)?user\.\Q$USER\E$/&INBOX/;
		$name =~ s/^(?:.*!)?user\.\Q$USER\E\.//;
		next unless $known{$name};
		push(@subscribed, $name);
	}
	$meta->{subscriptions} = \@subscribed;
	debug('Subscribed folders:', @subscribed);

	#
	# Look up the quota limit of the user.  Cyrus saves it in kilobytes,
	# whereas the default quota is used as is.
	#
	if ($quotafile) {
		my $quotakey = "user.$USER";

		if ($CONF{cyrus_quota_format} == 1) {
			$meta->{quota} = _c_read_legacy_quota($quotafile);
		} elsif (defined($QUOTADATA->{$quotakey})) {
			# The limit is the second of two numbers.
			my $limit = $QUOTADATA->{$quotakey};

			$limit =~ s/^\d+\s+(\d+)$/$1/;
			$meta->{quota} = $limit;
			error('Cannot parse quota:', $QUOTADATA->{$quotakey})
			    if $limit !~ /^\d+$/;
		}
		warning('No quota information available.')
		    if (not $meta->{quota} and not $CONF{default_quota});
	}
	$meta->{quota} = $meta->{quota} ?
	    $meta->{quota} * 1024 :	# Kilobytes to bytes.
	    ($CONF{default_quota} || 0);
	debug('Quota:', $meta->{quota});
	debug('Done reading Cyrus metadata.');
}

#
# Save the metadata of the folder found at the given path (and of all e-mails
# within that folder) into the given data structure, then do the same for each
# subdirectory.  The seen data is either a reference to the contents of the
# user's seen database or undef, in which case the seen state is read from the
# "cyrus.seen" file of each folder.  Bail out on error.
#
sub _c_read_folder ($$$$) {
	my ($meta, $rootdir, $boxpath, $seendata) = @_;
	my ($index, $seen);
	my $is_mailbox = 0;
	my $box = '&INBOX';

	# Non-INBOX folders are named by their path relative to the INBOX.
	if ($boxpath ne $rootdir) {
		($box = $boxpath) =~ s/^\Q$rootdir\E\/+//;
	}

	debug('Looking at folder:', $box);
	if (not (-e "$boxpath/cyrus.header" and -e "$boxpath/cyrus.index")) {
		#
		# This directory is not a Cyrus mailbox.  That's an error for
		# the INBOX itself.  Otherwise, we go on nevertheless, as there
		# might be Cyrus mailboxes further down the directory tree.
		#
		error('No Cyrus INBOX at:', $boxpath) if $box eq '&INBOX';
		debug("Skipping $boxpath as it's not a Cyrus mailbox.");
	} else {
		# Gather the metadata of this folder.
		my $header = _c_read_header($boxpath);

		$index = _c_read_index($boxpath);
		$header->{folderuid} ||=
		    _c_make_uid($index->{uidvalidity}, $box);

		my $rawseen = defined($seendata) ?
		    $seendata->{$header->{folderuid}} :
		    _c_read_old_seen($boxpath);

		$seen = _c_parse_seendata($rawseen);

		# The "mail" hash must exist even if there are no e-mails.
		my $entry = ($meta->{box}->{$box} ||= {});

		$entry->{mail} = {};
		$entry->{nonrecent} = $seen->{nonrecent};
		$entry->{uidvalidity} = $index->{uidvalidity};
		$entry->{uidnext} = $index->{lastuid} + 1;
		$entry->{keywords} = [ split(/\s+/, $header->{userflags}) ];

		debug('The UIDVALIDITY is:', $index->{uidvalidity});
		debug('The last message UID is:', $index->{lastuid});
		debug('The last non-recent message UID is:', $seen->{nonrecent});
		$is_mailbox = 1;
	}

	opendir(my $dir, $boxpath) or error("Cannot open $boxpath: $!");
	while (my $name = readdir($dir)) {
		next if $name =~ /^\.\.?$/;
		next if $name =~ /^cyrus\.(?:header|index|cache|seen)$/;
		my $path = "$boxpath/$name";

		if (-d $path) {
			# A subdirectory might be a subfolder.
			_c_read_folder($meta, $rootdir, $path, $seendata);
			next;
		}
		if (not ($is_mailbox and $name =~ /^(\d+)\.$/)) {
			warning("Skipping $box/$name, dunno what it is.");
			next;
		}

		# Message files are named "<uid>." by Cyrus.
		my $uid = $1;
		my $record = $index->{$uid};
		my $attr;

		if ($record) {
			# Use the flags and the INTERNALDATE from the index.
			debug("Saving attributes of: $box/$name");
			$attr = {
				usrflags => $record->{usrflags},
				sysflags => $record->{sysflags},
				internaldate => $record->{internaldate}
			};
		} else {
			# Fall back to the modification time of the file.
			my $mtime = (stat($path))[9];

			warning("Index record missing for: $box/$name");
			error('Cannot stat(2) message file:', $path)
			    if not defined($mtime);
			$attr = {
				usrflags => 0,
				sysflags => 0,
				internaldate => $mtime
			};
		}
		$attr->{sysflags} |= FLAG_SEEN if _c_seen($uid, $seen);
		$meta->{box}->{$box}->{mail}->{$uid} = $attr;
	}
	closedir($dir) or error("Cannot close $boxpath: $!");
	debug('Done with folder:', $box);
}

#
# Parse the "cyrus.header" file found in the given folder and return a
# reference to a hash holding the quota root, the mailbox UID (0 if the file
# provides none), the user-defined keywords, and the ACL of the mailbox.  Bail
# out on error.  The format of the mailbox header file is described in
# doc/internal/mailbox-format.html.
#
sub _c_read_header ($) {
#
# | This file contains mailbox-wide information that does not change that often.
# | Its format:
# |
# | <Mailbox Header Magic String>
# | <Quota Root>\t<Mailbox Unique ID String>\n
# | <Space-separated list of user flags>\n
# | <Mailbox ACL>\n
#
# [ doc/internal/mailbox-format.html ]
#
	my $boxpath = shift;
	my $file = "$boxpath/cyrus.header";
	my %field;

	debug('Reading:', $file);
	open(my $handle, '<', $file) or error("Cannot open $file: $!");

	# The file must start with the magic string.
	my $magic = xread($file, $handle, MAILBOX_HEADER_MAGIC_SIZE);

	error("Cannot parse $file: Mailbox header magic incorrect")
	    if $magic ne MAILBOX_HEADER_MAGIC;

	# The rest of the file is small enough to be parsed in one go.
	my $rest = slurp($handle);

	close($handle) or error("Cannot close $file: $!");

	#
	# There are two flavours of the header file: one which provides the
	# mailbox UID (separated from the quota root by a tab), and one which
	# doesn't.
	#
	if (my @new = $rest =~
	    /^([^\t\n]*)\t([^\n]+)\n([^\n]*)\n([^\n]*)\n$/) {
		@field{qw(quotaroot folderuid userflags folderacl)} = @new;
	} elsif (my @old = $rest =~ /^([^\n]*)\n([^\n]*)\n([^\n]*)\n$/) {
		@field{qw(quotaroot userflags folderacl)} = @old;
		$field{folderuid} = 0;	# No mailbox UID provided.
	} else {
		error("Cannot parse $file: Too many or too few fields ");
	}
	$field{userflags} =~ s/\s+$//;	# Cyrus adds a trailing space.

	debug('Mailbox UID:', $field{folderuid});
	debug('Quota root:', $field{quotaroot});	# Unused.
	debug('Mailbox ACL:', $field{folderacl});	# Unused.
	debug('User-defined keywords:', $field{userflags});
	debug('Done reading:', $file);
	return \%field;
}

#
# Read a mailbox index file and return a reference to a hash which holds the
# interesting data.  Bail out on error.  See doc/internal/mailbox-format.html
# for details on the format of the mailbox index file.  See also:
#
# imap/mailbox.h and imap/mailbox.c:mailbox_read_index_header()
#
# The returned hash holds the keys "lastuid" and "uidvalidity" (taken from the
# index header) plus one key per message UID found in the index.  Each of the
# latter refers to a hash holding the "internaldate", the "sysflags", and the
# "usrflags" of the message.  All numbers are read as 32-bit integers in network
# byte order, see readint().
#
sub _c_read_index ($) {
	my $boxpath = shift;
	my $file = "$boxpath/cyrus.index";
	my ($data, $buf, $n);

	debug('Reading:', $file);
	open(my $handle, '<', $file) or error("Cannot open $file: $!");

	# Read and save the interesting header fields.
	# NOTE(review): The first 8 bytes are skipped unread; presumably, they
	# hold fields we don't need -- confirm against imap/mailbox.h.
	seek($handle, 8, SEEK_SET) or error('Cannot seek in:', $file);

	# These three fields are stored back to back (offsets 8, 12, and 16).
	my $version = readint($file, $handle);
	my $headersize = readint($file, $handle);
	my $recordsize = readint($file, $handle);

	debug('Index format version:', $version);

	# Skip two more 4-byte fields, which takes us to offset 28.
	seek($handle, 8, SEEK_CUR) or error('Cannot seek in:', $file);
	$data->{lastuid} = readint($file, $handle);

	# Skip 4 additional bytes for 64-bit quotas in Cyrus 2.2 and newer.
	seek($handle, ($version < 6) ? 8 : 12, SEEK_CUR)
	    or error('Cannot seek in:', $file);
	$data->{uidvalidity} = readint($file, $handle);

	#
	# As we try to parse future index file formats (i.e., we don't bail out
	# if the $version is unknown), we do at least a dumb consistency check:
	# Cyrus sets the UIDVALIDITY of a folder to its creation date, so let's
	# spit out a warning if its value is < 300000000 (1979-07-05 06:20:00).
	#
	warning("UIDVALIDITY looks wrong in $file:", $data->{uidvalidity})
	    if $data->{uidvalidity} < 300000000;

	# The records start right after the header.
	seek($handle, $headersize, SEEK_SET) or error('Cannot seek in:', $file);

	# Read and save the interesting fields of all records.
	# Each record starts with the message UID.  A read which returns no
	# data (or fails) terminates the loop.
	while ($n = read($handle, $buf, 4)) {
		error("Read $n instead of 4 bytes from $file.") if $n != 4;
		my $uid = unpack('N', $buf);

		debug('Reading index record for message UID:', $uid);

		# Read, save, and "check" (see above) the INTERNALDATE.
		$data->{$uid}->{internaldate} = readint($file, $handle);
		warning("INTERNALDATE of message $uid looks wrong in $file:",
		    $data->{$uid}->{internaldate})
		    if $data->{$uid}->{internaldate} < 300000000;

		#
		# Read and save the system and user flags.  Note that the size
		# of the user flags bitmask is MAX_USER_FLAGS bits.  In all
		# Cyrus versions we checked (1.4, 2.1.18, 2.2.12, and 2.3.12p2),
		# MAX_USER_FLAGS is defined to 128 by default.  However, as we
		# only make use of the first 26 user-defined keywords, we read
		# the first 4 bytes and ignore the rest.
		#
		# The flags are found 32 bytes into the record: 8 bytes were
		# read above, and another 24 bytes are skipped here.
		seek($handle, 24, SEEK_CUR) or error('Cannot seek in:', $file);
		$data->{$uid}->{sysflags} = readint($file, $handle);
		$data->{$uid}->{usrflags} = readint($file, $handle);

		# Skip the rest of the record.
		# So far, we consumed 40 bytes of this record.
		# NOTE(review): This assumes a record size of at least 40 bytes,
		# which is not checked.
		seek($handle, $recordsize - 40, SEEK_CUR)
		    or error('Cannot seek in:', $file);
	}
	close($handle) or error("Cannot close $file: $!");
	debug('Done reading:', $file);
	return $data;
}

#
# Read the Cyrus 1.x seen file ("cyrus.seen") found in the given folder and
# return the seen state data, i.e., the only line of that file without its
# trailing newline.  Return undef if the file is empty.  Bail out on error.
#
sub _c_read_old_seen ($) {
	my $boxpath = shift;
	my $file = "$boxpath/cyrus.seen";

	debug('Reading:', $file);
	open(my $handle, '<', $file) or error("Cannot open $file: $!");
	chomp(my @lines = <$handle>);
	close($handle) or error("Cannot close $file: $!");

	# We expect no more than a single line.
	if (@lines > 1) {
		error("Cannot parse $file: File contains multiple lines");
	}
	debug('Done reading:', $file);
	return $lines[0];
}

#
# Return the quota limit (in kilobytes) found in the given legacy quota file,
# or undef if there's no such file.  A legacy quota file consists of exactly
# two lines, the second of which holds the limit.  Bail out on error.
#
sub _c_read_legacy_quota ($) {
	my $file = shift;

	unless (-e $file) {
		my $notice = "Legacy quota file not found: $file";

		$CONF{quiet} ? debug($notice) : info($notice);
		return undef;
	}

	debug('Reading:', $file);
	open(my $handle, '<', $file) or error("Cannot open $file: $!");
	chomp(my @lines = <$handle>);
	close($handle) or error("Cannot close $file: $!");

	# The limit must be a plain number.
	unless (@lines == 2 and $lines[1] =~ /^\d+$/) {
		error("Cannot parse $file: Not in legacy quota format");
	}
	debug('Done reading:', $file);
	return $lines[1];
}

#
# Read a skiplist string (a key or a value), which is stored as a 32-bit size
# followed by the string itself, padded to a multiple of four bytes.  Return
# the string, or undef if its size is zero.  Bail out on error.
#
sub _c_read_skiplist_item ($$) {
	my ($file, $handle) = @_;
	my $length = readint($file, $handle);
	my $string = xread($file, $handle, $length);

	# The padding takes us to the next multiple of four bytes, if needed.
	my $padding = (($length + 3) & 0xFFFFFFFC) - $length;

	seek($handle, $padding, SEEK_CUR)
	    or error("Cannot seek skiplist (item size: $length).");

	return undef unless $length > 0;
	return $string;
}

#
# Parse the seen data of a mailbox.  Return a reference to a hash which holds
# the last non-recent message UID ("nonrecent") and the seen message UIDs: each
# single seen UID is used as a key of the hash itself, while UID ranges are
# saved as a list of "min"/"max" pairs ("ranges").  This is what _c_seen()
# expects.  If the seen data is undefined, no message is seen and the last
# non-recent UID is 0.  Bail out on error.
#
sub _c_parse_seendata ($) {
#
# The third field of the seen data contains the last non-recent message UID, and
# the fifth field contains a seen UIDs string which is in the following format
# (see doc/internal/database-formats.html for a description of all seen data
# fields):
#
# | /*
# |  * Format of the seenuids string:
# |  *
# |  * no whitespace, n:m indicates an inclusive range (n to m), otherwise
# |  * list is comma separated of single messages, e.g.:
# |  *
# |  * 1:16239,16241:17015,17019:17096,17098,17100
# |  */
#
# [ imap/index.c ]
#
# See also: imap/seen_db.c:seen_readit()
#
	my $seendata = shift;
	my $seen = { ranges => [], nonrecent => 0 };

	return $seen if not defined($seendata);

	my @fields = split(/\s+/, $seendata, 5);

	error('Cannot parse seen data:', $seendata) if @fields != 5;
	debug('Parsing seen data:', $seendata);

	my ($lastuid, $uidlist) = @fields[2, 4];

	$uidlist =~ s/\s//g;	# Cyrus sometimes adds a trailing tab.
	$lastuid =~ s/\s//g;	# Just to make sure.

	foreach my $item (split(/,/, $uidlist)) {
		debug('Parsing seen UID(s):', $item);
		if ($item =~ /^(\d+):(\d+)$/) {
			# A range, which we save with its lower bound first.
			my ($low, $high) = ($1, $2);

			($low, $high) = ($high, $low) unless $high > $low;
			push(@{ $seen->{ranges} },
			    { min => $low, max => $high });
		} elsif ($item =~ /^\d+$/) {
			# A single UID.
			$seen->{$item} = 1;
		} else {
			error('Cannot parse seen UID(s):', $item);
		}
	}
	error('Cannot parse last non-recent UID:', $lastuid)
	    if $lastuid !~ /^\d+$/;
	$seen->{nonrecent} = $lastuid;
	return $seen;
}

#
# Tell whether the message with the given UID is seen according to the given
# seen state (as returned by _c_parse_seendata()).  Return 1 if the UID is
# either listed on its own or covered by one of the UID ranges, 0 otherwise.
#
sub _c_seen ($$) {
	my ($uid, $seen) = @_;

	# Listed as a single UID?
	return 1 if $seen->{$uid};

	# Within one of the (inclusive) ranges?
	foreach my $span (@{ $seen->{ranges} }) {
		next if $uid < $span->{min};
		next if $uid > $span->{max};
		return 1;
	}
	return 0;
}

#
# Compute the mailbox UID Cyrus uses internally for the given folder of the
# current user: 16 hexadecimal digits, the first half of which is a hash of the
# folder's Cyrus name ("user.<name>" or "user.<name>.<box>"), while the second
# half is the UIDVALIDITY of the folder.  See also:
#
# imap/mailbox.c:mailbox_make_uniqueid()
#
sub _c_make_uid ($$) {
	my ($uidvalidity, $box) = @_;
	my $name = "user.$USER";
	my $hash = 0;

	# Cyrus uses "." (not "/") as the separator in mailbox names.
	if ($box ne '&INBOX') {
		(my $dotted = $box) =~ s/\//./g;
		$name .= ".$dotted";
	}

	$hash = ($hash * 251 + ord($_)) % 2147484043 for split(//, $name);

	my $uid = sprintf('%08lx%08lx', $hash, $uidvalidity);

	debug("Calculated mailbox UID for $name: $uid");
	return $uid;
}

# ----- Dovecot subroutines. ---------------------------------------------------

# Forward declarations of the private Dovecot helpers which are defined further
# below.  Declaring them here makes their prototypes known to the code which
# calls them before their definitions have been compiled.
sub _d_convert_mail ($$);
sub _d_make_filename ($$$$);
sub _d_make_maildir ($);
sub _d_touch_maildirfolder ($);
sub _d_write_maildirsize ($$);
sub _d_write_subscriptions ($$);
sub _d_write_keywords ($$);

#
# Return the uncompressed size (in bytes) of the given gzip-compressed file.
# Bail out on error.
#
sub _d_uncompressed_size ($) {
	my $file = shift;
	my $size = 0;
	my $zfh = IO::Zlib->new($file, 'rb')
	    or error("Cannot open $file: $!");

	while (1) {
		my $count = read($zfh, my $buffer, 65536);
		error("Cannot read $file: $!") if not defined($count);
		last if $count == 0;
		$size += $count;
	}
	close($zfh) or error("Cannot close $file: $!");
	return $size;
}

#
# Create the Maildir++ folder including all subfolders, write the metadata, and
# convert the actual e-mails.
#
# $meta:      hash reference holding the metadata gathered from Cyrus.  This
#             subroutine reads $meta->{box} (a hash of folders keyed by Cyrus
#             folder name, "&INBOX" denoting the INBOX; each folder provides
#             "uidvalidity", "uidnext", "nonrecent", "keywords", and "mail"),
#             $meta->{subscriptions}, and $meta->{quota}.
# $c_rootdir: path to the user's INBOX folder in Cyrus.
# $d_rootdir: path to the user's INBOX Maildir in Dovecot.
#
# The global counters $MAILS, $SIZE, and $FOLDERS are updated along the way.
# Errors are reported via error().
#
sub d_write_mailbox ($$$) {
	my ($meta, $c_rootdir, $d_rootdir) = @_;
	my %folders;

	#
	# The Maildir++ filenames we create include a random number.  We call
	# srand(3) in order to re-create the same filenames if the conversion
	# is repeated for some reason.
	#
	srand($meta->{box}->{'&INBOX'}->{uidvalidity});

	debug('Writing Dovecot folders.');
	foreach my $c_box (keys %{ $meta->{box} }) {
		my ($c_boxpath, $d_boxpath, $d_box);

		if ($c_box eq '&INBOX') {
			$c_boxpath = $c_rootdir;
			$d_box = 'INBOX';
		} else {
			$c_boxpath = "$c_rootdir/$c_box";
			$d_box = $c_box;
			$d_box =~ s/\//./g;
			$d_box = ".$d_box";
		}

		#
		# Edit the Maildir++ folder name if desired.  The operations
		# are Perl code given on the command line, which is why a
		# string eval is used on purpose here.
		#
		foreach my $operation (@{ $CONF{edit_foldernames} }) {
			debug("Editing folder name $d_box: $operation");
			my $r = eval("\$d_box =~ $operation");
			error("Cannot evaluate $operation: $@") if $@;
			error("Cannot evaluate $operation.") if not defined($r);
			debug('New folder name:', $d_box);
		}

		# Maildir/.INBOX cannot be accessed via Dovecot.
		warning("Non-INBOX folder named $d_box not accessible, try:",
		    '--edit-foldernames \'s/^\.(INBOX)$/.$1-moved/i\'')
		    if uc($d_box) eq '.INBOX';

		#
		# Refuse to write two Cyrus folders into the same Maildir.  This
		# is checked before anything is written, as the second folder
		# would otherwise overwrite the dovecot-uidlist of the first.
		#
		error("Folder name not unique after editing: $d_box")
		    if $folders{$d_box}++;

		# Now, we know the final Maildir++ pathname.
		$d_boxpath = ($d_box eq 'INBOX') ?
		    $d_rootdir : "$d_rootdir/$d_box";

		# Create an empty Maildir.
		_d_make_maildir($d_boxpath);
		_d_touch_maildirfolder($d_boxpath) unless $d_box eq 'INBOX';

		my $box = $meta->{box}->{$c_box};
		my $uidfile = "$d_boxpath/dovecot-uidlist";

		# Open the dovecot-uidlist file.
		open(my $uidfh, '>', $uidfile)
		    or error("Cannot open $uidfile: $!");

		# Write the dovecot-uidlist header line.
		if ($CONF{dovecot_uidlist_format} == 1) {
			print $uidfh "1 $box->{uidvalidity} $box->{uidnext}\n"
			    or error("Cannot write $uidfile: $!");
		} elsif ($CONF{dovecot_uidlist_format} == 3) {
			print $uidfh "3 V$box->{uidvalidity} N$box->{uidnext}\n"
			    or error("Cannot write $uidfile: $!");
		} else {
			error('Unknown dovecot-uidlist format:',
			    $CONF{dovecot_uidlist_format});
		}

		# Handle all e-mails in this folder.
		foreach my $uid (sort {$a <=> $b} keys %{ $box->{mail} }) {
			my $c_mailpath = "$c_boxpath/$uid.";
			my $d_temppath = "$d_boxpath/tmp/$MYSELF.$$.$MAILS";

			#
			# If compression is enabled, _d_convert_mail() appends
			# ".gz" to the name of the file it writes, so that's
			# the file we have to look at and to move.
			#
			my $d_tempfile = $CONF{dovecot_zlib} ?
			    "$d_temppath.gz" : $d_temppath;

			# Convert the e-mail.
			_d_convert_mail($c_mailpath, $d_temppath);

			#
			# Create the Maildir++ e-mail filename.  Include the
			# size fields used by Dovecot:
			#
			# | ,S=<size>: <size> contains the file size.  Getting
			# | the size from the filename avoids doing a stat(),
			# | which may improve the performance.  This is
			# | especially useful with Maildir++ quota.
			# |
			# | ,W=<vsize>: <vsize> contains the file's RFC822.SIZE,
			# | i.e. the file size with linefeeds being CR+LF
			# | characters.  If the message was stored with CR+LF
			# | linefeeds, <size> and <vsize> are the same.  Setting
			# | this may give a small speedup because now Dovecot
			# | doesn't need to calculate the size itself.
			#
			# [ http://wiki.dovecot.org/MailboxFormat/Maildir ]
			#
			# For compressed e-mails, <size> must hold the size of
			# the uncompressed message, so stat(2) cannot be used
			# to find it.
			#
			my @c_stat = stat($c_mailpath);
			my @d_stat = stat($d_tempfile);

			error('Cannot stat(2) message file:', $c_mailpath)
			    if not defined($c_stat[7]);
			error('Cannot stat(2) message file:', $d_tempfile)
			    if not defined($d_stat[7]);

			my $attr = $box->{mail}->{$uid};
			my $size = $CONF{dovecot_zlib} ?
			    _d_uncompressed_size($d_tempfile) : $d_stat[7];
			my $vsize = $c_stat[7];	# File size with CR+LF.
			my $subdir = ($uid > $box->{nonrecent}) ? 'new' : 'cur';
			my $d_mail = _d_make_filename($attr, $size, $vsize,
			    $#{ $box->{keywords} });
			my $d_mailpath = "$d_boxpath/$subdir/$d_mail";

			rename($d_tempfile, $d_mailpath)
			    or error("Cannot move $d_tempfile to: $d_mailpath");

			# Set the e-mail's last access and modification times.
			utime($attr->{internaldate}, $attr->{internaldate},
			    $d_mailpath);

			# Add the e-mail to the dovecot-uidlist.
			if ($CONF{dovecot_uidlist_format} == 1) {
				print $uidfh "$uid $d_mail\n"
				    or error("Cannot write $uidfile: $!");
			} elsif ($CONF{dovecot_uidlist_format} == 3) {
				print $uidfh "$uid :$d_mail\n"
				    or error("Cannot write $uidfile: $!");
			} else {
				error('Unknown dovecot-uidlist format:',
				    $CONF{dovecot_uidlist_format});
			}
			$SIZE += $size;
			$MAILS++;
		}
		close($uidfh) or error("Cannot close $uidfile: $!");
		_d_write_keywords($box->{keywords}, $d_boxpath);
		$FOLDERS++;
	}
	_d_write_subscriptions($meta->{subscriptions}, $d_rootdir);
	_d_write_maildirsize($d_rootdir, $meta->{quota}) if $meta->{quota};

	debug('Done writing Dovecot folders.');
}

#
# Copy the given e-mail file, and replace any CR+LF with a plain LF unless
# $CONF{dovecot_crlf} is true.  If $CONF{dovecot_zlib} is true, the copy is
# written gzip-compressed to "$ofile.gz" instead of $ofile.  Bail out on
# error.
#
# $ifile: path of the message file to read.
# $ofile: path of the message file to write (without the ".gz" suffix which is
#         appended if compression is enabled).
#
sub _d_convert_mail ($$) {
	my ($ifile, $ofile) = @_;
	my $ofh;

	open(my $ifh, '<', $ifile) or error("Cannot open $ifile: $!");
	if ($CONF{dovecot_zlib}) {
		# From now on, $ofile names the file which is actually written,
		# so that the error messages below refer to the right file.
		$ofile = "$ofile.gz";
		# IO::Zlib->new() yields no object if the file can't be opened.
		$ofh = IO::Zlib->new($ofile, 'wb6')
		    or error("Cannot open $ofile: $!");
	} else {
		open($ofh, '>', $ofile) or error("Cannot open $ofile: $!");
	}
	# A lexical is used for the current line in order to leave the caller's
	# $_ untouched.
	while (defined(my $line = <$ifh>)) {
		$line =~ s/\r+\n/\n/ unless $CONF{dovecot_crlf};
		print $ofh $line or error("Cannot write $ofile: $!");
	}
	close($ifh) or error("Cannot close $ifile: $!");
	close($ofh) or error("Cannot close $ofile: $!");
}

#
# Create and return a Maildir++ e-mail filename including the flags and the size
# fields used by Dovecot.
#
# $attr:       hash reference holding the message attributes; "internaldate",
#              "sysflags", and "usrflags" are used.
# $size:       value for the ",S=" field.
# $vsize:      value for the ",W=" field.
# $maxkeyword: highest index of the keywords defined for the folder (-1 if
#              there are none).  Only the first 26 keywords can be expressed,
#              as they're mapped to the letters "a" to "z".
#
# The filename also includes a random number, the number of the message within
# this run ($MAILS + 1), and $CONF{dovecot_host}.
#
sub _d_make_filename ($$$$) {
	my ($attr, $size, $vsize, $maxkeyword) = @_;
	my @alphabet = ('a' .. 'z');
	my $filename = sprintf('%u.R%08xQ%u.%s,S=%u,W=%u:2,',
	    $attr->{internaldate}, int(rand(UINT32_MAX)), $MAILS + 1,
	    $CONF{dovecot_host}, $size, $vsize);

	#
	# The Maildir specification requires the flags to be stored in ASCII
	# order, so the order of the following lines matters.  The keyword
	# letters are lowercase and therefore sort after the standard flags.
	#
	$filename .= 'D' if $attr->{sysflags} & FLAG_DRAFT;
	$filename .= 'F' if $attr->{sysflags} & FLAG_FLAGGED;
	$filename .= 'R' if $attr->{sysflags} & FLAG_ANSWERED;
	$filename .= 'S' if $attr->{sysflags} & FLAG_SEEN;
	$filename .= 'T' if $attr->{sysflags} & FLAG_DELETED;

	# Keywords beyond the end of the alphabet cannot be stored.
	$maxkeyword = $#alphabet if $maxkeyword > $#alphabet;
	foreach my $i (0 .. $maxkeyword) {
		$filename .= $alphabet[$i] if $attr->{usrflags} & (1 << $i);
	}

	debug('Created new Maildir++ filename:', $filename);
	return $filename;
}

#
# Create the specified Maildir, including the "new", "cur", and "tmp" Maildir
# subdirectories.  The directories are handed to makedir() one by one, the
# Maildir itself first.
#
# $maildir: path of the Maildir to create.
#
sub _d_make_maildir ($) {
	my ($maildir) = @_;
	my @directories = ($maildir);

	push(@directories, "$maildir/new", "$maildir/cur", "$maildir/tmp");

	debug('Creating Maildir:', $maildir);
	foreach my $directory (@directories) {
		makedir($directory);
	}
}

#
# Create an empty file named "maildirfolder" within the given Maildir (an
# existing file of that name is truncated).  Bail out on error.
#
# $maildir: path of the Maildir which receives the file.
#
sub _d_touch_maildirfolder ($) {
	my ($maildir) = @_;
	my $file = join('/', $maildir, 'maildirfolder');
	my $marker;

	# Opening the file for writing and closing it again is all it takes.
	open($marker, '>', $file) or error("Cannot touch $file: $!");
	close($marker) or error("Cannot close $file: $!");

	debug('Touched:', $file);
}

#
# Write the maildirsize file.  Bail out on error.  See also:
#
# http://www.inter7.com/courierimap/README.maildirquota.html
#
# $rootdir: path of the Maildir which receives the maildirsize file.
# $quota:   quota limit; it is written with an "S" suffix to the first line.
#
# The second line holds the values of the global counters $SIZE and $MAILS.
#
sub _d_write_maildirsize ($$) {
	my ($rootdir, $quota) = @_;
	my $file = "$rootdir/maildirsize";

	debug('Writing:', $file);
	open(my $handle, '>', $file) or error("Cannot open $file: $!");
	# Check each write, just like the other file writers do.
	print $handle $quota . "S\n"
	    or error("Cannot write $file: $!");
	print $handle $SIZE . " $MAILS\n"
	    or error("Cannot write $file: $!");
	close($handle) or error("Cannot close $file: $!");
	debug('Done writing:', $file);
}

#
# Write the subscriptions file.  Bail out on error.
#
# $subscriptions: reference to the list of subscribed folder names, where
#                 "&INBOX" denotes the INBOX.  The list is not modified.
# $rootdir:       path of the Maildir which receives the subscriptions file.
#
# The folder name edits specified via $CONF{edit_foldernames} are applied to
# each subscription before it is written.
#
sub _d_write_subscriptions ($$) {
	my ($subscriptions, $rootdir) = @_;
	my $file = "$rootdir/subscriptions";

	debug('Writing:', $file);
	open(my $handle, '>', $file) or error("Cannot open $file: $!");
	foreach my $c_name (@$subscriptions) {
		#
		# Work on a copy, as the loop variable is an alias for the
		# caller's list element and editing it would change the
		# caller's data.  Add a leading dot for "--edit-foldernames".
		#
		my $subscription = ($c_name eq '&INBOX') ?
		    'INBOX' : ".$c_name";
		#
		# Edit the subscribed folder name if desired.  The operations
		# are Perl code given on the command line, which is why a
		# string eval is used on purpose here; it refers to the
		# variable $subscription by name.
		#
		foreach my $operation (@{ $CONF{edit_foldernames} }) {
			debug("Editing subscription $subscription: $operation");
			my $r = eval("\$subscription =~ $operation");
			error("Cannot evaluate $operation: $@") if $@;
			error("Cannot evaluate $operation.") if not defined($r);
			debug('New subscription:', $subscription);
		}
		# Remove the leading dot we added (if there still is one).
		$subscription =~ s/^\.//;
		print $handle "$subscription\n"
		    or error("Cannot write $file: $!");
		debug('Subscribed:', $subscription);
	}
	close($handle) or error("Cannot close $file: $!");
	debug('Done writing:', $file);
}

#
# Write the dovecot-keywords file, which holds one "<index> <keyword>" line per
# keyword.  Nothing is written if no keywords are defined.  Bail out on error.
#
# $keywords: reference to the list of keyword names; the position of a keyword
#            within the list is used as its index.
# $boxpath:  path of the Maildir which receives the file.
#
sub _d_write_keywords ($$) {
	my ($keywords, $boxpath) = @_;
	my $file = "$boxpath/dovecot-keywords";

	return unless @$keywords;	# No keywords defined.

	debug('Writing:', $file);

	my $out;
	open($out, '>', $file) or error("Cannot open $file: $!");
	foreach my $i (0 .. $#{ $keywords }) {
		print $out "$i $keywords->[$i]\n"
		    or error("Cannot write $file: $!");
		debug('Added keyword:', $keywords->[$i]);
	}
	close($out) or error("Cannot close $file: $!");

	debug('Done writing:', $file);
}

__END__

=head1 NAME

cyrus2dovecot - convert Cyrus folders to Dovecot

=head1 SYNOPSIS

B<cyrus2dovecot>
[B<-bcdmvq>[B<q>]]
S<[B<-A> I<cyrus-database-dir>]>
S<[B<-C> I<cyrus-inbox>]>
S<[B<-Q> I<cyrus-quota>]>
S<[B<-B> I<cyrus-quota-dir>]>
S<[B<-O> I<cyrus-quota-format>]>
S<[B<-S> I<cyrus-seen>]>
S<[B<-U> I<cyrus-sub>]>
S<[B<-H> I<dovecot-host>]>
S<[B<-D> I<dovecot-inbox>]>
S<[B<-F> I<dovecot-uidlist-format>]>
S<[B<-z> I<dovecot-zlib>]>
S<[B<-N> I<default-quota>]>
S<[B<-E> I<edit-foldernames>]>
[I<user> ...]

B<cyrus2dovecot>
B<-h> E<verbar> B<-v>

=head1 DESCRIPTION

B<cyrus2dovecot> converts the e-mails of one or more I<user>s from Cyrus
format to Dovecot Maildir++ folders.  If no I<user> is specified, the
I<user> names are read from the standard input, one per line.  Message
C<UID>s, C<INTERNALDATE>s, IMAP folder subscriptions, the C<UIDVALIDITY>
and C<UIDNEXT> values for each folder, as well as all IMAP flags
(including the first 26 user-defined keywords) are preserved during the
conversion.  The generated e-mail filenames include the Maildir++
extensions C<S=E<lt>sizeE<gt>> and C<W=E<lt>vsizeE<gt>> (which are used
by Dovecot for better performance).  Optionally, Maildir++
F<maildirsize> files are created.

=head1 OPTIONS

Within the specified I<PATH>s, any occurrence of C<%u> will be replaced
by the current I<user> name, any occurrence of C<%I<n>u> will be
replaced by the I<n>'th character of that I<user> name, any occurrence
of C<%h> will be replaced by Cyrus' directory "hash" character for that
I<user> name (i.e., C<%h> is equivalent to C<%1u> if the first character
of the I<user> name is a lowercase letter), and any occurrence of C<%x>
will be replaced by Cyrus' "fulldirhash" character for that I<user>
name.  However, within the specified B<--cyrus-quota> I<PATH> (if any),
these replacements will only be done if the B<--cyrus-quota-format>
I<VERSION> is set to C<1>.

The default settings can be found (and modified) at the top of the
B<cyrus2dovecot> script.

=over 8

=item B<-A>, B<--cyrus-database-dir=>I<PATH>

Set B<--cyrus-seen=>I<PATH>F</%u.seen> and
B<--cyrus-sub=>I<PATH>F</%u.sub>.  The latter options override this one.

=item B<-C>, B<--cyrus-inbox=>I<PATH>

Use this I<PATH> to the I<user>'s INBOX folder in Cyrus.

=item B<-Q>, B<--cyrus-quota=>I<PATH>

Use this I<PATH> to the quota database file in Cyrus, and create a
Maildir++ F<maildirsize> file for each I<user> whose quota limit is
found in that file.

=item B<-B>, B<--cyrus-quota-dir=>I<PATH>

Set B<--cyrus-quota=>I<PATH>F</user.%u> and
B<--cyrus-quota-format=>C<1>.  The latter options override this one.

=item B<-O>, B<--cyrus-quota-format=>I<VERSION>

Expect Cyrus' quota database(s) to be present in this format I<VERSION>,
where I<VERSION> C<1> denotes the "quotalegacy" format and I<VERSION>
C<2> denotes the "skiplist" or the "flat" text format (which of those
two formats is used will be autodetected if I<VERSION> C<2> is
specified).  This option is ignored if B<--cyrus-quota> is not
specified.

=item B<-S>, B<--cyrus-seen=>I<PATH>

Use this I<PATH> to the I<user>'s seen database file in Cyrus.  If an
"old-style" F<cyrus.seen> file exists within each Cyrus folder (as opposed to a
central F<cyrus.seen> database for all folders of a I<user>), F<cyrus.seen>
must be specified (literally) as the I<PATH>.

=item B<-U>, B<--cyrus-sub=>I<PATH>

Use this I<PATH> to the I<user>'s subscription database file in
Cyrus.

=item B<-c>, B<--dovecot-crlf>

Store e-mails with C<CR+LF> instead of plain C<LF>.  This flag should be
specified if the C<mail_save_crlf> option is set to C<yes> in the
Dovecot configuration.

=item B<-H>, B<--dovecot-host=>I<NAME>

Use this host I<NAME> for the Maildir++ e-mail file's basename.

=item B<-D>, B<--dovecot-inbox=>I<PATH>

Use this I<PATH> to the I<user>'s INBOX folder in Dovecot.

=item B<-F>, B<--dovecot-uidlist-format=>I<VERSION>

Create the F<dovecot-uidlist> files using this format I<VERSION>.  For
Dovecot releases older than 1.0.2, I<VERSION> C<1> must be specified;
otherwise, I<VERSION> C<3> can be used.

=item B<-z>, B<--dovecot-zlib>

Store e-mails zlib compressed instead of plain.

=item B<-E>, B<--edit-foldernames=>I<SUBSTITUTION>

Apply the specified I<SUBSTITUTION> to the name of each Maildir++ folder
and subscription using Perl code such as
S<C<eval('$name=~'.$substitution)>>, where $name holds either the string
"INBOX" (which denotes the main Maildir) or the full Maildir++ folder
name (e.g., F<.sub.folder>), and $substitution holds the specified
I<SUBSTITUTION>.  The resulting $name will be used as the Maildir++
folder's name.  This option may be specified multiple times, in which
case each of the I<SUBSTITUTION>s will be applied to each Maildir++
folder name in the order specified on the command line.  Note that while
Dovecot stores the subscribed folder names without the leading "." of
Maildir++ subfolders, B<cyrus2dovecot> adds a leading "." to each
subscribed subfolder name before applying the specified
I<SUBSTITUTION>(s) and removes it afterwards (if it still exists) in
order to simplify the matching.

=item B<-N>, B<--default-quota=>I<BYTES>

Create a Maildir++ F<maildirsize> file for each I<user>, and set the
quota limit to the specified number of I<BYTES> unless B<--cyrus-quota>
is also specified, in which case a I<user>-specific quota would override
the B<--default-quota> limit.  Specifying C<0> I<BYTES> disables the
creation of F<maildirsize> files unless B<--cyrus-quota> is also
specified.

=item B<-m>, B<--dump-meta>

Print a dump of the data structure which holds the metadata gathered
from scanning the Cyrus folders of a user to the standard output.

=item B<-d>, B<--debug>

Print information which is usually only useful for debugging to the
standard output.

=item B<-q>, B<--quiet>

Suppress any messages usually printed to the standard output (namely
regarding nonexistent Cyrus database files) except for the line
generated for each I<user> whose e-mails were successfully converted.
If this option is specified twice, the latter line will also be omitted.
In either case, warning and error messages, if any, will still be
printed to the standard error output.

=item B<-b>, B<--bail>

Bail out immediately on any warnings and errors.  If this flag is not
specified and a warning occurs, B<cyrus2dovecot> prints a message to the
standard error output and then tries to continue the conversion.  If
this flag is not specified and a non-fatal error occurs,
B<cyrus2dovecot> prints a message to the standard error output and then
tries to convert the e-mails of the remaining I<user>s (if any).  (Note
that B<cyrus2dovecot> always bails out immediately on warnings generated
by the Perl interpreter, as they indicate B<cyrus2dovecot> bugs.)

=item B<-h>, B<--help>

Print usage information to the standard output and exit.

=item B<-v>, B<--version>

Print version information to the standard output and exit.

=back

=head1 RETURN VALUE

B<cyrus2dovecot> exits 0 on success, and E<gt>0 if one or more errors
occur during the conversion of the e-mails of one or more I<user>s.

=head1 EXAMPLES

Given that the path to the INBOX folders in Cyrus is
F</var/spool/imap/user/%u> (where C<%u> denotes the I<user> name), that
Cyrus stores the seen and subscription databases within the directory
F</var/imap/user/%h>, and that Cyrus stores "quotalegacy" files within
the directory F</var/imap/quota/%h> (where C<%h> denotes Cyrus'
directory "hash" character for that I<user> name, respectively), the
following command would convert all e-mails of the I<user>s "bill" and
"george" from Cyrus to Dovecot, and the result would be stored below
F</tmp/dovecot> (including F<maildirsize> files for both users if their
quota limits are found):

	cyrus2dovecot --cyrus-database-dir /var/imap/user/%h    \
	              --cyrus-quota-dir /var/imap/quota/%h      \
	              --cyrus-inbox /var/spool/imap/user/%u     \
	              --dovecot-inbox /tmp/dovecot/%u/Maildir   \
	              bill george

Given that the default settings specified at the top of the
B<cyrus2dovecot> script are correct, and that F</tmp/users> holds the
names of all I<user>s whose e-mails should be converted (one per line),
the following command would convert all e-mails of those I<user>s:

	cyrus2dovecot < /tmp/users

Given that the default settings specified at the top of the
B<cyrus2dovecot> script are correct, a command such as the following
could be used in order to convert all e-mails of all I<user>s (of
course, the path to the INBOX folders in Cyrus may have to be adjusted;
e.g., if the C<hashimapspool> option is not enabled in the Cyrus
configuration, F</?> must be removed from the path):

	find /var/spool/imap/user/?/. \! -name . -prune \
	     -exec basename \{\} \; | cyrus2dovecot

Cyrus transparently replaces any "." character in folder names with a
"^" character.  Dovecot supports "." characters in Maildir++ folder
names if the "listescape" plugin is used, which replaces any "."
character in folder names with the string "\2e".  The following argument
could be added to the B<cyrus2dovecot> command line in order to replace
any "^" character in Cyrus folder names with "\2e" for the Maildir++
folder name:

	--edit-foldernames 's/\^/\\2e/g'

Dovecot 1.1 and newer support using folders such as
F<Maildir/sub/folder> (as opposed to F<Maildir/.sub.folder>) if
C<:LAYOUT=fs> was added to the C<mail_location> in the Dovecot
configuration.  The following B<cyrus2dovecot> arguments could be
specified in order to create such folders by removing the leading dot
from Maildir++ subfolder names and then substituting any following dots
with slashes:

	--edit-foldernames 's/^\.//'    \
	--edit-foldernames 's/\./\//g'

If the seen states, subscriptions, or quotas are stored in Berkeley
databases, they must first be converted to one of the formats supported
by B<cyrus2dovecot> using a command such as the following:

	cvt_cyrusdb /var/imap/user/b/bill.seen berkeley \
	            /tmp/imap/user/b/bill.seen skiplist

=head1 CAVEATS

B<cyrus2dovecot> assumes that the I<user> has no e-mails in Dovecot yet
and that neither his Cyrus folders nor his Dovecot folders will be
accessed by another process during the conversion.

If C<%I<n>u> is specified within any I<PATH> on the command line, all
I<user> names must have a length of at least I<n> characters.
Otherwise, B<cyrus2dovecot> will die with an exception.

If folder name substitutions are specified via B<--edit-foldernames>,
the resulting Maildir++ folder names must be unique.

=head1 RESTRICTIONS

Cyrus' seen and subscription databases must be present either in the
"skiplist" format or in the "flat" text format, and Cyrus' quota
database(s) (if any) must be present either in one of those formats or
in the "quotalegacy" format, as B<cyrus2dovecot> doesn't support
Berkeley databases.  However, Berkeley databases can be converted to one
of the supported formats using cvt_cyrusdb(8), see the L</EXAMPLES>.

In F<maildirsize> files created by B<cyrus2dovecot>, no limit for the
number of messages is specified (as such a limit does not seem useful).

Cyrus' ACL settings are not converted.

=head1 COMPATIBILITY

B<cyrus2dovecot> is supposed to work with all Cyrus releases up to (at
least) version 2.3.x.  So far, it has been tested with Cyrus 1.4,
2.1.18, 2.2.12, and 2.3.12p2.

=head1 SEE ALSO

Other tools for converting e-mails from Cyrus to Dovecot can be found at
L<http://wiki.dovecot.org/Migration/Cyrus>.

=head1 AUTHOR

Written by Holger WeiE<szlig> E<lt>holger@ZEDAT.FU-Berlin.DEE<gt> at
Freie UniversitE<auml>t Berlin, Germany, Zentraleinrichtung fE<uuml>r
Datenverarbeitung (ZEDAT).

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2008 Freie UniversitE<auml>t Berlin.
All rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.  See L<perlartistic>.  This program
is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.

=head1 HISTORY

	$Log: cyrus2dovecot,v $
	Revision 1.3  2008/10/05 17:45:06  holger
	If a seen state, subscription, or quota database is not found, a
	message will now be printed.  As this does not necessarily
	indicate a problem, such messages can be suppressed by
	specifying the "--quiet" option.  Apart from that, the
	"--cyrus-database-dir" and "--cyrus-quota-dir" options (which
	are merely convenience aliases) have been added, and various
	minor enhancements have been applied.

	Revision 1.2  2008/09/24 09:52:33  holger
	Message seen states are now parsed more efficiently with regard
	to performance and memory usage.  Apart from that, minor code
	cleanups have been applied.

	Revision 1.1  2008/09/22 08:36:44  holger
	Initial release.
