#!/usr/local/bin/perl -w

# Interactive front end: announce the tool, ask which direction to convert
# and which file to read, then open that file on the SOURCE handle that the
# conversion code below reads from.
print "lfile translates between";
print " LAMARC and PHYLIP input files.\n\n";

print "Problem: characters in files are assumed to take 1 byte\n";
print "due to lack of a real strlen function.\n";

print "\nTranslate from: ([P]HYLIP or [L]AMARC)\n";
$sourcetype = <>;
# End of input yields undef; treat it as an empty answer so the format
# checks below simply fail to match instead of warning about undef.
$sourcetype = "" unless defined $sourcetype;
$sourcetype = lc($sourcetype);
chomp($sourcetype);

print "\nTranslate to: ([P]HYLIP or [L]AMARC)\n";
$targettype = <>;
$targettype = "" unless defined $targettype;
$targettype = lc($targettype);
chomp($targettype);

print "Starting file is called: ";
$infilename = <>;
die "\nNo input file name was given.\n" unless defined $infilename;
# Remove the newline and surrounding blanks ourselves; the two-argument
# open used previously did this implicitly, the three-argument form does not.
$infilename =~ s/^\s+//;
$infilename =~ s/\s+$//;

# Three-argument open takes the name literally, so a name starting with
# ">" or ending with "|" can no longer change the open mode.  Stop here on
# failure rather than letting the converters run on an unopened handle.
open(SOURCE, "<", $infilename)
   or die "Cannot open '$infilename' for reading: $!\n";

# LAMARC -> PHYLIP.  The LAMARC file is read whole from SOURCE and one
# output file per population is written, named "<targettype>_infile_pop<N>"
# with N counting from 1.  For every locus of a population a header line
# "<numseq> <numbases>" is written, followed by the sequence lines copied
# unchanged from the input.
if ($sourcetype =~ /^l|^lamarc/ && $targettype =~ /^p|^phylip/) {
   @lines = <SOURCE>;
   close(SOURCE);

   $firstline = shift(@lines);
   die "The input file is empty.\n" unless defined $firstline;
   # s/^\s+// also removes a newline.  Stripping one character at a time
   # with s/^.// cannot, and so loops forever on a blank line.
   $firstline =~ s/^\s+//;

   # A first line starting with "s" or "n" is treated as DNA sequence data;
   # only then is the user asked about the layout of the sequence lines.
   $dnaseq = 0;
   $interleaved = 0;
   if ($firstline =~ /^s|^n/) {
      $firstline =~ s/^.//;
      $dnaseq = 1;
      print "\n\nIs the data [I]nterleaved or [S]equential? ";
      $answer = <>;
      $answer = "" unless defined $answer;
      $interleaved = (lc($answer) =~ /^i|^interleave/);
   }

   # The population and locus counts are the next two integers.  Reading
   # them with one pattern takes whole numbers (not just one digit) and
   # makes sure the locus count is not read from the population field.
   if ($firstline =~ /^\s*(\d+)\s+(\d+)/) {
      $numpop = $1;
      $numloci = $2;
   } else {
      die "Expected the population and locus counts on the first line.\n";
   }

# if dna bases are datatype then read number of bases present per locus.
   $secondline = shift(@lines);
   $secondline = "" unless defined $secondline;
   if ($dnaseq) {
      @numbases = ($secondline =~ /(\d+)/g);
      # A single length on the line applies to every locus.
      if (@numbases == 1) {
         @numbases = ($numbases[0]) x $numloci;
      }
      if (@numbases < $numloci) {
         die "Found " . scalar(@numbases) . " locus length(s) on the second"
            . " line, but $numloci loci were announced.\n";
      }
   } else {
      warn "Warning: no 's' or 'n' datatype on the first line; locus lengths"
         . " are unknown, so no sequence lines will be copied.\n";
   }

   for($pop = 0; $pop < $numpop; $pop++) {
      $popout = $pop + 1;
      $outname = $targettype . "_infile_pop$popout";
      open(TARGET, ">", $outname)
         or die "Cannot open '$outname' for writing: $!\n";
      for($locus = 0; $locus < $numloci; $locus++) {
         $locusout = $locus + 1;
         $dataline = shift(@lines);
         die "Input ended before population $popout, locus $locusout.\n"
            unless defined $dataline;
         $dataline =~ s/^\s+//;
         $numseq[$locus] = $dataline;
         $numseq[$locus] =~ s/\n$//;

         if (!$dnaseq) {
            # Without locus lengths the header has an empty length field
            # and no data lines are consumed for this locus.
            print TARGET "$numseq[$locus] \n";
            next;
         }
         print TARGET "$numseq[$locus] $numbases[$locus]\n";

         # The header text is echoed as-is; only its leading integer is
         # used as the number of sequences to copy.
         $seqcount = ($numseq[$locus] =~ /^(\d+)/) ? $1 : 0;

         if ($interleaved) {
            # Start counting afresh for every locus; a count left over
            # from the previous locus would skip this locus entirely.
            $basecount = 0;
            while($seqcount > 0 && $basecount < $numbases[$locus]) {
               for($seq = 0; $seq < $seqcount; $seq++) {
                  $dataline = shift(@lines);
                  die "Input ended inside population $popout, locus"
                     . " $locusout.\n" unless defined $dataline;
                  print TARGET "$dataline";
               }
               # The first line of a sequence starts with a 10-character
               # name, which must not be counted as bases.
               if ($basecount == 0) {
                  $basecount -= 10;
               }
               # -1 for the trailing newline.
               $basecount += length($dataline)-1;
            }
         } else {
            for($seq = 0; $seq < $seqcount; $seq++) {
               $basecount = 0;
               while($basecount < $numbases[$locus]) {
                  $dataline = shift(@lines);
                  die "Input ended inside population $popout, locus"
                     . " $locusout.\n" unless defined $dataline;
                  print TARGET "$dataline";
                  # Skip the 10-character name on the first line.
                  if ($basecount == 0) {
                     $basecount -= 10;
                  }
                  # -1 for the trailing newline.
                  $basecount += length($dataline)-1;
               }
            }
         }
      }
      # Buffered write errors only show up when the handle is closed.
      close(TARGET) or die "Error while writing '$outname': $!\n";
   }
   print "DONE\n";
   exit;
}

# PHYLIP -> LAMARC.  The PHYLIP file is read whole from SOURCE; the user
# states how many loci it holds and whether the sequence lines are
# interleaved.  A single file "<targettype>_infile" is written with a
# LAMARC header line, a line of locus lengths, and then for each locus the
# number of sequences followed by the collected lines of every sequence.
if ($targettype =~ /^l|^lamarc/ && $sourcetype =~ /^p|^phylip/) {
   @lines = <SOURCE>;
   close(SOURCE);
   print "How many loci are present in the data: ";
   $numloci = <>;
   $numloci = "" unless defined $numloci;
   $numloci =~ s/^\s+//;
   $numloci =~ s/\s+$//;
   die "The number of loci must be a whole number.\n"
      unless $numloci =~ /^\d+$/;
   print "\n\nIs the data [I]nterleaved or [S]equential? ";
   $answer = <>;
   $answer = "" unless defined $answer;
   $interleaved = (lc($answer) =~ /^i|^interleave/);

# read in the data
   for($locus = 0; $locus < $numloci; $locus++) {
      $locusout = $locus + 1;
      $dataline = shift(@lines);
      die "Input ended before the header of locus $locusout.\n"
         unless defined $dataline;
      # split(" ", ...) ignores leading whitespace by itself, so no
      # separate stripping loop (which hung on blank lines) is needed.
      ($numseq[$locus],$numbases[$locus]) = split(" ",$dataline);
      unless (defined $numbases[$locus]
              && $numseq[$locus] =~ /^\d+$/
              && $numbases[$locus] =~ /^\d+$/) {
         $shown = $dataline;
         chomp($shown);
         die "Locus $locusout: expected \"<sequences> <sites>\" but"
            . " found: $shown\n";
      }
      for($seq = 0; $seq < $numseq[$locus]; $seq++) {
         $data[$locus][$seq] = "";
      }
      if ($interleaved) {
         # Start counting afresh for every locus; a count left over from
         # the previous locus would skip this locus entirely.
         $basecount = 0;
         while($numseq[$locus] > 0 && $basecount < $numbases[$locus]) {
            for($seq = 0; $seq < $numseq[$locus]; $seq++) {
               $dataline = shift(@lines);
               die "Input ended inside locus $locusout.\n"
                  unless defined $dataline;
               $data[$locus][$seq] .= $dataline;
            }
            # The first line of a sequence starts with a 10-character
            # name, which must not be counted as bases.
            if ($basecount == 0) {
               $basecount -= 10;
            }
            # -1 for the trailing newline.
            $basecount += length($dataline)-1;
         }
      } else {
         for($seq = 0; $seq < $numseq[$locus]; $seq++) {
            $basecount = 0;
            while($basecount < $numbases[$locus]) {
               $dataline = shift(@lines);
               die "Input ended inside locus $locusout.\n"
                  unless defined $dataline;
               $data[$locus][$seq] .= $dataline;
               # Skip the 10-character name on the first line.
               if ($basecount == 0) {
                  $basecount -= 10;
               }
               # -1 for the trailing newline.
               $basecount += length($dataline)-1;
            }
         }
      }
   }

# now print the data back out
   # The output file is only created once the whole input has been read,
   # so a malformed input does not leave a header-only file behind.
   $outname = $targettype . "_infile";
   open(TARGET, ">", $outname)
      or die "Cannot open '$outname' for writing: $!\n";
   print TARGET "s 1 $numloci originally_a_phylip_infile\n";
   for($locus = 0; $locus < $numloci; $locus++) {
      print TARGET "$numbases[$locus] ";
   }
   print TARGET "\n";
   for($locus = 0; $locus < $numloci; $locus++) {
      print TARGET "$numseq[$locus]\n";
      for($seq = 0; $seq < $numseq[$locus]; $seq++) {
         print TARGET "$data[$locus][$seq]";
      }
   }
   # Buffered write errors only show up when the handle is closed.
   close(TARGET) or die "Error while writing '$outname': $!\n";
   print "The new infile is now of type, sequential\n";
   print "DONE\n";
   exit;
}

# Reached only when neither conversion branch above matched the requested
# source/target pair (each branch ends with exit).
print "Could not translate between " . $sourcetype . " and " . $targettype . ".\n";
