#!/usr/bin/perl #perl script to reformat the output of phase 2.1 into the imput format for #treemap. #Usage is PH2TL.pl #where$ $ designates the name of the output file that is #generated by PHASE, stands for the name of the _recom file #generated by phase, $$ is the name of the input file for #TreeLD and is a text file that contains the phenotype #information,comprised of one phenotype per line in the same order as #the input file for PHASE. # Version2 also reads in the output of the -recomb file and adjusts # the map accordingly use strict; { if (@ARGV<3 || @ARGV >4) { usage(); } my$pofn=0; my$rfn=1; my$phn=2; my$tln=3; if (@ARGV==3) # no file to adjust recombination rates { $phn=1; $tln=2; } print "\nphenotype file \t\t$ARGV[$phn]\n"; open(DAT,$ARGV[$phn])or die "Can't open $ARGV[2]\n"; my@ph=; close(DAT); chomp @ph; my@pht; for (@ph) { if ($_=~/^(\d+\.\d)/) { push @pht,$1; } elsif ($_=~/^(\d+)/) { push @pht, "$1.0"; } } my$i; my$nol; my@out; #map info if (@ARGV==4) { print "genetic distances\t$ARGV[$rfn]\n"; open(DAT,$ARGV[$rfn]) or die "can't open $ARGV[$rfn]\n"; my@map=; close (DAT); chomp @map; my@me=split /\s+/,@map[0]; $nol=@me; my@dists; for ($i=1;$i<$nol;++$i) { my@ests; my$j; for ($j=1;$j<@map;++$j) { my@line=split/\s+/,@map[$j]; push @ests,$line[$i]; } $dists[$i]=int(median(@ests)*($me[$i]-$me[$i-1])); } for ($i=1;$i<$nol;++$i) { $me[$i]=$me[$i-1]+$dists[$i]; } $out[0]="P"; for (@me) { $out[0]=$out[0]." $_"; } } print "PHASE output file\t$ARGV[$pofn]\n\n"; open(DAT,$ARGV[$pofn]) or die "Can't open $ARGV[$pofn]\n"; my@in=; close(DAT); chomp @in; my$noi; $i=0; while ($in[$i] ne "END INPUT_SUMMARY") { if ($in[$i]=~/^Number of Individuals:\s+(\d+)/) { $noi=$1; if ($noi!=@pht) { $i=@pht; print"Error: different number of individuals in $ARGV[$pofn] and $ARGV[$phn] ($i!=$noi.\n"; print "Maybe you have extra linebreaks in $ARGV[$phn]\n"; exit(8); } } if (@ARGV==4) { if ($in[$i]=~/^Number of Loci:\s+(\d+)/) { if ($nol!=$1) { print"Error: different number of markers in $ARGV[$pofn] ($1) than in $ARGV[$rfn] ($nol)\n"; exit(8); } } } else { if ($in[$i]=~/^Number of Loci:\s+(\d+)/) { $nol=$1; } if ($in[$i]=~/^Positions of loci: (.+)/) { push @out, makemap($1); } } ++$i; if($i>@in){print"File Format of $ARGV[$pofn] wrong\n";exit(8);} } while ($in[$i] ne "BEGIN BESTPAIRS1") { ++$i; if($i>@in){print"File Format of $ARGV[$pofn] wrong\n";exit(8);} } my$k; my$j; for ($k=0;$k<$noi;++$k) { ++$i; $in[$i]=~/^(\d)/; push @out,"$pht[$j] 2"; ++$j; ++$i; push @out,getseq($in[$i]); ++$i; push @out,getseq($in[$i]); } open(DAT,">$ARGV[$tln]"); print DAT"#Converted file, generated by PH2TM.pl\n"; print DAT"#original filename $ARGV[$pofn], "; if (@ARGV==4) { print DAT"map information $ARGV[$rfn]\n"; } else { print DAT"no finescale recombination map used\n"; } print DAT"# phenotype information in $ARGV[$phn]\n"; print DAT"#$noi individuals $nol markers\n"; for (@out) { print DAT"$_\n"; } close(DAT); print "$ARGV[$tln] generated\n\n"; } ######################################################## sub makemap { my@map=split " ",$_[0]; my$out="P"; for (@map) { my$in=int($_); $out="$out$in "; } return($out); } ######################################################## sub getseq { my@seq=split " ",$_[0]; my$out; for (@seq) { if ($_=~/(\d)/) { $out.=$1; } } return $out; } ############################ sub median { my$ret; my@ap=sort {$a<=>$b}@_; if (@ap/2==int(@ap/2)) { $ret=($ap[@ap/2-1]+$ap[@ap/2])/2; } else { $ret=$ap[int(@ap/2)]; } return ($ret); } ########################################### sub usage { print"\nUsage is PH2TL.pl , where designates the name of the output file that is generated by PHASE, stands for the name of the _recom file generated by phase, is the name of the input file for TreeLD and is a text file that contains the phenotype information,comprised of one phenotype per line in the same order as the input file for PHASE. The file is optional.\n Please remember that it is suggested to increase the number of iterations of the final run of the PHASE algorithm (the -X option) to generate good estimates of fine-scale recombination.\n\n"; exit(8); }