#!/usr/bin/perl ########################################################################## # Script: gettable1data.pl # # Description: Read in a raw data file, collect phenotype results on wild type # isolates, calculate Mean, SD, Median,.etc,. # The output of this script will be used as the input for # reproducibility of susceptibility tests on wild type isolates # ('analysis_table1.R') to create [Table 1]. # # Input: # 1.raw data files : vircoRAWdata_PI.txt/virologicRAWdata_PI.txt # vircoRAWdata_RT.txt/virologicRAWdata_RT.txt # 2. name of drug class: either 'PI' or 'RTI' # # Usages: # ./gettable1data.pl vircoRAWdata_PI.txt PI > vircotable1PI.txt # ./gettable1data.pl virologicRAWdata_PI.txt PI > virologictable1PI.txt # ./gettable1data.pl vircoRAWdata_RT.txt RTI >vircotable1RT.txt # ./gettable1data.pl virologicRAWdata_RT.txt RTI >virologictable1RT.txt # ########################################################################## use strict; my %drugCodes = ('PI' => ["APV","IDV","LPV","NFV","RTV","SQV"], 'RTI' => ["AZT","DDI","DDC","TC","D4T","ABC","NVP","DLV","EFV"]); my @drugs = @{$drugCodes{$ARGV[1]}}; my $Hash = getHash("$ARGV[0]",\@drugs); print "Drug\tnumber\tFold\t"; print "Log-Fold\tMean(Log-Fold)\t10^Mean(Log-Fold)\tSD(Log-Fold)\t10^SD(Log-Fold)\tMedian(Log-Fold)\t10^Median(Log-Fold)\tAbsDev(Log-Fold)\tMAD(Log-Fold)\n"; foreach my $drug (@drugs){ my $fold = $Hash->{$drug}; my $length = scalar(@$fold); my $foldlog = getlogdata($fold); my $meanlog = getmean($foldlog); my $medianlog = getmedian($foldlog); my $sdlog = getsd($foldlog); my $meanlogAnti = 10**$meanlog; my $medianlogAnti = 10**$medianlog; my $sdlogAnti = 10**$sdlog; my $madlog = getmad($foldlog); for(my $i=0; $i<$length; $i++) { my $devlog = $foldlog->[$i] - $medianlog; my $absdevlog = abs($devlog); print "$drug\t$length\t$fold->[$i]\t"; printf "%.2f\t%.2f\t",$foldlog->[$i],$meanlog; printf "%.2f\t%.2f\t%.2f\t%.2f\t", $meanlogAnti, $sdlog,$sdlogAnti,$medianlog; printf "%.2f\t%.2f\t%.2f\t\n", $medianlogAnti,$absdevlog,$madlog; } } #-------------------------------------------- sub getHash { my ($filename,$drugs) = @_; open(IN, "<$filename") || die "Can not open the file $filename"; my $hash = {}; while (my $line = ) { chomp $line; my @fields = split(/\t/, $line); next if $fields[2]; ## skip if the line contains mutation list foreach my $i (0..scalar(@$drugs)-1) { push @{$hash->{$drugs->[$i]}},$fields[$i+3] if $fields[$i+3]; } } close(IN); return $hash; } #--------------------------------------------- sub getmean { my ($rfold) = @_; if (scalar(@$rfold)<=1) { print "Error -- ", scalar(@$rfold),"\n";} my @fold = sort {$a <=> $b} @$rfold; if (scalar(@fold)<=1) { print "Error -- ", scalar(@fold),"\n";} my $sum=0; my $length = scalar(@fold); #print "----$length\n"; for (my $i=0; $i<$length; $i++) { $sum += $fold[$i]; } my $mean = $sum/$length; return $mean; } #--------------------------------------------- sub getmedian { my ($rfold) = @_; if (scalar(@$rfold)<=1) { print "Error -- ", scalar(@$rfold),"\n";} my @fold = sort {$a <=> $b} @$rfold; if (scalar(@fold)<=1) { print "Error -- ", scalar(@fold),"\n";} my $median; my $length = scalar(@fold); #print "----$length\n"; if(($length%2) ==0) { $median = ($fold[$length/2-1] + $fold[($length/2)])/2; }else { $median = $fold[($length-1)/2]; } return $median; } #--------------------------------------------- sub getmad { my ($rfold) = @_; if (scalar(@$rfold)<=1) { print "Error -- ", scalar(@$rfold),"\n";} my @fold = sort {$a <=> $b} @$rfold; if (scalar(@fold)<=1) { print "Error -- ", scalar(@fold),"\n";} my $median; my $length = scalar(@fold); #print "----$length\n"; if(($length%2) ==0) { $median = ($fold[$length/2-1] + $fold[($length/2)])/2; }else { $median = $fold[($length-1)/2]; } my @madarray; for (my $i=0; $i<$length; $i++) { my $deviance = $fold[$i] - $median; my $absdev = abs($deviance); push @madarray, $absdev; } my $mad = getmedian(\@madarray); return $mad; } #--------------------------------------------- sub getsd { my ($rfold) = @_; if (scalar(@$rfold)<=1) { print "Error -- ", scalar(@$rfold),"\n";} my @fold = sort {$a <=> $b} @$rfold; if (scalar(@fold)<=1) { print "Error -- ", scalar(@fold),"\n";} my $sum=0; my $summation=0; my $length = scalar(@fold); #print "----$length\n"; for (my $i=0; $i<$length; $i++) { $sum += $fold[$i]; } my $mean = $sum/$length; for (my $j=0; $j<$length; $j++) { $summation += ($fold[$j]-$mean)*($fold[$j]-$mean); } my $sd = sqrt($summation/($length -1)); return $sd; } #------------------------------------------------- sub getlogdata { my ($fold) = @_; my @foldlog; foreach my $fold (@$fold) { push @foldlog, log10($fold); } return \@foldlog; } #------------------------------------------------ sub log10 { my $n = shift; return -999 if $n<=0; return log($n)/log(10); }