#!/usr/bin/perl ########################################################################## # Script: gettable2data.pl # # Description: Read in a raw data file, collect phenotype results on mutant # isolates that have a pattern of drug resistance mutations that # was present in two or more isolates, calculate Mean, SD, # Median,etc,. # The output of this script will be used as the input for # reproducibility of susceptibility tests on mutant isolates # ('analysis_table2.R') to create [Table 2]. # # Input: # 1. raw data files: vircoRAWdata_PI.txt virologicRAWdata_PI.txt # vircoRAWdata_RT.txt virologicRAWdata_RT.txt # 2. name of drug class: either 'PI' or 'RTI' # # Usage: # ./gettable2data.pl vircoRAWdata_PI.txt virologicRAWdata_PI.txt PI > vircotable2PI.txt # ./gettable2data.pl virologicRAWdata_PI.txt vircoRAWdata_PI.txt PI > virologictable2PI.txt # ./gettable2data.pl vircoRAWdata_RT.txt virologicRAWdata_RT.txt RTI > vircotable2RT.txt # ./gettable2data.pl virologicRAWdata_RT.txt vircoRAWdata_RT.txt RTI > virologictable2RT.txt # ########################################################################## use strict; my %drugCodes = ('PI' => ["APV","IDV","LPV","NFV","RTV","SQV"], 'RTI' => ["AZT","DDI","DDC","TC","D4T","ABC","NVP","DLV","EFV"]); my @drugs = @{$drugCodes{$ARGV[2]}}; my $Hash1 = getHash("$ARGV[0]",\@drugs); my $Hash2 = getHash("$ARGV[1]",\@drugs); print "Drug\tNumber\tNumpatterns\tMut_patterns\tNumisloates\tFold\t"; print "Log-Fold\tPattern_mean(Log-Fold)\t10^Pattern_mean\tAbsvalue(Log_Fold-Patternmean)\tPattern_SD(Log-Fold)\tMedian(Log-Fold)\t10^Median(Log-Fold)\tAbsDev(Log-Fold)\tMAD(Log-Fold)\n"; foreach my $drug (@drugs){ my $num=0; my $numofalliso = getnum($Hash1->{$drug},$Hash2->{$drug}); my $madlog= getmad($Hash1->{$drug},$Hash2->{$drug}); foreach my $pattern(keys %{$Hash1->{$drug}}) { if(defined $Hash2->{$drug}{$pattern} && scalar(@{$Hash1->{$drug}{$pattern}})>1 && scalar(@{$Hash2->{$drug}{$pattern}})>1 ) { $num++; my $fold = $Hash1->{$drug}{$pattern}; my $foldlog = getlogdata($fold); my $length = scalar(@$fold); my $meaninpat = getmean($foldlog); my $meaninpatAnti = 10** $meaninpat; my $sdinpat = getsd($foldlog); my $medianlog = getmedian($foldlog); my $medianlogAnti = 10**$medianlog; for(my $i=0; $i<$length; $i++) { my $deviancemean = $foldlog->[$i] - $meaninpat; my $absdevmean = abs($deviancemean); my $devlog = $foldlog->[$i] - $medianlog; my $absdevlog = abs($devlog); print "$drug\t$numofalliso\t$num\t$pattern\t$length\t$fold->[$i]\t"; printf "%.2f\t%.2f\t%.2f\t",$foldlog->[$i],$meaninpat,$meaninpatAnti; printf "%.2f\t%.2f\t%.2f\t",$absdevmean,$sdinpat, $medianlog; printf "%.2f\t%.2f\t%.2f\t\n", $medianlogAnti,$absdevlog,$madlog; } } } } #-------------------------------------------- sub getHash { my ($filename,$drugs) = @_; open(IN, "<$filename") || die "Can not open the file $filename"; my $hash = {}; while (my $line = ) { $line =~ s/[\r\n]+$//; my @fields = split(/\t/, $line); next if !$fields[2]; ## skip if the line does not contain mutation list foreach my $i (0..scalar(@$drugs)-1) { push @{$hash->{$drugs->[$i]}->{$fields[$i+2]}},$fields[$i+3] if $fields[$i+3]; } } close(IN); return $hash; } #--------------------------------------------- sub getmean { my ($rfold) = @_; if (scalar(@$rfold)<=1) { print "Error -- ", scalar(@$rfold),"\n";} my @fold = sort {$a <=> $b} @$rfold; if (scalar(@fold)<=1) { print "Error -- ", scalar(@fold),"\n";} my $sum=0; my $length = scalar(@fold); #print "----$length\n"; for (my $i=0; $i<$length; $i++) { $sum += $fold[$i]; } my $mean = $sum/$length; return $mean; } #--------------------------------------------- sub getmedian { my ($rfold) = @_; if (scalar(@$rfold)<=1) { print "Error -- ", scalar(@$rfold),"\n";} my @fold = sort {$a <=> $b} @$rfold; if (scalar(@fold)<=1) { print "Error -- ", scalar(@fold),"\n";} my $median; my $length = scalar(@fold); #print "----$length\n"; if(($length%2) ==0) { $median = ($fold[$length/2-1] + $fold[($length/2)])/2; }else { $median = $fold[($length-1)/2]; } return $median; } #--------------------------------------------- sub getmadwithindata { my ($rfold) = @_; if (scalar(@$rfold)<=1) { print "Error -- ", scalar(@$rfold),"\n";} my @fold = sort {$a <=> $b} @$rfold; if (scalar(@fold)<=1) { print "Error -- ", scalar(@fold),"\n";} my $median; my $length = scalar(@fold); #print "----$length\n"; if(($length%2) ==0) { $median = ($fold[$length/2-1] + $fold[($length/2)])/2; }else { $median = $fold[($length-1)/2]; } my @madarray; for (my $i=0; $i<$length; $i++) { my $deviance = $fold[$i] - $median; my $absdev = abs($deviance); push @madarray, $absdev; } my $mad = getmedian(\@madarray); return $mad; } #--------------------------------------------- sub getsd { my ($rfold) = @_; if (scalar(@$rfold)<=1) { print "Error -- ", scalar(@$rfold),"\n";} my @fold = sort {$a <=> $b} @$rfold; if (scalar(@fold)<=1) { print "Error -- ", scalar(@fold),"\n";} my $sum=0; my $summation=0; my $length = scalar(@fold); #print "----$length\n"; for (my $i=0; $i<$length; $i++) { $sum += $fold[$i]; } my $mean = $sum/$length; for (my $j=0; $j<$length; $j++) { $summation += ($fold[$j]-$mean)*($fold[$j]-$mean); } my $sd = sqrt($summation/($length -1)); return $sd; } #------------------------------------------------- sub getlogdata { my ($fold) = @_; my @foldlog; foreach my $fold (@$fold) { if ($fold le 0) { push @foldlog,-99; }else { push @foldlog, log10($fold); } } return \@foldlog; } #------------------------------------------------ sub log10 { my $n = shift; return -999 if $n<=0; return log($n)/log(10); } #--------------------------------------------- sub getnum { my ($vircohash,$virologichash) = @_; my $num=0; foreach my $pattern(keys %$vircohash) { if(defined $virologichash->{$pattern} && scalar(@{$vircohash->{$pattern}})>1 && scalar(@{$virologichash->{$pattern}})>1 ) { my $fold = $vircohash->{$pattern}; my $length = scalar(@$fold); $num+= $length; } } return ($num); } #--------------------------------------------- sub getmad { my ($vircohash,$virologichash) = @_; my @madarray; foreach my $pattern(keys %$vircohash) { if(defined $virologichash->{$pattern} && scalar(@{$vircohash->{$pattern}})>1 && scalar(@{$virologichash->{$pattern}})>1 ) { my $fold = $vircohash->{$pattern}; my $foldlog = getlogdata($fold); my $length = scalar(@$fold); my $medianlog = getmedian($foldlog); for(my $i=0; $i<$length; $i++) { my $devlog = $foldlog->[$i] - $medianlog; my $absdevlogAnti = abs($devlog); push @madarray, $absdevlogAnti; } } } my $mad = getmedian(\@madarray); return ($mad); }