#!/usr/bin/perl ########################################################################## # Script: gettable3data.pl # # Description: Read in a raw RT data file, collect phenotype results on mutant # isolates that have the predefined common patterns, and # count how many phenotypes are above the cut-offs # that were pre-defined as the mean plus 2 standard deviations # calculated using the log-transformed levels of fold-resistance # on wild type isolates. # The output of this script will be used as the input for the test # on drug Resistance prediction using biological cut-offs for # the common NRTI resistance mutations patterns ('gettable3.R') # to create [Table 3]. # # # Input: # 1. raw data file: vircoRAWdata_RT.txt/virologicRAWdata_RT.txt # 2. the name of assay: either 'AV' or 'PS' # # Usage: # ./gettable3data.pl vircoRAWdata_RT.txt AV >vircotable3data.txt # ./gettable3data.pl virologicRAWdata_RT.txt PS>virologictable3data.txt # ########################################################################## use strict; ## Setting up the biological cut-off for each drug for each assay ## The biological cut-offs were pre-defined as the mean plus 2 standard deviations calculated ## using the log-transformed levels of fold-resistance on wild type isolates. my $cutoffhash={'AV' => {TC => 2.88, ABC => 2.75, DDI => 3.80, D4T =>3.31, AZT => 3.03}, 'PS' => {TC => 1.53, ABC => 1.35, DDI => 1.32, D4T =>1.32, AZT => 1.87} }; ## The list of drugs to pull my @drugs = ("TC","ABC","D4T","DDI","AZT"); ## The list of the common mutation patterns to pull my @patterns = ("M184V","M41L,M184V,T215Y", "M41L,M184V,L210W,T215Y"); ## Read in phenotypes on RT mutants my $Hash = &getHashRT($ARGV[0]); ## Get the name of assay from the input my $assay = $ARGV[1]; if(! defined $assay) { print "Assay is needed"; exit;} elsif($assay ne 'AV' && $assay ne 'PS'){ print "Assay should be either AV or PS"; exit;} print "Drug\tPattern\tNumofIsolates\t10^(Mean+2*SD(Log-Fold))\t"; print "Fold\tNumofIsolates(Fold>cutoff)\n"; foreach my $drug(@drugs){ foreach my $pattern (@patterns){ my $info = $Hash->{$drug}{$pattern}; my $length = scalar(@$info); ## How many phenotype results are above the cut-off my $numofoutlier1= getoutlier($info, $cutoffhash->{$assay}->{$drug}); for(my $i=0;$i<$length; $i++) { my $fold = $info->[$i]; print "$drug\t$pattern\t$length\t"; print "$cutoffhash->{$drug}{1}\t"; print "$fold\t$numofoutlier1\n"; } } } #-------------------------------------------- sub getHashRT { my ($filename) = @_; open(IN, "<$filename") || die "Can not open the file $filename"; while (my $line = ) { $line =~ s/[\r\n]+$//; #print $line; my @fields = split (/\t/, $line); next if !$fields[2]; push @{$Hash->{DDI}{$fields[2]}}, $fields[4] if $fields[4]; push @{$Hash->{D4T}{$fields[2]}}, $fields[7] if $fields[7]; push @{$Hash->{ABC}{$fields[2]}}, $fields[8] if $fields[8]; push @{$Hash->{TC}{$fields[2]}}, $fields[6] if $fields[6]; push @{$Hash->{AZT}{$fields[2]}}, $fields[3] if $fields[3]; } close(IN); return $hash; } #--------------------------------------------- sub getoutlier { my ($rinfo, $cutoff) = @_; my @info = sort {$a <=> $b} @$rinfo; my $numofoutlier=0; my $length = scalar(@info); #print "----$length\n"; for (my $i=0; $i<$length; $i++) { my $fold = $info[$i]; if($fold > $cutoff) { $numofoutlier++; } } return $numofoutlier; }