########################################################################## # This program reads in four text files and output a simplied version of # table2 in the paper. # # The four input files: # vircotable2PI.txt # vircotable2RT.txt # virologictable2PI.txt # virologictable2RT.txt # # Output file format: # Refer to table2 # # In this program, parametric and non-parametric testing were both done # to test whether the standard deviation and median absolute deviance # were significantly different between the two assays for isolates # that have a pattern of drug resistance mutations that was present in # two or more isolates tested by the Antivirogram and PhenoSense assays. ########################################################################## baseurl <- 'http://hivdb.stanford.edu/pages/published_analysis/phenotypeCompJAID2005/transformedData/' geturl <- function(tailend) { return(paste(baseurl, tailend,sep='')) } .eval <- function(evaltext) { eval(parse(text=evaltext), envir=sys.frame()) } assays <- c('virco', 'virologic') assaynames <- list(virco='AV', virologic='PS') proteins <- c('PI','RT') tables <- list() for (protein in proteins) { for (assay in assays) { filename <- paste(assay, 'table2', protein, '.txt', sep='') table.name <- paste(assay, protein, '.table', sep='') .eval(paste(table.name, ' <- read.table(geturl(filename), header=T, row.names=NULL)', sep='')) .eval(paste('cur.table <- ', table.name)) cur.table$logFold <- log10(cur.table$Fold) cur.table$residMedian <- 0 * cur.table$logFold drug <- unique(cur.table$Drug) drug <- sort(drug) ndrug <- length(drug) npattern <- numeric(ndrug) for (stat in c('mean', 'N', 'sd', 'median', 'MAD')) { .eval(paste(stat, '<- matrix(0,ndrug,2)', sep='')) } .eval(paste(assay, protein, '.MSE <- numeric(ndrug)',sep='')) .eval(paste(assay, protein, '.df <- numeric(ndrug)',sep='')) for (i in 1:ndrug) { tmp.table <- cur.table[(cur.table$Drug == drug[i]),] pattern <- unique(tmp.table$Mut) npattern[i] <- length(pattern) for (stat in c('mean', 'N', 'sd', 'median', 'MAD')) { .eval(paste('tmp', stat, '<- numeric(npattern[i])', sep='')) } for (j in 1:npattern[i]) { log.fold <- tmp.table$logFold[(tmp.table$Mut == pattern[j])] fold <- 10^log.fold tmpmean[j] <- 10^mean(log.fold) tmpN[j] <- length(log.fold) tmpsd[j] <- sd(log.fold) tmpmedian[j] <- median(fold) tmpMAD[j] <- mad(log.fold, constant=1.0) cur.table$residMedian[(cur.table$Mut == pattern[j]) & (cur.table$Drug == drug[i])] <- log.fold - median(log.fold) .eval(paste(table.name, ' <- cur.table')) } for (stat in c('mean', 'N', 'sd', 'median', 'MAD')) { .eval(paste('curtmp <- tmp', stat, sep='')) .eval(paste(stat, '[i,]<- c(min(curtmp[(curtmp > 0)]), max(curtmp[(curtmp > 0)]))', sep='')) } .eval(paste(assay, protein, '.MSE[i] <- anova(lm(tmp.table$logFold ~ factor(tmp.table$Mut)))$Mean[2]', sep='')) .eval(paste(assay, protein, '.df[i] <- anova(lm(tmp.table$logFold ~ factor(tmp.table$Mut)))$Df[2]', sep='')) } for (stat in c('mean', 'N', 'sd', 'median', 'MAD')) { .eval(paste('cur.stat <- ', stat)) .eval(paste(assaynames[assay], stat, '.max <- cur.stat[,2]', sep='')) .eval(paste(assaynames[assay], stat, '.min <- cur.stat[,1]', sep='')) .eval(paste(assaynames[assay], stat, '.table <- data.frame(', assaynames[assay], stat, '.min, ', assaynames[assay], stat, '.max)', sep='')) } } fligner <- numeric(ndrug) virco.mad <- numeric(ndrug) virologic.mad <- numeric(ndrug) virco.name <- paste('virco', protein, '.table', sep='') virologic.name <- paste('virologic', protein, '.table', sep='') for (i in 1:ndrug) { .eval(paste('virco.table <- ', virco.name, sep='')) virco <- virco.table$residMedian[(virco.table$Drug == drug[i])] .eval(paste('virologic.table <- ', virologic.name, sep='')) virologic <- virologic.table$residMedian[(virologic.table$Drug == drug[i])] fligner[i] <- fligner.test(list(virco, virologic))$p.value virco.mad[i] <- mad(virco, constant=1.0) virologic.mad[i] <- mad(virologic, constant=1.0) } virco.MSE <- .eval(paste('virco', protein, '.MSE', sep='')) virco.sd <- sqrt(virco.MSE) virologic.MSE <- .eval(paste('virologic', protein, '.MSE', sep='')) virologic.sd <- sqrt(virologic.MSE) virco.df <- .eval(paste('virco', protein, '.df', sep='')) virologic.df <- .eval(paste('virologic', protein, '.df', sep='')) fstat <- virco.MSE / virologic.MSE ftest <- 0 * fstat for (i in 1:ndrug) { ftest[i] <- min(2 * (1 - pf(fstat[i], virco.df[i], virologic.df[i])), 2 * pf(fstat[i], virco.df[i], virologic.df[i])) } .eval(paste(protein, '.out.table <- data.frame(drug, npattern, AVmean.table, PSmean.table, virologic.sd, virco.sd, ftest, AVmedian.table, PSmedian.table, virologic.mad, virco.mad, fligner)', sep='')) } write.table(PI.out.table, file='Table2PI.csv', sep=',', row.names=F, col.names=T, quote=F) write.table(RT.out.table, file='Table2RT.csv', sep=',', row.names=F, col.names=T, quote=F)