## ---- echo = FALSE------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ## ---- results = "hide", message = FALSE, warning=FALSE------------------- library(IsoformSwitchAnalyzeR) ## ------------------------------------------------------------------------ data("exampleSwitchList") exampleSwitchList ## ---- results = "hide", message = FALSE, warning=FALSE------------------- ### isoformSwitchAnalysisPart1 needs the genomic sequence to predict ORFs. # These are readily available from Biocindoctor as BSgenome objects: # http://bioconductor.org/packages/release/BiocViews.html#___BSgenome # Here we use Hg19 - which can be download by copy/pasting the following two lines into the R terminal: # source("https://bioconductor.org/biocLite.R") # biocLite("BSgenome.Hsapiens.UCSC.hg19") library(BSgenome.Hsapiens.UCSC.hg19) exampleSwitchList <- isoformSwitchAnalysisPart1( input=exampleSwitchList, genomeObject = Hsapiens, dIFcutoff = 0.4, # Set high for short runtime in example data outputSequences = FALSE # keeps the function from outputting the fasta files from this example ) ## ------------------------------------------------------------------------ extractSwitchSummary(exampleSwitchList, dIFcutoff = 0.4) # supply the same cutoff to the summary function ## ---- results = "hide", message = FALSE---------------------------------- exampleSwitchList <- isoformSwitchAnalysisPart2( switchAnalyzeRlist = exampleSwitchList, dIFcutoff = 0.4, # Set high for short runtime in example data n = 10, # if plotting was enabled it would only output the top 10 switches removeNoncodinORFs = TRUE, # Because ORF was predicted de novo pathToCPATresultFile = system.file("extdata/cpat_results.txt" , package = "IsoformSwitchAnalyzeR"), pathToPFAMresultFile = system.file("extdata/pfam_results.txt" , package = "IsoformSwitchAnalyzeR"), pathToSignalPresultFile = system.file("extdata/signalP_results.txt", package = "IsoformSwitchAnalyzeR"), codingCutoff = 0.725, # the coding potential cutoff we suggested for human outputPlots = FALSE # keeps the function from outputting the plots from this example ) ## ------------------------------------------------------------------------ extractSwitchSummary(exampleSwitchList, filterForConsequences = TRUE, dIFcutoff = 0.4) # supply the same cutoff to the summary function ## ---- message = FALSE---------------------------------------------------- extractTopSwitches(exampleSwitchList, filterForConsequences = TRUE, n=5) ## ---- fig.width=12, fig.height=7----------------------------------------- switchPlot(exampleSwitchList, gene='KIF1B') ## ------------------------------------------------------------------------ data("exampleSwitchListAnalyzed") exampleSwitchListAnalyzed ## ---- fig.width=12, fig.height=8----------------------------------------- extractConsequenceSummary(exampleSwitchListAnalyzed, asFractionTotal = FALSE) ## ---- fig.width=8, fig.height=6, warning=FALSE--------------------------- data("exampleSwitchListAnalyzed") ### Vulcano like plot: ggplot(data=exampleSwitchListAnalyzed$isoformFeatures, aes(x=dIF, y=-log10(isoform_switch_q_value))) + geom_point( aes( color=abs(dIF) > 0.1 & isoform_switch_q_value < 0.05 ), # default cutoff size=1 ) + geom_hline(yintercept = -log10(0.05), linetype='dashed') + # default cutoff geom_vline(xintercept = c(-0.1, 0.1), linetype='dashed') + # default cutoff facet_grid(condition_1 ~ condition_2, scales = 'free') + scale_color_manual('Signficant\nIsoform Switch', values = c('black','red')) + theme_bw() ## ---- fig.width=8, fig.height=6, warning=FALSE--------------------------- ### Switch vs Gene changes: ggplot(data=exampleSwitchListAnalyzed$isoformFeatures, aes(x=gene_log2_fold_change, y=dIF)) + geom_point( aes( color=abs(dIF) > 0.1 & isoform_switch_q_value < 0.05 ), # default cutoff size=1 ) + facet_grid(condition_1 ~ condition_2, scales = 'free') + geom_hline(yintercept = 0, linetype='dashed') + geom_vline(xintercept = 0, linetype='dashed') + scale_color_manual('Signficant\nIsoform Switch', values = c('black','red')) + labs(x='Gene log2 fold change', y='dIF') + theme_bw() ## ------------------------------------------------------------------------ data("exampleSwitchList") # A newly created switchAnalyzeRlist + switch analysis names(exampleSwitchList) data("exampleSwitchListAnalyzed") # A fully analyzed switchAnalyzeRlist names(exampleSwitchListAnalyzed) ## ------------------------------------------------------------------------ ### Preview head(exampleSwitchList, 2) identical( head(exampleSwitchList), head(exampleSwitchList$isoformFeatures) ) identical( tail(exampleSwitchList), tail(exampleSwitchList$isoformFeatures) ) ### Dimentions dim(exampleSwitchList$isoformFeatures) nrow(exampleSwitchList) ncol(exampleSwitchList) dim(exampleSwitchList) ## ------------------------------------------------------------------------ exampleSwitchListAnalyzed ### Subset subsetSwitchAnalyzeRlist( exampleSwitchListAnalyzed, exampleSwitchListAnalyzed$isoformFeatures$gene_name == 'ARHGEF19' ) ## ------------------------------------------------------------------------ head(exampleSwitchList$exons,2) ## ---- warning=FALSE, message=FALSE--------------------------------------- cuffDB <- prepareCuffExample() cuffDB ## ---- warning=FALSE------------------------------------------------------ aSwitchList <- importCufflinksCummeRbund(cuffDB) aSwitchList ## ---- warning=FALSE------------------------------------------------------ testSwitchList <- importCufflinksFiles( pathToGTF = system.file('extdata/chr1_snippet.gtf', package = "cummeRbund"), pathToGeneDEanalysis = system.file('extdata/gene_exp.diff', package = "cummeRbund"), pathToIsoformDEanalysis = system.file('extdata/isoform_exp.diff', package = "cummeRbund"), pathToGeneFPKMtracking = system.file('extdata/genes.fpkm_tracking', package = "cummeRbund"), pathToIsoformFPKMtracking = system.file('extdata/isoforms.fpkm_tracking', package = "cummeRbund"), pathToIsoformReadGroupTracking = system.file('extdata/isoforms.read_group_tracking', package = "cummeRbund"), pathToSplicingAnalysis = system.file('extdata/splicing.diff', package = "cummeRbund"), pathToReadGroups = system.file('extdata/read_groups.info', package = "cummeRbund"), pathToRunInfo = system.file('extdata/run.info', package = "cummeRbund"), fixCufflinksAnnotationProblem=TRUE, quiet=TRUE ) testSwitchList ## ---- warning=FALSE, message=FALSE--------------------------------------- ### Construct data needed from example data in cummeRbund package ### (The recomended way of analyzing Cufflinks/Cuffdiff datat is via importCufflinksCummeRbund() ### This is jus an easy way to get some example data ). cuffDB <- prepareCuffExample() ## ------------------------------------------------------------------------ isoRepCount <- repCountMatrix(isoforms(cuffDB)) isoRepCount$isoform_id <- rownames(isoRepCount) ### Design matrix designMatrix <- cummeRbund::replicates(cuffDB)[,c('rep_name','sample_name')] colnames(designMatrix) <- c('sampleID','condition') localAnnotaion <- import(system.file("extdata/chr1_snippet.gtf", package="cummeRbund"))[,c('transcript_id','gene_id')] colnames(localAnnotaion@elementMetadata)[1] <- 'isoform_id' ### Take a look at the data head(isoRepCount, 2) designMatrix head(localAnnotaion, 3) ### Create switchAnalyzeRlist aSwitchList <- importRdata( isoformCountMatrix=isoRepCount, designMatrix=designMatrix, isoformExonAnnoation=localAnnotaion, showProgress=FALSE ) aSwitchList ## ---- warning=FALSE, message=FALSE--------------------------------------- aSwitchList <- importGTF(pathToGTF = system.file("extdata/chr1_snippet.gtf", package = "cummeRbund")) ## ------------------------------------------------------------------------ aSwitchList head(aSwitchList,2) head(aSwitchList$conditions,2) ## ------------------------------------------------------------------------ data("exampleSwitchList") exampleSwitchList exampleSwitchListFiltered <- preFilter(exampleSwitchList, geneExpressionCutoff = 1, isoformExpressionCutoff = 0, removeSingleIsoformGenes = TRUE) exampleSwitchListFilteredStrict <- preFilter(exampleSwitchList, geneExpressionCutoff = 10, isoformExpressionCutoff = 3, removeSingleIsoformGenes = TRUE) ## ---- warning=FALSE------------------------------------------------------ # Load example data and prefilter data("exampleSwitchList") exampleSwitchList <- preFilter(exampleSwitchList) # preFilter for fast runtime # Perfom test (takes ~10 sec) exampleSwitchListAnalyzed <- isoformSwitchTestDRIMSeq( switchAnalyzeRlist = exampleSwitchList, testIntegration='isoform_only', reduceToSwitchingGenes=TRUE ) # Summarize swiching geatures extractSwitchSummary(exampleSwitchListAnalyzed) ## ------------------------------------------------------------------------ ### Show arguments of function args(isoformSwitchTest) ## ------------------------------------------------------------------------ # Load example data and prefilter data("exampleSwitchList") exampleSwitchList <- preFilter(exampleSwitchList) # preFilter for fast runtime # Perfom test exampleSwitchListAnalyzed <- isoformSwitchTest(exampleSwitchList) # Summarize swiching geatures extractSwitchSummary(exampleSwitchListAnalyzed) ## ------------------------------------------------------------------------ extractCalibrationStatus(exampleSwitchListAnalyzed) ## ------------------------------------------------------------------------ # Perfom test exampleSwitchListAnalyzed <- isoformSwitchTest(exampleSwitchList, calibratePvalues = FALSE) # Summarize swiching geatures extractSwitchSummary(exampleSwitchListAnalyzed) # check callibration status extractCalibrationStatus(exampleSwitchListAnalyzed) ## ------------------------------------------------------------------------ ### This example relies on the example data from the 'Usage of The Statistical Test' section above ### analyzeORF needs the genomic sequence to predict ORFs. # These are readily advailable from Biocindoctor as BSgenome orbjects: # http://bioconductor.org/packages/release/BiocViews.html#___BSgenome # Here we use Hg19 - which can be download by copy/pasting the following two lines into the R termminal: # source("https://bioconductor.org/biocLite.R") # biocLite("BSgenome.Hsapiens.UCSC.hg19") library(BSgenome.Hsapiens.UCSC.hg19) exampleSwitchListAnalyzed <- analyzeORF(exampleSwitchListAnalyzed, genomeObject = Hsapiens, showProgress=FALSE) head(exampleSwitchListAnalyzed$orfAnalysis, 3) ## ------------------------------------------------------------------------ ### This example relies on the example data from the 'Analyzing Open Reading Frames' section above exampleSwitchListAnalyzed <- extractSequence( exampleSwitchListAnalyzed, genomeObject = Hsapiens, pathToOutput = '', writeToFile=FALSE # to avoid output when running this example data ) head(exampleSwitchListAnalyzed$ntSequence,2) head(exampleSwitchListAnalyzed$aaSequence,2) ## ------------------------------------------------------------------------ ### Load test data (maching the external sequence analysis results) data("exampleSwitchListIntermediary") exampleSwitchListIntermediary ### Add PFAM analysis exampleSwitchListAnalyzed <- analyzePFAM( switchAnalyzeRlist = exampleSwitchListIntermediary, pathToPFAMresultFile = system.file("extdata/pfam_results.txt", package = "IsoformSwitchAnalyzeR"), showProgress=FALSE ) ### Add SignalP analysis exampleSwitchListAnalyzed <- analyzeSignalP( switchAnalyzeRlist = exampleSwitchListAnalyzed, pathToSignalPresultFile = system.file("extdata/signalP_results.txt", package = "IsoformSwitchAnalyzeR") ) ### Add CPAT analysis exampleSwitchListAnalyzed <- analyzeCPAT( switchAnalyzeRlist = exampleSwitchListAnalyzed, pathToCPATresultFile = system.file("extdata/cpat_results.txt", package = "IsoformSwitchAnalyzeR"), codingCutoff = 0.725, # the coding potential cutoff we suggested for human removeNoncodinORFs = TRUE # because ORF was predicted de novo ) exampleSwitchListAnalyzed ## ------------------------------------------------------------------------ ### This example relies on the example data from the 'Importing External Sequences Analysis' section above exampleSwitchListAnalyzed <- analyzeIntronRetention(exampleSwitchListAnalyzed, quiet=TRUE) ### overview of number of intron retentions (IR) table(exampleSwitchListAnalyzed$isoformFeatures$IR) ## ------------------------------------------------------------------------ ### This example relies on the example data from the 'Predicting Intron Retentions' section above # the consequences highlighted in the text above consequencesOfInterest <- c('intron_retention','coding_potential','NMD_status','domains_identified','ORF_seq_similarity') exampleSwitchListAnalyzed <- analyzeSwitchConsequences( exampleSwitchListAnalyzed, consequencesToAnalyze = consequencesOfInterest, dIFcutoff = 0.4, # very high cutoff for fast runtimes showProgress=FALSE ) extractSwitchSummary(exampleSwitchListAnalyzed, dIFcutoff = 0.4, filterForConsequences = FALSE) extractSwitchSummary(exampleSwitchListAnalyzed, dIFcutoff = 0.4, filterForConsequences = TRUE) ## ------------------------------------------------------------------------ ### Load already analyzed data data("exampleSwitchListAnalyzed") extractSwitchSummary(exampleSwitchListAnalyzed, filterForConsequences = FALSE) extractSwitchSummary(exampleSwitchListAnalyzed, filterForConsequences = TRUE) ## ------------------------------------------------------------------------ ### Load already analyzed data data("exampleSwitchListAnalyzed") ### Lets reduce the switchAnalyzeRlist to only one condition exampleSwitchListAnalyzedSubset <- subsetSwitchAnalyzeRlist( exampleSwitchListAnalyzed, exampleSwitchListAnalyzed$isoformFeatures$condition_1 == 'iPS' & exampleSwitchListAnalyzed$isoformFeatures$condition_2 == 'hESC' ) exampleSwitchListAnalyzedSubset ### Extract top 2 switching genes (by dIF values) extractTopSwitches( exampleSwitchListAnalyzedSubset, filterForConsequences = TRUE, n = 2, sortByQvals = FALSE ) ### Extract top 2 switching genes (by q-value) extractTopSwitches( exampleSwitchListAnalyzedSubset, filterForConsequences = TRUE, n = 2, sortByQvals = TRUE ) ## ------------------------------------------------------------------------ ### Extract data.frame with all switching isoforms switchingIso <- extractTopSwitches( exampleSwitchListAnalyzedSubset, filterForConsequences = TRUE, n = NA, # n=NA: all features are returned extractGenes = FALSE, # when FALSE isoforms are returned sortByQvals = TRUE ) subset(switchingIso, gene_name == 'TNFRSF1B') ## ---- fig.width=12, fig.height=7----------------------------------------- switchPlot(exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B') ## ---- fig.width=12, fig.height=6----------------------------------------- ### Load the large example dataset data("exampleSwitchListAnalyzed") ### Extract summary consequenceSummary <- extractConsequenceSummary( exampleSwitchListAnalyzed, returnResult = TRUE, plotGenes = TRUE ) subset(consequenceSummary, featureCompared=='Intron retention') ## ---- fig.width=12, fig.height=6----------------------------------------- symmaryStatistics <- extractGenomeWideAnalysis( switchAnalyzeRlist = exampleSwitchListAnalyzed, featureToExtract = 'isoformUsage', # default - alternatives are 'isoformExp', 'geneExp' and 'all' plot=TRUE, returnResult = TRUE ) ## ---- fig.width=12, fig.height=3----------------------------------------- ### Load already analyzed data data("exampleSwitchListAnalyzed") switchPlotTranscript(exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B') ## ---- fig.width=3, fig.height=3------------------------------------------ switchPlotGeneExp (exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B', condition1='iPS', condition2='hESC') ## ---- fig.width=4, fig.height=3------------------------------------------ switchPlotIsoExp(exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B', condition1='iPS', condition2='hESC') ## ---- fig.width=4, fig.height=3------------------------------------------ switchPlotIsoUsage(exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B', condition1='iPS', condition2='hESC') ## ------------------------------------------------------------------------ data("exampleSwitchListIntermediary") ifMatrix <- extractExpressionMatrix(exampleSwitchListAnalyzed) head(ifMatrix) dim(ifMatrix) ## ---- fig.width=4, fig.height=3------------------------------------------ # correlation plot ggplot(melt(cor(ifMatrix)), aes(x=Var1, y=Var2, fill=value)) + geom_tile() + scale_fill_continuous('Correlation') + labs(x='Condition', y='Condition') + theme_minimal() ## ---- fig.width=8, fig.height=4------------------------------------------ library(pheatmap) pheatmap(t(ifMatrix), show_colnames=FALSE) ## ---- results = "hide", message = FALSE---------------------------------- ### Load example data data("exampleSwitchListAnalyzed") ### Reduce datasize for fast runtime randomGenes <- sample(unique(exampleSwitchListAnalyzed$isoformFeatures$gene_id), size = 40) exampleSwitchListAnalyzedSubset <- subsetSwitchAnalyzeRlist( exampleSwitchListAnalyzed, exampleSwitchListAnalyzed$isoformFeatures$gene_id %in% randomGenes ) ### analyze the biological mechanismes bioMechanismeAnalysis <- analyzeSwitchConsequences( exampleSwitchListAnalyzedSubset, consequencesToAnalyze = c('tss','tts','intron_structure'), showProgress = FALSE )$switchConsequence # only the consequences are interesting here ### subset to those with differences bioMechanismeAnalysis <- bioMechanismeAnalysis[which(bioMechanismeAnalysis$isoformsDifferent),] ### extract the consequences of interest alerady stored in the switchAnalyzeRlist myConsequences <- exampleSwitchListAnalyzedSubset$switchConsequence myConsequences <- myConsequences[which(myConsequences$isoformsDifferent),] myConsequences$isoPair <- paste(myConsequences$isoformUpregulated, myConsequences$isoformDownregulated) # id for specific iso comparison ### Obtain the mechanisms of the isoform switches with consequences bioMechanismeAnalysis$isoPair <- paste(bioMechanismeAnalysis$isoformUpregulated, bioMechanismeAnalysis$isoformDownregulated) bioMechanismeAnalysis <- bioMechanismeAnalysis[which(bioMechanismeAnalysis$isoPair %in% myConsequences$isoPair),] # id for specific iso comparison ## ---- fig.width=3.5, fig.height=3.5-------------------------------------- ### Create list with the isoPair ids for each consequencee AS <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'intron_structure')] aTSS <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'tss' )] aTTS <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'tts' )] mechList <- list( AS=AS, aTSS=aTSS, aTTS=aTTS ) ### Create venn diagram library(VennDiagram) myVenn <- venn.diagram(mechList, col='transparent', alpha=0.4, fill=brewer.pal(n=3,name='Dark2'), filename=NULL) ### Plot the venn diagram grid.newpage() ; grid.draw(myVenn) ## ------------------------------------------------------------------------ sessionInfo()