## ---- echo = FALSE-------------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ---- results = "hide", message = FALSE, warning=FALSE-------------------
library(IsoformSwitchAnalyzeR)

## ------------------------------------------------------------------------
data("exampleSwitchList")
exampleSwitchList

## ---- results = "hide", message = FALSE, warning=FALSE-------------------
### isoformSwitchAnalysisPart1 needs the genomic sequence to predict ORFs. 
# These are readily available from Biocindoctor as BSgenome objects: 
# http://bioconductor.org/packages/release/BiocViews.html#___BSgenome
# Here we use Hg19 - which can be download by copy/pasting the following two lines into the R terminal:
# source("https://bioconductor.org/biocLite.R")
# biocLite("BSgenome.Hsapiens.UCSC.hg19")

library(BSgenome.Hsapiens.UCSC.hg19)

exampleSwitchList <- isoformSwitchAnalysisPart1(
    input=exampleSwitchList, 
    genomeObject = Hsapiens, 
    dIFcutoff = 0.4,         # Set high for short runtime in example data
    outputSequences = FALSE # keeps the function from outputting the fasta files from this example
) 

## ------------------------------------------------------------------------
extractSwitchSummary(exampleSwitchList, dIFcutoff = 0.4) # supply the same cutoff to the summary function

## ---- results = "hide", message = FALSE----------------------------------
exampleSwitchList <- isoformSwitchAnalysisPart2(
    switchAnalyzeRlist      = exampleSwitchList, 
    dIFcutoff               = 0.4,   # Set high for short runtime in example data
    n                       = 10,    # if plotting was enabled it would only output the top 10 switches
    removeNoncodinORFs      = TRUE,  # Because ORF was predicted de novo
    pathToCPATresultFile    = system.file("extdata/cpat_results.txt"   , package = "IsoformSwitchAnalyzeR"),
    pathToPFAMresultFile    = system.file("extdata/pfam_results.txt"   , package = "IsoformSwitchAnalyzeR"),
    pathToSignalPresultFile = system.file("extdata/signalP_results.txt", package = "IsoformSwitchAnalyzeR"),
    codingCutoff            = 0.725, # the coding potential cutoff we suggested for human 
    outputPlots             = FALSE  # keeps the function from outputting the plots from this example
)

## ------------------------------------------------------------------------
extractSwitchSummary(exampleSwitchList, filterForConsequences = TRUE, dIFcutoff = 0.4) # supply the same cutoff to the summary function

## ---- message = FALSE----------------------------------------------------
extractTopSwitches(exampleSwitchList, filterForConsequences = TRUE, n=5)

## ---- fig.width=12, fig.height=7-----------------------------------------
switchPlot(exampleSwitchList, gene='KIF1B')

## ------------------------------------------------------------------------
data("exampleSwitchListAnalyzed")
exampleSwitchListAnalyzed

## ---- fig.width=12, fig.height=8-----------------------------------------
extractConsequenceSummary(exampleSwitchListAnalyzed, asFractionTotal = FALSE)

## ---- fig.width=8, fig.height=6, warning=FALSE---------------------------
data("exampleSwitchListAnalyzed")

### Vulcano like plot:
ggplot(data=exampleSwitchListAnalyzed$isoformFeatures, aes(x=dIF, y=-log10(isoform_switch_q_value))) +
     geom_point(
        aes( color=abs(dIF) > 0.1 & isoform_switch_q_value < 0.05 ), # default cutoff
        size=1
    ) +
    geom_hline(yintercept = -log10(0.05), linetype='dashed') + # default cutoff
    geom_vline(xintercept = c(-0.1, 0.1), linetype='dashed') + # default cutoff
    facet_grid(condition_1 ~ condition_2, scales = 'free') +
    scale_color_manual('Signficant\nIsoform Switch', values = c('black','red')) +
    theme_bw()

## ---- fig.width=8, fig.height=6, warning=FALSE---------------------------
### Switch vs Gene changes:

ggplot(data=exampleSwitchListAnalyzed$isoformFeatures, aes(x=gene_log2_fold_change, y=dIF)) +
    geom_point(
        aes( color=abs(dIF) > 0.1 & isoform_switch_q_value < 0.05 ), # default cutoff
        size=1
    ) + 
    facet_grid(condition_1 ~ condition_2, scales = 'free') +
    geom_hline(yintercept = 0, linetype='dashed') +
    geom_vline(xintercept = 0, linetype='dashed') +
    scale_color_manual('Signficant\nIsoform Switch', values = c('black','red')) +
    labs(x='Gene log2 fold change', y='dIF') +
    theme_bw()

## ------------------------------------------------------------------------
data("exampleSwitchList")         # A newly created switchAnalyzeRlist + switch analysis
names(exampleSwitchList)

data("exampleSwitchListAnalyzed") # A fully analyzed switchAnalyzeRlist
names(exampleSwitchListAnalyzed)

## ------------------------------------------------------------------------
### Preview
head(exampleSwitchList, 2)

identical(
    head(exampleSwitchList), head(exampleSwitchList$isoformFeatures)
)

identical(
    tail(exampleSwitchList), tail(exampleSwitchList$isoformFeatures)
)

### Dimentions
dim(exampleSwitchList$isoformFeatures)

nrow(exampleSwitchList)
ncol(exampleSwitchList)
dim(exampleSwitchList)

## ------------------------------------------------------------------------
exampleSwitchListAnalyzed

### Subset
subsetSwitchAnalyzeRlist(
    exampleSwitchListAnalyzed,
    exampleSwitchListAnalyzed$isoformFeatures$gene_name == 'ARHGEF19'
)

## ------------------------------------------------------------------------
head(exampleSwitchList$exons,2)

## ---- warning=FALSE, message=FALSE---------------------------------------
cuffDB <- prepareCuffExample()
cuffDB

## ---- warning=FALSE------------------------------------------------------
aSwitchList <- importCufflinksCummeRbund(cuffDB)
aSwitchList

## ---- warning=FALSE------------------------------------------------------
testSwitchList <- importCufflinksFiles(
    pathToGTF                      = system.file('extdata/chr1_snippet.gtf',             package = "cummeRbund"),
    pathToGeneDEanalysis           = system.file('extdata/gene_exp.diff',                package = "cummeRbund"),
    pathToIsoformDEanalysis        = system.file('extdata/isoform_exp.diff',             package = "cummeRbund"),
    pathToGeneFPKMtracking         = system.file('extdata/genes.fpkm_tracking',          package = "cummeRbund"),
    pathToIsoformFPKMtracking      = system.file('extdata/isoforms.fpkm_tracking',       package = "cummeRbund"),
    pathToIsoformReadGroupTracking = system.file('extdata/isoforms.read_group_tracking', package = "cummeRbund"),
    pathToSplicingAnalysis         = system.file('extdata/splicing.diff',                package = "cummeRbund"),
    pathToReadGroups               = system.file('extdata/read_groups.info',             package = "cummeRbund"),
    pathToRunInfo                  = system.file('extdata/run.info',                     package = "cummeRbund"),
    fixCufflinksAnnotationProblem=TRUE,
    quiet=TRUE
)
testSwitchList

## ---- warning=FALSE, message=FALSE---------------------------------------
### Construct data needed from example data in cummeRbund package
### (The recomended way of analyzing Cufflinks/Cuffdiff datat is via importCufflinksCummeRbund()
### This is jus an easy way to get some example data ).
cuffDB <- prepareCuffExample()

## ------------------------------------------------------------------------
isoRepCount <- repCountMatrix(isoforms(cuffDB))
isoRepCount$isoform_id <- rownames(isoRepCount)

### Design matrix
designMatrix <- cummeRbund::replicates(cuffDB)[,c('rep_name','sample_name')]
colnames(designMatrix) <- c('sampleID','condition')

localAnnotaion <- import(system.file("extdata/chr1_snippet.gtf", package="cummeRbund"))[,c('transcript_id','gene_id')]
colnames(localAnnotaion@elementMetadata)[1] <- 'isoform_id'


### Take a look at the data
head(isoRepCount, 2)

designMatrix

head(localAnnotaion, 3)

### Create switchAnalyzeRlist
aSwitchList <- importRdata(
    isoformCountMatrix=isoRepCount,
    designMatrix=designMatrix,
    isoformExonAnnoation=localAnnotaion,
    showProgress=FALSE
)
aSwitchList

## ---- warning=FALSE, message=FALSE---------------------------------------
aSwitchList <- importGTF(pathToGTF = system.file("extdata/chr1_snippet.gtf", package = "cummeRbund"))

## ------------------------------------------------------------------------
aSwitchList

head(aSwitchList,2)
head(aSwitchList$conditions,2)

## ------------------------------------------------------------------------
data("exampleSwitchList")
exampleSwitchList

exampleSwitchListFiltered <- preFilter(exampleSwitchList, geneExpressionCutoff = 1, isoformExpressionCutoff = 0, removeSingleIsoformGenes = TRUE)

exampleSwitchListFilteredStrict <- preFilter(exampleSwitchList, geneExpressionCutoff = 10, isoformExpressionCutoff = 3, removeSingleIsoformGenes = TRUE)

## ---- warning=FALSE------------------------------------------------------
# Load example data and prefilter
data("exampleSwitchList")
exampleSwitchList <- preFilter(exampleSwitchList) # preFilter for fast runtime

# Perfom test (takes ~10 sec)
exampleSwitchListAnalyzed <- isoformSwitchTestDRIMSeq(
    switchAnalyzeRlist = exampleSwitchList,
    testIntegration='isoform_only',
    reduceToSwitchingGenes=TRUE
)

# Summarize swiching geatures
extractSwitchSummary(exampleSwitchListAnalyzed)

## ------------------------------------------------------------------------
### Show arguments of function
args(isoformSwitchTest)

## ------------------------------------------------------------------------
# Load example data and prefilter
data("exampleSwitchList")
exampleSwitchList <- preFilter(exampleSwitchList) # preFilter for fast runtime

# Perfom test
exampleSwitchListAnalyzed <- isoformSwitchTest(exampleSwitchList)

# Summarize swiching geatures
extractSwitchSummary(exampleSwitchListAnalyzed)

## ------------------------------------------------------------------------
extractCalibrationStatus(exampleSwitchListAnalyzed)

## ------------------------------------------------------------------------
# Perfom test
exampleSwitchListAnalyzed <- isoformSwitchTest(exampleSwitchList, calibratePvalues = FALSE)

# Summarize swiching geatures
extractSwitchSummary(exampleSwitchListAnalyzed)

# check callibration status
extractCalibrationStatus(exampleSwitchListAnalyzed)

## ------------------------------------------------------------------------
### This example relies on the example data from the 'Usage of The Statistical Test' section above 

### analyzeORF needs the genomic sequence to predict ORFs. 
# These are readily advailable from Biocindoctor as BSgenome orbjects: 
# http://bioconductor.org/packages/release/BiocViews.html#___BSgenome
# Here we use Hg19 - which can be download by copy/pasting the following two lines into the R termminal:
# source("https://bioconductor.org/biocLite.R")
# biocLite("BSgenome.Hsapiens.UCSC.hg19")

library(BSgenome.Hsapiens.UCSC.hg19)

exampleSwitchListAnalyzed <- analyzeORF(exampleSwitchListAnalyzed, genomeObject = Hsapiens, showProgress=FALSE)

head(exampleSwitchListAnalyzed$orfAnalysis, 3)


## ------------------------------------------------------------------------
### This example relies on the example data from the 'Analyzing Open Reading Frames' section above 

exampleSwitchListAnalyzed <- extractSequence(
    exampleSwitchListAnalyzed, 
    genomeObject = Hsapiens,
    pathToOutput = '<insert_path>',
    writeToFile=FALSE # to avoid output when running this example data
)

head(exampleSwitchListAnalyzed$ntSequence,2)

head(exampleSwitchListAnalyzed$aaSequence,2)

## ------------------------------------------------------------------------
### Load test data (maching the external sequence analysis results)
data("exampleSwitchListIntermediary")
exampleSwitchListIntermediary

### Add PFAM analysis
exampleSwitchListAnalyzed <- analyzePFAM(
    switchAnalyzeRlist   = exampleSwitchListIntermediary,
    pathToPFAMresultFile = system.file("extdata/pfam_results.txt", package = "IsoformSwitchAnalyzeR"),
    showProgress=FALSE
    )

### Add SignalP analysis
exampleSwitchListAnalyzed <- analyzeSignalP(
    switchAnalyzeRlist       = exampleSwitchListAnalyzed,
    pathToSignalPresultFile = system.file("extdata/signalP_results.txt", package = "IsoformSwitchAnalyzeR")
    )
    
### Add CPAT analysis
exampleSwitchListAnalyzed <- analyzeCPAT(
    switchAnalyzeRlist   = exampleSwitchListAnalyzed,
    pathToCPATresultFile = system.file("extdata/cpat_results.txt", package = "IsoformSwitchAnalyzeR"),
    codingCutoff         = 0.725, # the coding potential cutoff we suggested for human
    removeNoncodinORFs   = TRUE   # because ORF was predicted de novo
    )

exampleSwitchListAnalyzed

## ------------------------------------------------------------------------
### This example relies on the example data from the 'Importing External Sequences Analysis' section above 

exampleSwitchListAnalyzed <- analyzeIntronRetention(exampleSwitchListAnalyzed, quiet=TRUE)

### overview of number of intron retentions (IR)
table(exampleSwitchListAnalyzed$isoformFeatures$IR)

## ------------------------------------------------------------------------
### This example relies on the example data from the 'Predicting Intron Retentions' section above 

# the consequences highlighted in the text above
consequencesOfInterest <- c('intron_retention','coding_potential','NMD_status','domains_identified','ORF_seq_similarity')

exampleSwitchListAnalyzed <- analyzeSwitchConsequences(
    exampleSwitchListAnalyzed,
    consequencesToAnalyze = consequencesOfInterest, 
    dIFcutoff = 0.4, # very high cutoff for fast runtimes
    showProgress=FALSE
)

extractSwitchSummary(exampleSwitchListAnalyzed, dIFcutoff = 0.4, filterForConsequences = FALSE)
extractSwitchSummary(exampleSwitchListAnalyzed, dIFcutoff = 0.4, filterForConsequences = TRUE)

## ------------------------------------------------------------------------
### Load already analyzed data
data("exampleSwitchListAnalyzed")

extractSwitchSummary(exampleSwitchListAnalyzed, filterForConsequences = FALSE)
extractSwitchSummary(exampleSwitchListAnalyzed, filterForConsequences = TRUE)

## ------------------------------------------------------------------------
### Load already analyzed data
data("exampleSwitchListAnalyzed")

### Lets reduce the switchAnalyzeRlist to only one condition
exampleSwitchListAnalyzedSubset <- subsetSwitchAnalyzeRlist(
    exampleSwitchListAnalyzed, 
    exampleSwitchListAnalyzed$isoformFeatures$condition_1 == 'iPS' &
    exampleSwitchListAnalyzed$isoformFeatures$condition_2 == 'hESC'
)
exampleSwitchListAnalyzedSubset

### Extract top 2 switching genes (by dIF values)
extractTopSwitches(
    exampleSwitchListAnalyzedSubset, 
    filterForConsequences = TRUE, 
    n = 2, 
    sortByQvals = FALSE
)

### Extract top 2 switching genes (by q-value)
extractTopSwitches(
    exampleSwitchListAnalyzedSubset, 
    filterForConsequences = TRUE, 
    n = 2, 
    sortByQvals = TRUE
)

## ------------------------------------------------------------------------
### Extract data.frame with all switching isoforms
switchingIso <- extractTopSwitches( 
    exampleSwitchListAnalyzedSubset, 
    filterForConsequences = TRUE, 
    n = NA,                  # n=NA: all features are returned
    extractGenes = FALSE,    # when FALSE isoforms are returned
    sortByQvals = TRUE
)

subset(switchingIso, gene_name == 'TNFRSF1B')

## ---- fig.width=12, fig.height=7-----------------------------------------
switchPlot(exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B')

## ---- fig.width=12, fig.height=6-----------------------------------------
### Load the large example dataset
data("exampleSwitchListAnalyzed")

### Extract summary
consequenceSummary <- extractConsequenceSummary(
    exampleSwitchListAnalyzed, 
    returnResult = TRUE, 
    plotGenes = TRUE
)

subset(consequenceSummary, featureCompared=='Intron retention')

## ---- fig.width=12, fig.height=6-----------------------------------------
symmaryStatistics <- extractGenomeWideAnalysis(
    switchAnalyzeRlist = exampleSwitchListAnalyzed,
    featureToExtract = 'isoformUsage', # default - alternatives are 'isoformExp', 'geneExp' and 'all'
    plot=TRUE,
    returnResult = TRUE
)

## ---- fig.width=12, fig.height=3-----------------------------------------
### Load already analyzed data
data("exampleSwitchListAnalyzed")

switchPlotTranscript(exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B')

## ---- fig.width=3, fig.height=3------------------------------------------
switchPlotGeneExp (exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B', condition1='iPS', condition2='hESC')

## ---- fig.width=4, fig.height=3------------------------------------------
switchPlotIsoExp(exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B', condition1='iPS', condition2='hESC')

## ---- fig.width=4, fig.height=3------------------------------------------
switchPlotIsoUsage(exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B', condition1='iPS', condition2='hESC')

## ------------------------------------------------------------------------
data("exampleSwitchListIntermediary")
ifMatrix <- extractExpressionMatrix(exampleSwitchListAnalyzed)

head(ifMatrix)

dim(ifMatrix)


## ---- fig.width=4, fig.height=3------------------------------------------
# correlation plot
ggplot(melt(cor(ifMatrix)), aes(x=Var1, y=Var2, fill=value)) + 
    geom_tile() +
    scale_fill_continuous('Correlation') +
    labs(x='Condition', y='Condition') +
    theme_minimal()

## ---- fig.width=8, fig.height=4------------------------------------------
library(pheatmap)
pheatmap(t(ifMatrix), show_colnames=FALSE)

## ---- results = "hide", message = FALSE----------------------------------
### Load example data
data("exampleSwitchListAnalyzed")

### Reduce datasize for fast runtime
randomGenes <- sample(unique(exampleSwitchListAnalyzed$isoformFeatures$gene_id), size = 40)
exampleSwitchListAnalyzedSubset <- subsetSwitchAnalyzeRlist(
    exampleSwitchListAnalyzed,
    exampleSwitchListAnalyzed$isoformFeatures$gene_id %in% randomGenes
)

### analyze the biological mechanismes
bioMechanismeAnalysis <- analyzeSwitchConsequences(
    exampleSwitchListAnalyzedSubset, 
    consequencesToAnalyze = c('tss','tts','intron_structure'),
    showProgress = FALSE
)$switchConsequence # only the consequences are interesting here

### subset to those with differences
bioMechanismeAnalysis <- bioMechanismeAnalysis[which(bioMechanismeAnalysis$isoformsDifferent),]

### extract the consequences of interest alerady stored in the switchAnalyzeRlist
myConsequences <- exampleSwitchListAnalyzedSubset$switchConsequence
myConsequences <- myConsequences[which(myConsequences$isoformsDifferent),]
myConsequences$isoPair <- paste(myConsequences$isoformUpregulated, myConsequences$isoformDownregulated) # id for specific iso comparison

### Obtain the mechanisms of the isoform switches with consequences
bioMechanismeAnalysis$isoPair <- paste(bioMechanismeAnalysis$isoformUpregulated, bioMechanismeAnalysis$isoformDownregulated)
bioMechanismeAnalysis <- bioMechanismeAnalysis[which(bioMechanismeAnalysis$isoPair %in% myConsequences$isoPair),]  # id for specific iso comparison

## ---- fig.width=3.5, fig.height=3.5--------------------------------------
### Create list with the isoPair ids for each consequencee
AS   <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'intron_structure')]
aTSS <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'tss'             )]
aTTS <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'tts'             )]

mechList <- list(
    AS=AS,
    aTSS=aTSS,
    aTTS=aTTS
)

### Create venn diagram
library(VennDiagram)
myVenn <- venn.diagram(mechList, col='transparent', alpha=0.4, fill=brewer.pal(n=3,name='Dark2'), filename=NULL)

### Plot the venn diagram
grid.newpage() ; grid.draw(myVenn)


## ------------------------------------------------------------------------
sessionInfo()