## ----style, echo = FALSE, results = 'asis'---------------------------------
BiocStyle::markdown()

## ----global_options, include=FALSE--------------------------------------------------------------------------
knitr::opts_chunk$set(fig.width=10, fig.height=7, warning=FALSE, message=FALSE)
options(width=110)
set.seed(123)

## -----------------------------------------------------------------------------------------------------------
## load ABAEnrichment package
library(ABAEnrichment)
## create input vector with candidate genes
gene_ids = c('NCAPG', 'APOL4', 'NGFR', 'NXPH4', 'C21orf59', 'CACNG2', 'AGTR1',
    'ANO1', 'BTBD3', 'MTUS1', 'CALB1', 'GYG1', 'PAX2')
is_candidate = rep(1, length(gene_ids))
input_hyper = data.frame(gene_ids, is_candidate)
head(input_hyper)

## ---- eval=FALSE--------------------------------------------------------------------------------------------
#  ## run enrichment analyses with default parameters
#  ## for the adult and developing human brain
#  res_adult = aba_enrich(input_hyper, dataset='adult')
#  res_devel = aba_enrich(input_hyper, dataset='5_stages')

## ----results='hide'-----------------------------------------------------------------------------------------
## run enrichment analysis with less cutoffs and randomsets
## to save computation time
res_devel = aba_enrich(input_hyper, dataset='5_stages',
    cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100)

## -----------------------------------------------------------------------------------------------------------
## extract first element from the output list, which contains the statistics
fwers_devel = res_devel[[1]]
## see results for the brain regions with highest enrichment
## for children (3-11 yrs, age_category 3)
fwers_3 = fwers_devel[fwers_devel[,1]==3, ]
head(fwers_3)

## -----------------------------------------------------------------------------------------------------------
res_devel[2:3]

## ----eval=FALSE---------------------------------------------------------------------------------------------
#  ## run enrichment analysis, with randomsets dependent on gene length
#  res_len = aba_enrich(input_hyper, gene_len=TRUE)
#  ## run the same analysis using gene coordinates
#  ## from GRCh38 instead of the default GRCh37
#  res_len_grch38 = aba_enrich(input_hyper, gene_len=TRUE, ref_genome='grch38')

## -----------------------------------------------------------------------------------------------------------
## assign random scores to the genes used above
scores = sample(1:50, length(gene_ids))
input_wicox = data.frame(gene_ids, scores)
head(input_wicox)

## ---- results='hide'----------------------------------------------------------------------------------------
## test for enrichment of expressed genes with high scores in the adult brain
## using the Wilcoxon rank-sum test
res_wilcox = aba_enrich(input_wicox, test='wilcoxon',
    cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100)

## -----------------------------------------------------------------------------------------------------------
head(res_wilcox[[1]])

## -----------------------------------------------------------------------------------------------------------
## create a toy example dataset with two counts per gene
high_A_genes = c('RFFL', 'NTS', 'LIPE', 'GALNT6', 'GSN', 'BTBD16', 'CERS2')
low_A_genes = c('GDA', 'ENC1', 'EGR4', 'VIPR1', 'DOC2A', 'OASL', 'FRY', 'NAV3')
A_counts = c(sample(20:30, length(high_A_genes)),
    sample(5:15, length(low_A_genes)))
B_counts = c(sample(5:15, length(high_A_genes)),
    sample(20:30, length(low_A_genes)))
input_binom = data.frame(gene_ids=c(high_A_genes, low_A_genes),
    A_counts, B_counts)
head(input_binom)

## -----------------------------------------------------------------------------------------------------------
## run binomial test
res_binom = aba_enrich(input_binom, cutoff_quantiles=c(0.2,0.9),
    test='binomial', n_randsets=100, silent=TRUE)
head(res_binom[[1]])

## -----------------------------------------------------------------------------------------------------------
## create a toy example with four counts per gene
high_substi_genes = c('RFFL', 'NTS', 'LIPE', 'GALNT6', 'GSN', 'BTBD16', 'CERS2')
low_substi_genes = c('ENC1', 'EGR4', 'NPTX1', 'DOC2A', 'OASL', 'FRY', 'NAV3')
subs_non_syn = c(sample(5:15, length(high_substi_genes), replace=TRUE),
    sample(0:5, length(low_substi_genes), replace=TRUE))
subs_syn = sample(5:15, length(c(high_substi_genes, low_substi_genes)),
    replace=TRUE)
vari_non_syn = c(sample(0:5, length(high_substi_genes), replace=TRUE),
    sample(0:10, length(low_substi_genes), replace=TRUE))
vari_syn = sample(5:15, length(c(high_substi_genes, low_substi_genes)),
    replace=TRUE)
input_conti = data.frame(gene_ids=c(high_substi_genes, low_substi_genes),
    subs_non_syn, subs_syn, vari_non_syn, vari_syn)
head(input_conti)

## the corresponding contingency table for the first gene would be:
matrix(input_conti[1, 2:5], ncol=2, dimnames=list(c('non-synonymous',
    'synonymous'), c('substitution','variable')))

## ---- results='hide'----------------------------------------------------------------------------------------
res_conti = aba_enrich(input_conti, test='contingency',
    cutoff_quantiles=c(0.7,0.8,0.9), n_randset=100)


## -----------------------------------------------------------------------------------------------------------
head(res_conti[[1]])

## -----------------------------------------------------------------------------------------------------------
## create input vector with a candidate region on chromosome 8
## and background regions on chromosome 7, 8 and 9
regions = c('8:82000000-83000000', '7:1300000-56800000',
    '7:74900000-148700000', '8:7400000-44300000', '8:47600000-146300000',
    '9:0-39200000', '9:69700000-140200000')
is_candidate = c(1, rep(0,6))
input_region = data.frame(regions, is_candidate)

## ----results='hide'-----------------------------------------------------------------------------------------
## run enrichment analysis for the adult human brain
res_region = aba_enrich(input_region, dataset='adult',
    cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100)

## -----------------------------------------------------------------------------------------------------------
## look at the results from the enrichment analysis
fwers_region = res_region[[1]]
head(fwers_region)
## see which genes are located in the candidate region
input_genes = res_region[[2]]
candidate_genes = input_genes[input_genes[,2]==1,]
candidate_genes

## -----------------------------------------------------------------------------------------------------------
## get expression data for the first 5 brain regions
## from the last aba_enrich-analysis
top_regions = fwers_region[1:5, 'structure_id']
top_regions
expr = get_expression(top_regions, background=FALSE)
head(expr)

## ---- eval=FALSE--------------------------------------------------------------------------------------------
#  ## get expression data independent from previous aba_enrich analysis
#  regions = c('Allen:12926', 'Allen:4738', 'Allen:4671', 'Allen:12909')
#  gene_ids = c('CHMP4C', 'FABP12', 'FABP4', 'FABP5', 'FABP9', 'IMPA1',
#      'PAG1', 'PMP2', 'SLC10A5', 'SNX16', 'ZFAND1')
#  expr2 = get_expression(regions, gene_ids=gene_ids, dataset='adult',
#      background=FALSE)

## -----------------------------------------------------------------------------------------------------------
## get expression data for the first 5 brain regions
## from the last aba_enrich-analysis
top_regions = fwers_region[1:5, 'structure_id']
plot_expression(top_regions, background=FALSE)

## -----------------------------------------------------------------------------------------------------------
## plot the same expression data without dendrogram
plot_expression(top_regions, dendro=FALSE, background=FALSE)

## -----------------------------------------------------------------------------------------------------------
## plot expression of some genes for the frontal neocortex (Allen:10161)
## in age category 3 (children, 3-11 yrs)
gene_ids = c('ENSG00000157764', 'ENSG00000163041', 'ENSG00000182158',
    'ENSG00000147889', 'ENSG00000103126', 'ENSG00000184634')
plot_expression('Allen:10161', gene_ids=gene_ids, dataset='5_stages',
    age_category=3)

## -----------------------------------------------------------------------------------------------------------
## get IDs of the substructures of the frontal neocortex (Allen:10161)
## for which expression data are available
subs = get_sampled_substructures('Allen:10161')
subs
## get the full name of those substructures
get_name(subs)
## get the superstructures of the frontal neocortex (from general to special)
supers = get_superstructures('Allen:10161')
supers
## get the full names of those superstructures
get_name(supers)

## -----------------------------------------------------------------------------------------------------------
## get structure IDs of brain regions that contain 'accumbens' in their name
get_id('accumbens')
## get structure IDs of brain regions that contain 'telencephalon' in their name
get_id('telencephalon')

## -----------------------------------------------------------------------------------------------------------
## get all brain regions included in ABAEnrichment together with thier IDs
all_regions = get_id('')
head(all_regions)

## ---- results='hide'----------------------------------------------------------------------------------------
## run an enrichment analysis with 7 candidate and 7 background genes
## for the developing brain
gene_ids = c('FOXJ1', 'NTS', 'LTBP1', 'STON2', 'KCNJ6', 'AGT', 
  'ANO3', 'TTR', 'ELAVL4', 'BEAN1', 'TOX', 'EPN3', 'PAX2', 'KLHL1')
is_candidate = rep(c(1,0), each=7)
input_hyper = data.frame(gene_ids, is_candidate)
res = aba_enrich(input_hyper, dataset='5_stages',
    cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100)

## -----------------------------------------------------------------------------------------------------------
head(res[[1]])
## see which candidate genes are annotated to brain regions with a FWER < 0.01
anno = get_annotated_genes(res, fwer_threshold=0.1)
anno

## -----------------------------------------------------------------------------------------------------------
## find out which of the above genes have expression
## above the 70% and 90% expression-cutoff
## in the basal ganglia of the adult human brain (Allen:4276)
anno2 = get_annotated_genes(structure_ids='Allen:4276', dataset='adult', 
  cutoff_quantiles=c(0.7,0.9), genes=gene_ids)
anno2

## -----------------------------------------------------------------------------------------------------------
## use previously defined input genes dataframe
head(input_hyper)

## ----results='hide'-----------------------------------------------------------------------------------------
## run enrichment analysis with developmental effect score
res_dev_effect = aba_enrich(input_hyper, dataset='dev_effect',
    cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100)

## -----------------------------------------------------------------------------------------------------------
## see the 5 brain regions with the lowest FWERs
top_regions = res_dev_effect[[1]][1:5, ]
top_regions

## -----------------------------------------------------------------------------------------------------------
## plot developmental effect score of the 5 brain regions with the lowest FWERs
plot_expression(top_regions[ ,'structure_id'])

## -----------------------------------------------------------------------------------------------------------
sessionInfo()