## ----style, echo = FALSE, results = 'asis'--------------------------------- BiocStyle::markdown() ## ----global_options, include=FALSE-------------------------------------------------------------------------- knitr::opts_chunk$set(fig.width=10, fig.height=7, warning=FALSE, message=FALSE) options(width=110) set.seed(123) ## ----------------------------------------------------------------------------------------------------------- ## load ABAEnrichment package library(ABAEnrichment) ## create input vector with candidate genes gene_ids = c('NCAPG', 'APOL4', 'NGFR', 'NXPH4', 'C21orf59', 'CACNG2', 'AGTR1', 'ANO1', 'BTBD3', 'MTUS1', 'CALB1', 'GYG1', 'PAX2') is_candidate = rep(1, length(gene_ids)) input_hyper = data.frame(gene_ids, is_candidate) head(input_hyper) ## ---- eval=FALSE-------------------------------------------------------------------------------------------- # ## run enrichment analyses with default parameters # ## for the adult and developing human brain # res_adult = aba_enrich(input_hyper, dataset='adult') # res_devel = aba_enrich(input_hyper, dataset='5_stages') ## ----results='hide'----------------------------------------------------------------------------------------- ## run enrichment analysis with less cutoffs and randomsets ## to save computation time res_devel = aba_enrich(input_hyper, dataset='5_stages', cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100) ## ----------------------------------------------------------------------------------------------------------- ## extract first element from the output list, which contains the statistics fwers_devel = res_devel[[1]] ## see results for the brain regions with highest enrichment ## for children (3-11 yrs, age_category 3) fwers_3 = fwers_devel[fwers_devel[,1]==3, ] head(fwers_3) ## ----------------------------------------------------------------------------------------------------------- res_devel[2:3] ## ----eval=FALSE--------------------------------------------------------------------------------------------- # ## run enrichment analysis, with randomsets dependent on gene length # res_len = aba_enrich(input_hyper, gene_len=TRUE) # ## run the same analysis using gene coordinates # ## from GRCh38 instead of the default GRCh37 # res_len_grch38 = aba_enrich(input_hyper, gene_len=TRUE, ref_genome='grch38') ## ----------------------------------------------------------------------------------------------------------- ## assign random scores to the genes used above scores = sample(1:50, length(gene_ids)) input_wicox = data.frame(gene_ids, scores) head(input_wicox) ## ---- results='hide'---------------------------------------------------------------------------------------- ## test for enrichment of expressed genes with high scores in the adult brain ## using the Wilcoxon rank-sum test res_wilcox = aba_enrich(input_wicox, test='wilcoxon', cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100) ## ----------------------------------------------------------------------------------------------------------- head(res_wilcox[[1]]) ## ----------------------------------------------------------------------------------------------------------- ## create a toy example dataset with two counts per gene high_A_genes = c('RFFL', 'NTS', 'LIPE', 'GALNT6', 'GSN', 'BTBD16', 'CERS2') low_A_genes = c('GDA', 'ENC1', 'EGR4', 'VIPR1', 'DOC2A', 'OASL', 'FRY', 'NAV3') A_counts = c(sample(20:30, length(high_A_genes)), sample(5:15, length(low_A_genes))) B_counts = c(sample(5:15, length(high_A_genes)), sample(20:30, length(low_A_genes))) input_binom = data.frame(gene_ids=c(high_A_genes, low_A_genes), A_counts, B_counts) head(input_binom) ## ----------------------------------------------------------------------------------------------------------- ## run binomial test res_binom = aba_enrich(input_binom, cutoff_quantiles=c(0.2,0.9), test='binomial', n_randsets=100, silent=TRUE) head(res_binom[[1]]) ## ----------------------------------------------------------------------------------------------------------- ## create a toy example with four counts per gene high_substi_genes = c('RFFL', 'NTS', 'LIPE', 'GALNT6', 'GSN', 'BTBD16', 'CERS2') low_substi_genes = c('ENC1', 'EGR4', 'NPTX1', 'DOC2A', 'OASL', 'FRY', 'NAV3') subs_non_syn = c(sample(5:15, length(high_substi_genes), replace=TRUE), sample(0:5, length(low_substi_genes), replace=TRUE)) subs_syn = sample(5:15, length(c(high_substi_genes, low_substi_genes)), replace=TRUE) vari_non_syn = c(sample(0:5, length(high_substi_genes), replace=TRUE), sample(0:10, length(low_substi_genes), replace=TRUE)) vari_syn = sample(5:15, length(c(high_substi_genes, low_substi_genes)), replace=TRUE) input_conti = data.frame(gene_ids=c(high_substi_genes, low_substi_genes), subs_non_syn, subs_syn, vari_non_syn, vari_syn) head(input_conti) ## the corresponding contingency table for the first gene would be: matrix(input_conti[1, 2:5], ncol=2, dimnames=list(c('non-synonymous', 'synonymous'), c('substitution','variable'))) ## ---- results='hide'---------------------------------------------------------------------------------------- res_conti = aba_enrich(input_conti, test='contingency', cutoff_quantiles=c(0.7,0.8,0.9), n_randset=100) ## ----------------------------------------------------------------------------------------------------------- head(res_conti[[1]]) ## ----------------------------------------------------------------------------------------------------------- ## create input vector with a candidate region on chromosome 8 ## and background regions on chromosome 7, 8 and 9 regions = c('8:82000000-83000000', '7:1300000-56800000', '7:74900000-148700000', '8:7400000-44300000', '8:47600000-146300000', '9:0-39200000', '9:69700000-140200000') is_candidate = c(1, rep(0,6)) input_region = data.frame(regions, is_candidate) ## ----results='hide'----------------------------------------------------------------------------------------- ## run enrichment analysis for the adult human brain res_region = aba_enrich(input_region, dataset='adult', cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100) ## ----------------------------------------------------------------------------------------------------------- ## look at the results from the enrichment analysis fwers_region = res_region[[1]] head(fwers_region) ## see which genes are located in the candidate region input_genes = res_region[[2]] candidate_genes = input_genes[input_genes[,2]==1,] candidate_genes ## ----------------------------------------------------------------------------------------------------------- ## get expression data for the first 5 brain regions ## from the last aba_enrich-analysis top_regions = fwers_region[1:5, 'structure_id'] top_regions expr = get_expression(top_regions, background=FALSE) head(expr) ## ---- eval=FALSE-------------------------------------------------------------------------------------------- # ## get expression data independent from previous aba_enrich analysis # regions = c('Allen:12926', 'Allen:4738', 'Allen:4671', 'Allen:12909') # gene_ids = c('CHMP4C', 'FABP12', 'FABP4', 'FABP5', 'FABP9', 'IMPA1', # 'PAG1', 'PMP2', 'SLC10A5', 'SNX16', 'ZFAND1') # expr2 = get_expression(regions, gene_ids=gene_ids, dataset='adult', # background=FALSE) ## ----------------------------------------------------------------------------------------------------------- ## get expression data for the first 5 brain regions ## from the last aba_enrich-analysis top_regions = fwers_region[1:5, 'structure_id'] plot_expression(top_regions, background=FALSE) ## ----------------------------------------------------------------------------------------------------------- ## plot the same expression data without dendrogram plot_expression(top_regions, dendro=FALSE, background=FALSE) ## ----------------------------------------------------------------------------------------------------------- ## plot expression of some genes for the frontal neocortex (Allen:10161) ## in age category 3 (children, 3-11 yrs) gene_ids = c('ENSG00000157764', 'ENSG00000163041', 'ENSG00000182158', 'ENSG00000147889', 'ENSG00000103126', 'ENSG00000184634') plot_expression('Allen:10161', gene_ids=gene_ids, dataset='5_stages', age_category=3) ## ----------------------------------------------------------------------------------------------------------- ## get IDs of the substructures of the frontal neocortex (Allen:10161) ## for which expression data are available subs = get_sampled_substructures('Allen:10161') subs ## get the full name of those substructures get_name(subs) ## get the superstructures of the frontal neocortex (from general to special) supers = get_superstructures('Allen:10161') supers ## get the full names of those superstructures get_name(supers) ## ----------------------------------------------------------------------------------------------------------- ## get structure IDs of brain regions that contain 'accumbens' in their name get_id('accumbens') ## get structure IDs of brain regions that contain 'telencephalon' in their name get_id('telencephalon') ## ----------------------------------------------------------------------------------------------------------- ## get all brain regions included in ABAEnrichment together with thier IDs all_regions = get_id('') head(all_regions) ## ---- results='hide'---------------------------------------------------------------------------------------- ## run an enrichment analysis with 7 candidate and 7 background genes ## for the developing brain gene_ids = c('FOXJ1', 'NTS', 'LTBP1', 'STON2', 'KCNJ6', 'AGT', 'ANO3', 'TTR', 'ELAVL4', 'BEAN1', 'TOX', 'EPN3', 'PAX2', 'KLHL1') is_candidate = rep(c(1,0), each=7) input_hyper = data.frame(gene_ids, is_candidate) res = aba_enrich(input_hyper, dataset='5_stages', cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100) ## ----------------------------------------------------------------------------------------------------------- head(res[[1]]) ## see which candidate genes are annotated to brain regions with a FWER < 0.01 anno = get_annotated_genes(res, fwer_threshold=0.1) anno ## ----------------------------------------------------------------------------------------------------------- ## find out which of the above genes have expression ## above the 70% and 90% expression-cutoff ## in the basal ganglia of the adult human brain (Allen:4276) anno2 = get_annotated_genes(structure_ids='Allen:4276', dataset='adult', cutoff_quantiles=c(0.7,0.9), genes=gene_ids) anno2 ## ----------------------------------------------------------------------------------------------------------- ## use previously defined input genes dataframe head(input_hyper) ## ----results='hide'----------------------------------------------------------------------------------------- ## run enrichment analysis with developmental effect score res_dev_effect = aba_enrich(input_hyper, dataset='dev_effect', cutoff_quantiles=c(0.5,0.7,0.9), n_randsets=100) ## ----------------------------------------------------------------------------------------------------------- ## see the 5 brain regions with the lowest FWERs top_regions = res_dev_effect[[1]][1:5, ] top_regions ## ----------------------------------------------------------------------------------------------------------- ## plot developmental effect score of the 5 brain regions with the lowest FWERs plot_expression(top_regions[ ,'structure_id']) ## ----------------------------------------------------------------------------------------------------------- sessionInfo()