## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 6, fig.height = 4.5 ) ## ----eval = FALSE------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("zitools") ## ----message = FALSE---------------------------------------------------------- library(zitools) ## ----message = FALSE---------------------------------------------------------- library(phyloseq) library(DESeq2) library(tidyverse) library(microbiome) ## ----Dataset------------------------------------------------------------------ data("peerj32") phyloseq <- peerj32[["phyloseq"]] sample_data(phyloseq)$time <- factor(sample_data(phyloseq)$time) ## ----echo = FALSE------------------------------------------------------------- str(phyloseq, max.level = 3) ## ----ziMain------------------------------------------------------------------- Zi <- ziMain(phyloseq) print(Zi) ## ----------------------------------------------------------------------------- mean(Zi) sd(Zi) var(Zi) median(Zi) quantile(Zi) ## ----boxplot, echo=TRUE------------------------------------------------------- boxplot(log2p(Zi), xlab = "samples", ylab = "log2(count+1)", main = "ZI considered") boxplot(log2p(inputcounts(Zi)), xlab = "samples", ylab = "log2(count+1)", main = "ZI not considered") ## ----repeat plot-------------------------------------------------------------- i <- 1 repeat { Zi <- resample_deinflatedcounts(Zi) boxplot(log2p(Zi), xlab = "samples", ylab = "log2(count+1)", main = paste("Iteration", i)) i = i+1 if(i==6){ break } } ## ----DDS---------------------------------------------------------------------- DDS <- zi2deseq2(Zi, ~time) DDS$Subject <- relevel(DDS$time, ref = "1") DDS <- DESeq(DDS, test = "Wald", fitType = "local", sfType = "poscounts") ## ----echo=TRUE---------------------------------------------------------------- (result <- results(DDS, cooksCutoff = FALSE)) result <- as.data.frame(result) ## ----df, echo=FALSE, eval=TRUE, warning = FALSE------------------------------- result_df<- cbind(as(result, "data.frame"), as(tax_table(phyloseq)[rownames(result), ], "matrix")) result_df <- result_df %>% rownames_to_column(var = "OTU") %>% arrange(padj) #drop the rows with NA result_df <- result_df[complete.cases(result_df), ] result_df$diffexpressed[result_df$log2FoldChange > 1 & result_df$pvalue < 0.05] <- result_df$Genus result_df$diffexpressed[result_df$log2FoldChange < -1 & result_df$pvalue < 0.05] <- result_df$Genus result_df <- data.frame("OTU" = result_df$OTU, "log2FoldChange" = result_df$log2FoldChange, "pvalue" = result_df$pvalue, "Genus" = result_df$diffexpressed) ## ----echo = TRUE, warning = FALSE, fig.width=10------------------------------- print(ggplot(data=result_df, aes(x=log2FoldChange, y=-log10(pvalue), color = Genus)) + geom_point()+ theme_minimal()+ xlim(-6,6)+ ylim(-0.5,4)+ ylab("-log10(pvalue)\n")+ xlab("\nLog2FoldChange")+ geom_vline(xintercept=c(-1, 1), col="black") + geom_hline(yintercept=-log10(0.05), col="black")+ theme(text = element_text(size = 20))+ theme(panel.spacing.x = unit(2, "lines"))) ## ----eval = TRUE, echo = FALSE------------------------------------------------ Zi2 <- Zi mat <- deinflatedcounts(Zi2) mat[mat > 500] <- 500 deinflatedcounts(Zi2) <- mat ## ----heatmap------------------------------------------------------------------ MissingValueHeatmap(Zi2, xlab = "Sample") ## ----results='hide'----------------------------------------------------------- PCA <- princomp(covmat = cor(t(Zi)), cor = FALSE) centered_data <- (inputcounts(t(Zi))-colMeans2(t(Zi)))/sqrt(colVars(t(Zi))) loadings <- PCA$loadings scores <- centered_data %*% loadings PCA$scores <- scores df_PCA <- data.frame("PC1" = PCA[["scores"]][,1], "PC2" = PCA[["scores"]][,2], "group" = sample_data(Zi)$group) ## ----plot PCA, echo=FALSE----------------------------------------------------- print(ggplot(df_PCA, aes(x = PC1, y = PC2, color = group))+ geom_point()+ xlim(-200,100)+ ylim(-250,260)+ xlab("PC1 14.6%")+ ylab("PC2 9.9%")) ## ----------------------------------------------------------------------------- new_phyloseq <- zi2phyloseq(Zi) ## ----------------------------------------------------------------------------- str(otu_table(new_phyloseq), max.level = 3) ## ----warning=FALSE------------------------------------------------------------ subset <- subset_taxa(new_phyloseq, Phylum=="Firmicutes") subset <- prune_taxa(names(sort(taxa_sums(subset),TRUE)[1:300]), subset) (plot_heatmap(subset, )) ## ----------------------------------------------------------------------------- plot_richness(new_phyloseq, measures = c("Chao1"), color = sample_data(new_phyloseq)$group)+ theme(axis.text.x = element_blank()) ## ----------------------------------------------------------------------------- sessionInfo()