Volcano plots represent a useful way to visualise the results of differential expression analyses. Here, we present a highly-configurable function that produces publication-ready volcano plots. EnhancedVolcano (Blighe, Rana, and Lewis 2018) will attempt to fit as many labels in the plot window as possible, thus avoiding ‘clogging’ up the plot with labels that could not otherwise have been read. Other functionality allows the user to identify up to 5 different types of attributes in the same plot space via colour, shape, size, encircling, and shade parameter configurations.
# Make sure BiocManager is available, then use it to install EnhancedVolcano.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("EnhancedVolcano")
## Bioconductor version 3.19 (BiocManager 1.30.23), R 4.4.0 (2024-04-24)
## Warning: package(s) not installed when version(s) same as or greater than current; use
## `force = TRUE` to re-install: 'EnhancedVolcano'
## Old packages: 'abind', 'afex', 'ape', 'arrow', 'backports', 'bayestestR', 'BH',
## 'bigD', 'BiocManager', 'Biostrings', 'bit', 'bit64', 'bitops', 'BMA', 'boot',
## 'broom', 'car', 'caTools', 'class', 'cluster', 'collapse', 'commonmark',
## 'correlation', 'corrplot', 'cpp11', 'crayon', 'credentials', 'data.table',
## 'datawizard', 'DBI', 'DEoptimR', 'Deriv', 'doBy', 'dotCall64', 'downlit',
## 'dqrng', 'Ecfun', 'effectsize', 'ensembldb', 'faraway', 'fastDummies',
## 'fastmatch', 'fitdistrplus', 'FNN', 'foreign', 'future', 'future.apply',
## 'GenomeInfoDb', 'GenomicRanges', 'gert', 'ggblanket', 'gginnards', 'ggiraph',
## 'ggnewscale', 'ggrepel', 'ggstatsplot', 'gmp', 'gplots', 'gt', 'gtable',
## 'HDF5Array', 'hdf5r', 'hexbin', 'httr2', 'igraph', 'inline', 'insight',
## 'IRanges', 'KEGGREST', 'KernSmooth', 'labelled', 'later', 'lme4', 'locfit',
## 'lubridate', 'MASS', 'Matrix', 'matrixStats', 'microbenchmark', 'minqa',
## 'mvtnorm', 'nlme', 'nloptr', 'nnet', 'parallelly', 'parameters', 'patchwork',
## 'performance', 'pillar', 'pkgbuild', 'pkgdown', 'pkgload', 'plm',
## 'PMCMRplus', 'polyclip', 'poweRlaw', 'processx', 'profvis', 'progressr',
## 'promises', 'ps', 'psych', 'purrr', 'quantreg', 'qvcalc', 'R.oo', 'ragg',
## 'RANN', 'raster', 'Rcpp', 'RcppArmadillo', 'RcppEigen', 'RcppRoll', 'RCurl',
## 'reactR', 'renv', 'reprex', 'reticulate', 'rjson', 'Rmpfr', 'roxygen2',
## 'rpart', 'RSpectra', 'RSQLite', 'rstudioapi', 'S4Arrays', 'S4Vectors',
## 'sessioninfo', 'Seurat', 'shiny', 'Signac', 'sp', 'spam', 'SparseArray',
## 'SparseM', 'spatial', 'spatstat.data', 'spatstat.explore', 'spatstat.geom',
## 'spatstat.random', 'spatstat.sparse', 'spatstat.utils', 'statsExpressions',
## 'SuppDists', 'survival', 'survminer', 'systemfonts', 'terra', 'testthat',
## 'textshaping', 'usethis', 'uuid', 'V8', 'vioplot', 'waldo', 'withr', 'XML',
## 'zip'
Note: to install the development version:
# Install the development version from GitHub only when EnhancedVolcano is not
# already usable. requireNamespace() checks this one package directly, instead
# of building the full installed.packages() matrix and matching the name
# against every field in it (not just the package-name column).
if (!requireNamespace("EnhancedVolcano", quietly = TRUE)) {
  devtools::install_github("kevinblighe/EnhancedVolcano")
}
library(EnhancedVolcano)
## Loading required package: ggplot2
## Loading required package: ggrepel
# Install canvasXpress from GitHub only when it is not already usable.
# requireNamespace() checks this one package directly, instead of building the
# full installed.packages() matrix and matching the name against every field.
if (!requireNamespace("canvasXpress", quietly = TRUE)) {
  devtools::install_github("neuhausi/canvasXpress")
}
# Alternative (disabled): install from a local source tarball instead.
# devtools::install_local("~/git/canvas/R/canvasXpress.tar.gz", build_manual = TRUE, upgrade = "always")
library(canvasXpress)
For this example, we will follow the tutorial (from Section 3.1) of RNA-seq workflow: gene-level exploratory analysis and differential expression. Specifically, we will load the ‘airway’ data, where different airway smooth muscle cells were treated with dexamethasone.
library(airway)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
## tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:GenomicRanges':
##
## subtract
# Load the airway dataset and make "untrt" the reference level of `dex`.
data("airway")
airway$dex <- relevel(airway$dex, ref = "untrt")
Annotate the Ensembl gene IDs to gene symbols:
# The row names of `airway` are the Ensembl gene IDs to be annotated.
ens <- rownames(airway)
# Annotation database passed to mapIds() for the ID-to-symbol lookup.
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
##
# Look up a gene symbol for every Ensembl ID. Some IDs map to more than one
# symbol (see the 1:many message); mapIds() keeps one value per key
# (its default is multiVals = "first").
symbols <- mapIds(
  org.Hs.eg.db,
  keys = ens,
  keytype = "ENSEMBL",
  column = "SYMBOL"
)
## 'select()' returned 1:many mapping between keys and columns
# Replace the row names of `airway` with gene symbols and drop unannotated rows.
# Step 1: discard IDs for which no symbol was found.
symbols <- symbols[!is.na(symbols)]
# Step 2: re-align to the row order of `airway` by matching on the vector's
# names; rows whose ID was discarded above get NA here.
symbols <- symbols[match(rownames(airway), names(symbols))]
rownames(airway) <- symbols
# Step 3: keep only the rows that received a (non-NA) symbol as their name.
keep <- !is.na(rownames(airway))
airway <- airway[keep,]
Conduct differential expression analysis using DESeq2 in order to create 2 sets of results:
library(DESeq2)
# Build the DESeq2 dataset from `airway` with a design formula containing
# the `cell` and `dex` terms.
dds <- DESeqDataSet(
  airway,
  design = ~ cell + dex
)
## Warning in DESeqDataSet(airway, design = ~cell + dex): 1337 duplicate rownames
## were renamed by adding numbers
# Fit the model with the beta prior switched off (betaPrior = FALSE).
dds <- DESeq(
  dds,
  betaPrior = FALSE
)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Extract the `dex` results for "trt" versus "untrt", then shrink the log2
# fold changes for the same contrast using the "normal" shrinkage type.
res <- results(dds, contrast = c("dex", "trt", "untrt"))
res <- lfcShrink(
  dds,
  contrast = c("dex", "trt", "untrt"),
  res = res,
  type = "normal"
)
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
##
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
For the most basic volcano plot, only a single data-frame, data-matrix, or tibble of test results is required, containing point labels, log2FC, and adjusted or unadjusted P values. The default cut-off for log2FC is |log2FC| > 2; the default cut-off for the P value is 10e-6.
# Basic volcano plot: label points with the row names of `res`, using the
# log2FoldChange column for x and the pvalue column for y.
v <- EnhancedVolcano(
  res,
  lab = rownames(res),
  x = "log2FoldChange",
  y = "pvalue"
)
# Show the plot object, then pass the same object to canvasXpress().
v
canvasXpress(v, width = 700, height = 500)