---
title: 'Reproducing Manuscript Figures'
date: '`r Sys.Date()`'
author: 'Helena Winata'
output: rmarkdown::pdf_document
vignette: >
  %\VignetteIndexEntry{Reproducing Manuscript Figures}
  %\VignetteEngine{rmarkdown}
  \usepackage[utf8]{inputenc}
---

```{r, echo=FALSE, message=FALSE}
# Attach knitr and the plotting package, set the chunk defaults used by the
# whole document, and make sure the output directory for figures exists.
library(knitr);
library(CancerEvolutionVisualization);
opts_chunk$set(warning = FALSE, message = FALSE, fig.align = 'center');
if (!dir.exists('figures')) dir.create('figures');
```

```{r plot-params, echo=FALSE}
# Colour palette named by clone label: 'MRCA' followed by clones 1 to 25.
clone.col <- setNames(
    c('#a90000', '#e20000', '#f42404', '#df6b0b', '#f48004', '#f2a123',
      '#d7c454', '#90de3a', '#5ecf1c', '#37a400', '#158800', '#16a149',
      '#1ecfa9', '#05f8f2', '#00daff', '#00abff', '#007dff', '#004aff',
      '#1922ff', '#5703ff', '#8600ff', '#ab19ff', '#bd4cff', '#de56fc',
      '#f43ded', '#ff00d1'),
    c('MRCA', 1:25)
);

# Colour palette named by spatial category.
spatial.col <- setNames(
    c('#A1D59C', '#BEA0CE', '#722A7F'),
    c('Primary', 'Shared', 'Metastasis')
);
```

```{r echo=FALSE}
# Load the example datasets from the local 'data' directory.
load('data/SNV.rda');
load('data/multi.phylogeny.rda');
load('data/single.phylogeny.rda');
load('data/phylogeny.500.rda');
phy.500 <- phylogeny.500;

# process multi phylogeny data
# Index rows by label so that they can be addressed by clone name.
rownames(multi.phylogeny) <- multi.phylogeny$label;
# Exchange the contents of rows '3' and '11'; the row names stay in place ...
multi.phylogeny[c('3', '11'), ] <- multi.phylogeny[c('11', '3'), ];
# Set the spread of every node whose parent is '3' or '7'.
multi.phylogeny$spread[multi.phylogeny$parent == '3'] <- 1.3;
multi.phylogeny$spread[multi.phylogeny$parent == '7'] <- 0.4;
# ... so the row names are re-synchronised with the swapped labels here.
rownames(multi.phylogeny) <- multi.phylogeny$label;
# Colour each node by its clone label.
multi.phylogeny$node.col <- clone.col[multi.phylogeny$label];

# process single phylogeny data
# The label is used both as the row name and as the node id.
rownames(single.phylogeny) <- single.phylogeny$node.id <- single.phylogeny$label;
# Rename the 'CCF' column to 'CP'.
colnames(single.phylogeny)[colnames(single.phylogeny) == 'CCF'] <- 'CP';
single.phylogeny$node.col <- clone.col[single.phylogeny$label];
```

# Introduction

This vignette demonstrates how to reproduce the figures presented in our manuscript 'CEV: A Tool for Standardized Cancer Evolution Visualization' using the CEV package.
We will walk through the generation of each figure using example datasets provided with the package.
The code snippets below show how to create customized phylogenetic trees, CCF heatmaps, and other visualizations that highlight different aspects of tumor evolution.
Users can easily adapt these examples to their own datasets by following the same workflow and customization options.

# Figure 1: Multi-sample Analysis

```{r echo=FALSE}
# Preview the two input tables used throughout Figure 1.
kable(
    x = head(snv),
    caption = 'Mutation-to-subclone assignment data.'
);
kable(
    x = single.phylogeny,
    caption = 'Example phylogeny data for a single tumour sample.'
);
```

\pagebreak

## 1B. CCF Heatmap

```{r ccf-hm}
# The heatmap is written to `filename`; the returned object is kept in `plt`.
plt <- create.cluster.heatmap(
    filename = 'figures/1B_CCF-heatmap.png',
    DF = snv,
    ccf.limits = c(0, 1),
    clone.colours = clone.col,
    ylab.label = 'Sample',
    ylab.cex = 1.2,
    yaxis.cex = 1,
    y.spacing = -0.2,
    height = 7,
    width = 11
);
```

```{r echo=FALSE, out.width='100%'}
include_graphics('figures/1B_CCF-heatmap.png');
```

\pagebreak

## 1C. CCF Summary Heatmap

```{r summary-hm}
# Calculate median CCF per sample
median.ccf <- aggregate(CCF ~ ID + clone.id, data = snv, FUN = median);
names(median.ccf)[names(median.ccf) == 'CCF'] <- 'median.ccf.per.sample';

# Calculate total unique SNV.id per sample
total.nsnv <- aggregate(
    SNV.id ~ ID + clone.id,
    data = snv,
    FUN = function(x) length(unique(x))
);
names(total.nsnv)[names(total.nsnv) == 'SNV.id'] <- 'total.nsnv';

# Merge the results back to the original data frame
# NOTE: `snv` is overwritten here, so later chunks see the two extra columns.
snv <- merge(snv, median.ccf, by = c('ID', 'clone.id'));
snv <- merge(snv, total.nsnv, by = c('ID', 'clone.id'));

create.ccf.summary.heatmap(
    filename = 'figures/1C_CCF-summary-heatmap.png',
    DF = snv,
    ccf.limits = c(0, 1),
    clone.colours = clone.col,
    clone.order = levels(snv$clone.id),
    sample.order = levels(snv$ID),
    y.spacing = c(-0.5, -2.5),
    xlab.axis.padding = c(0, 0, -2),
    plot.objects.heights = c(0.3, 1, 0.25),
    height = 7,
    width = 11
);
```

```{r echo=FALSE, out.width='90%'}
include_graphics('figures/1C_CCF-summary-heatmap.png');
```

\pagebreak

## 1D. Clone Genome Distribution Plot

```{r clone-genome-dist, message=FALSE}
# Keep clone '1' plus every clone whose `spatial` value is 'Shared' or
# 'Metastasis', and drop the factor levels that are no longer present.
selected.clone <- c(
    '1',
    subset(multi.phylogeny, spatial %in% c('Shared', 'Metastasis'))$label
);
sub.dt <- droplevels(snv[snv$clone.id %in% selected.clone, ]);

# No trailing comma after the last argument: it would pass an empty argument.
create.clone.genome.distribution.plot(
    filename = 'figures/1D_clone-genome-distribution.png',
    snv.df = sub.dt,
    clone.order = levels(sub.dt$clone.id),
    clone.colours = clone.col,
    legend.y = 0.47,
    legend.x = 0.1,
    y.spacing = -0.5,
    alpha = 0.6,
    ylab.axis.padding = c(-1),
    width = 26
);
```

```{r echo=FALSE, out.width='100%'}
include_graphics('figures/1D_clone-genome-distribution.png');
```

\pagebreak

## 1G. Multi-sample Phylogenetic Tree

```{r multi-tree-prep, echo=FALSE}
# Work on a copy and drop the 'spatial', 'length.1' and 'length.2' columns.
phy.dt <- multi.phylogeny;
phy.dt$spatial <- phy.dt$length.1 <- phy.dt$length.2 <- NULL;
```

```{r multi-tree}
create.phylogenetic.tree(
    filename = 'figures/1G_multi-sample-phylogeny.png',
    phy.dt,
    add.normal = TRUE,
    scale1 = 2,
    horizontal.padding = 1,
    height = 7,
    width = 7
);
```

```{r echo=FALSE, out.width='80%'}
include_graphics('figures/1G_multi-sample-phylogeny.png');
```

## 1H. Single-sample Phylogenetic Tree

```{r single-prep, echo=FALSE}
# Work on a copy, drop 'spatial' and 'length.2', and give every row the same
# `length.1` value of 5.
single.dt <- single.phylogeny;
single.dt$spatial <- single.dt$length.2 <- NULL;
single.dt$length.1 <- 5;
```

```{r single-tree}
create.phylogenetic.tree(
    filename = 'figures/1H_single-sample-phylogeny.png',
    single.dt,
    add.normal = TRUE,
    polygon.scale = 2.5,
    height = 5,
    width = 3
);
```

```{r echo=FALSE, out.width='30%'}
include_graphics('figures/1H_single-sample-phylogeny.png');
```

\pagebreak

# Figure 2: Phylogenetic tree features

## 2A. 
Sample-specific subclones

```{r}
sample.list <- c('LN1', 'LN2', 'R2', 'R7');

# Median CCF of every clone in every sample; it becomes the node size below.
cp.dt <- aggregate(
    CCF ~ clone.id + ID,
    data = snv,
    FUN = function(x) median(x, na.rm = TRUE)
);
names(cp.dt) <- c('label', 'ID', 'node.size');

# One tree per sample, written to figures/2A_<sample>-phylogeny.png.
for (s in sample.list) {
    # Left join: every node of the tree is kept, and nodes without a row for
    # sample `s` get NA in `node.size`.
    temp.dt <- merge(
        x = phy.dt,
        y = cp.dt[cp.dt$ID == s, ],
        by = 'label',
        all.x = TRUE
    );
    temp.dt$node.id <- temp.dt$label;

    # A clone counts as absent from the sample when its median CCF is not
    # positive OR when the join found no value for it (NA). Testing for NA
    # explicitly keeps unmatched clones from being skipped by the
    # assignments below.
    idx <- is.na(temp.dt$node.size) | temp.dt$node.size <= 0;

    # Restyle absent clones: dotted thin edge, small white node, grey label.
    temp.dt$edge.type.1[idx] <- 'dotted';
    temp.dt$edge.width.1[idx] <- 1;
    temp.dt$node.size[idx] <- 0.8;
    temp.dt$node.col[idx] <- 'white';
    temp.dt$node.label.col[idx] <- 'grey';
    temp.dt$border.type[idx] <- 'dotted';

    # Blank the label of nodes whose size is at most 0.5 (absent clones were
    # just set to 0.8, so they keep their label).
    temp.dt$label[temp.dt$node.size <= 0.5] <- '';

    create.phylogenetic.tree(
        filename = paste0('figures/2A_', s, '-phylogeny.png'),
        temp.dt,
        add.normal = TRUE,
        scale1 = 2,
        height = 7,
        width = 7
    );
}
```

```{r echo=FALSE, results='asis'}
# include_graphics(paste0('figures/2A_', sample.list, '-phylogeny.png'));
png.list <- paste0('figures/2A_', sample.list, '-phylogeny.png');
# Generate Markdown table with each image in a cell
cat('|', paste(paste0('![](', png.list, '){width=50%}'), collapse = ' | '), '|');
```

## 2B. Annotated single-sample phylogenetic tree

```{r single-tree-annot}
# One row per annotation: the node it is attached to, the text, and its style.
text.data <- data.frame(
    node = c('MRCA', 'MRCA', 4, 13, 'MRCA'),
    name = c('chr1q gain', 'chr12q loss', 'chr2p gain', 'MYC SNV', 'TP53 SNV'),
    col = c('blue', 'red', 'blue', 'black', 'black'),
    fontface = c(rep('plain', 3), 'bold', 'bold')
);

create.phylogenetic.tree(
    filename = 'figures/2B_annot-single-sample-phylogeny.png',
    single.dt,
    add.normal = TRUE,
    polygon.colour.scheme = c(NA, single.dt$node.col),
    node.text = text.data,
    node.text.line.dist = 0.6,
    polygon.scale = 2.5,
    height = 5,
    width = 3
);
```

```{r echo=FALSE, out.width='40%'}
include_graphics('figures/2B_annot-single-sample-phylogeny.png');
```

\pagebreak

## 2C. 
Mutation timeline tree

```{r echo=FALSE}
# placeholder before implementing text positioning
# The trailing '\n' characters pad the annotation strings with empty lines.
text.data <- data.frame(
    node = c('MRCA', 'MRCA', 4, 13, 'MRCA'),
    name = c(
        'chr1q gain',
        'chr12q loss\n',
        'chr2p gain\n\n\n',
        'MYC SNV',
        'TP53 SNV\n\n\n\n\n\n\n\n'
    ),
    col = c('blue', 'red', 'blue', 'black', 'black'),
    fontface = c(rep('plain', 3), 'bold', 'bold')
);

# Rebuild the tree table from the four columns needed here and colour each
# edge individually (one colour per row).
single.dt <- single.phylogeny[, c('label', 'parent', 'length.1', 'CP')];
single.dt$edge.col.1 <- c('seagreen', 'darkorchid', 'maroon', 'maroon');
```

```{r timeline-tree, results='hide'}
create.phylogenetic.tree(
    filename = 'figures/2C_mutation-timeline-phylogeny.png',
    single.dt,
    scale1 = 3,
    yaxis1.label = 'Number of SNVs',
    horizontal.padding = -0.3,
    node.text.line.dist = 0.2,
    node.text.cex = 1.2,
    add.normal = TRUE,
    node.text = text.data,
    polygon.scale = 2.5,
    height = 10,
    width = 6
);
```

```{r echo=FALSE, out.width='50%'}
include_graphics('figures/2C_mutation-timeline-phylogeny.png');
```

## 2D. Radial mode with 2 edges

```{r echo=FALSE}
# Start again from the full multi-sample table and style the two edges of
# every node: edge 1 in steelblue, edge 2 black and dotted.
phy.dt <- multi.phylogeny;
phy.dt$edge.col.1 <- 'steelblue';
phy.dt$edge.col.2 <- 'black';
phy.dt$edge.type.2 <- 'dotted';

# Show the table without its 'spatial' and 'spread' columns.
kable(
    phy.dt[, !(names(phy.dt) %in% c('spatial', 'spread'))],
    caption = 'Tree dataframe for customizing 2 edges.'
);
```

```{r radial, results='hide'}
create.phylogenetic.tree(
    filename = 'figures/2D_radial-phylogeny.png',
    phy.dt,
    add.normal = TRUE,
    scale1 = 1.5,
    scale2 = 1.5,
    yaxis1.label = 'Number of SNVs',
    yaxis2.label = 'Number of CNAs',
    scale.bar = TRUE,
    scale.bar.coords = c(0.25, 0.85),
    ylab.cex = 1.4,
    yaxis.cex = 1.3,
    scale.size.1 = 20,
    height = 10,
    width = 8
);
```

```{r echo=FALSE, out.width='50%'}
include_graphics('figures/2D_radial-phylogeny.png');
```

\pagebreak

## 2E. 
Dendrogram mode with 2 edges

```{r dend}
# Same two-edge table as before, with `mode` and `spread` overridden for
# every row.
dend.dt <- phy.dt;
dend.dt[['mode']] <- 'dendrogram';
dend.dt[['spread']] <- 1.3;

create.phylogenetic.tree(
    dend.dt,
    filename = 'figures/2E_dendrogram-phylogeny.png',
    add.normal = TRUE,
    # sizing of the two edges and of the output image
    scale1 = 1.5,
    scale2 = 1.5,
    scale.size.1 = 20,
    height = 11,
    width = 8,
    # axis labelling
    yaxis1.label = 'Number of SNVs',
    yaxis2.label = 'Number of CNAs',
    ylab.cex = 1.4,
    yaxis.cex = 1.3,
    # scale bar
    scale.bar = TRUE,
    scale.bar.coords = c(0.2, 0.85)
);
```

```{r echo=FALSE, out.width='50%'}
include_graphics('figures/2E_dendrogram-phylogeny.png');
```

\pagebreak

## 2F. Mixed mode with 2 edges

```{r mixed}
# Mode overrides, applied in this order: first 'dendrogram' from nodes '3'
# and '4', then 'radial' from node '7'.
mode.overrides <- list(
    list(parent.id = c('3', '4'), value = 'dendrogram'),
    list(parent.id = '7', value = 'radial')
);

mixed.dt <- phy.dt;
for (override in mode.overrides) {
    mixed.dt <- update.descendant.property(
        mixed.dt,
        parent.id = override[['parent.id']],
        property = 'mode',
        value = override[['value']]
    );
}

# Override the spread of node '4' only.
mixed.dt[['spread']][mixed.dt[['label']] == '4'] <- 1.7;

create.phylogenetic.tree(
    mixed.dt,
    filename = 'figures/2F_mixed-phylogeny.png',
    add.normal = TRUE,
    # sizing of the two edges and of the output image
    scale1 = 1.5,
    scale2 = 1.5,
    scale.size.1 = 20,
    height = 11,
    width = 8,
    # axis labelling
    yaxis1.label = 'Number of SNVs',
    yaxis2.label = 'Number of CNAs',
    ylab.cex = 1.4,
    yaxis.cex = 1.3,
    # scale bar
    scale.bar = TRUE,
    scale.bar.coords = c(0.25, 0.85)
);
```

```{r echo=FALSE, out.width='50%'}
include_graphics('figures/2F_mixed-phylogeny.png');
```

\pagebreak

## 2G. 
500 node phylogenetic tree

```{r phy-500}
# Start from the loaded table: label doubles as node id, every first edge has
# length 1, second edges are off by default, and the layout is a dendrogram.
phy.500 <- phylogeny.500;
phy.500$node.id <- phy.500$label;
phy.500$length.1 <- 1;
phy.500$length.2 <- 0;
phy.500$mode <- 'dendrogram';

# Highlight colours, named by the label of the node each one is attached to.
node.list <- setNames(
    c('navy', 'steelblue', 'orchid', 'maroon', 'seagreen', 'tomato'),
    c('1', '7', '26', '53', '125', '204')
);

# Entries are applied in order, so a later entry overrides the edge colour
# that an earlier one set on the same rows.
for (i in seq_along(node.list)) {
    idx <- which(phy.500$label == names(node.list)[i]);
    phy.500[idx, 'node.col'] <- node.list[i];
    phy.500[idx, 'connector.col'] <- node.list[i];
    phy.500 <- update.descendant.property(
        phy.500,
        parent.id = names(node.list)[i],
        property = 'edge.col.1',
        value = node.list[i]
    );
}

# Rows coloured 'tomato' get a dotted first edge.
phy.500[which(phy.500$edge.col.1 == 'tomato'), 'edge.type.1'] <- 'dotted';

# Rows coloured 'seagreen' additionally get a second edge; the row selection
# is computed once and reused for both columns.
seagreen.idx <- which(phy.500$edge.col.1 == 'seagreen');
phy.500[seagreen.idx, 'length.2'] <- 1;
phy.500[seagreen.idx, 'edge.col.2'] <- 'palegreen3';

# Only the highlighted nodes are drawn, and no node carries a text label.
phy.500[!(phy.500$label %in% names(node.list)), 'draw.node'] <- FALSE;
phy.500$label <- '';

create.phylogenetic.tree(
    filename = 'figures/2G_phylogeny-500.png',
    phy.500,
    scale2 = 0.1,
    horizontal.padding = 10,
    add.normal = TRUE,
    width = 23,
    height = 9
);
```

```{r echo=FALSE, out.width='100%'}
include_graphics('figures/2G_phylogeny-500.png');
```