-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquast.rmd
95 lines (87 loc) · 3 KB
/
quast.rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
---
title: Plot and tabulate assembly metrics
author: Shaun Jackman
output:
html_document:
keep_md: true
params:
input_tsv:
label: "Input TSV file of QUAST metrics"
value: "assemblies.quast.tsv"
input: text
output_tsv:
label: "Output TSV file of parameters and assembly metrics"
value: "assemblies.quast.metrics.tsv"
input: text
---
```{r setup, message=FALSE}
library(dplyr)
library(ggplot2)
library(ggrepel)
library(knitr)
library(readr)
library(scales)
library(stringr)
library(tibble)
library(tidyr)
knit_print.data.frame <- function(x, ...) kable(x) %>% paste(collapse = "\n") %>% asis_output
input_tsv <- params$input_tsv
output_tsv <- params$output_tsv
c(input_tsv, output_tsv)
```
# Read the data
```{r read-data, message=FALSE}
quast_orig <- read_tsv(input_tsv)
metrics <- quast_orig %>%
filter(!str_detect(Assembly, "_broken")) %>%
rename(
Misassemblies = `# misassemblies`,
Scaffold_NG50 = NG50,
Scaffold_NGA50 = NGA50) %>%
mutate(
Assembly = Assembly %>%
str_replace("na12878.", "") %>%
str_replace("abyss2", "ABySS") %>%
str_replace("canu", "Canu") %>%
str_replace("discovardenovo_abyss", "DISCO+ABySS") %>%
str_replace("discovardenovo_besst", "DISCO+BESST") %>%
str_replace("falcon", "Falcon") %>%
str_replace("sim.abyss", "Sim") %>%
str_replace("supernova2", "Supernova") %>%
str_replace("supernova", "Supernova") %>%
str_replace("\\.(hg004|HNJJKCCXX|sim.lr).*nxrepair", "+NxRepair") %>%
str_replace("\\.(hg004|HNJJKCCXX|sim.lr).*breaktigs", "+Tigmint") %>%
str_replace("\\.(hg004|HNJJKCCXX|sim.lr).*abyss_scaffold", "+ARCS") %>%
str_replace("\\.(hg004|HNJJKCCXX|sim.lr).*links", "+ARCS"),
Assembler = str_extract(Assembly, "^(DISCO\\+)?[A-Za-z]+"),
Scaffolder = str_extract(Assembly, "Tigmint\\+ARCS|ARCS|Tigmint|NxRepair"),
Scaffolder = ifelse(!is.na(Scaffolder), Scaffolder, Assembler))
```
# Plot NGA50 and breakpoints
```{r nga50-breakpoints, fig.width=6, fig.height=2.5, dpi=300}
ggplot(metrics) +
aes(x = Misassemblies, y = Scaffold_NGA50, label = Scaffolder, shape = Assembler, colour = Assembler) +
geom_point(colour = "black") +
geom_text_repel(segment.alpha = 0.5) +
scale_x_continuous(name = "QUAST Misassemblies", labels = comma) +
scale_y_continuous(name = "Scaffold NGA50", labels = unit_format(unit = "Mbp", scale = 1e-6)) +
scale_colour_brewer(palette = "Dark2", guide = FALSE) +
expand_limits(y = c(0, 25e6)) +
theme_minimal(base_size = 14) +
theme(legend.position = "bottom", legend.title = element_blank())
```
# Table of assembly metrics
```{r metrics-table}
metrics_table <- metrics %>%
transmute(
Assembly,
`NG50 (Mbp)` = round(Scaffold_NG50 / 1e6, 2),
`NGA50 (Mbp)` = round(Scaffold_NGA50 / 1e6, 2),
Misassemblies,
Reduction = lag(Misassemblies) - Misassemblies,
Reduction = str_c(Reduction, " (", percent(Reduction / lag(Misassemblies)), ")"),
Reduction = ifelse(str_detect(Assembly, "Tigmint"), Reduction, NA),
Misassemblies = comma(Misassemblies))
metrics_table
write_tsv(metrics_table, output_tsv)
```