Preliminary analyses for microeukaryote tag-sequence survey.
Description: Investigate the diversity of single-celled microbial eukaryotic communities across several deep-sea hydrothermal vent sites (including NE Pacific, Caribbean). We plan to address questions related to the environmental factors that shape protistan community dynamics, and determine if patterns in species diversity and distribution vary at different deep-sea habitats. These questions will be addressed using similarly generated metabarcoding data from several distinct hydrothermal vents. Along with characterizing community structure, we plan to evaluate interactions between protist species (to identify putative predator-prey or parasite-host relationships) and their environment (to explore their relationship to geochemical properties).
Questions to address
What is the general biogeography and distribution of the deep-sea hydrothermal vent microbial eukaryotic community?
What community structure features (i.e., species richness, proportion cosmopolitan versus endemic, species evenness) are shared across or unique to deep-sea hydrothermal vent sites?
What environmental features (i.e., temperature, geochemistry) influence microbial eukaryotic community diversity? Can we identify if certain environmental factors select for putative vent endemics?
All upstream processing of sequences from QIIME2 can be found here.
## Warning: package 'ggplot2' was built under R version 4.1.3
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'lattice' was built under R version 4.1.1
<- c("Axial")
axial <- c("VonDamm", "Piccard")
mcr <- c("GordaRidge")
gr <- c("Axial", "VonDamm", "Piccard", "GordaRidge")
all load("data-input/asv-tables-processed-27012022.RData", verbose = TRUE)
## Loading objects:
## asv_insitu
## asv_insitu_qc
## insitu_asv_wClass
<- read.delim("data-input/samplelist-metadata.txt", na.strings = "") metadata
Investigate overall protistan community composition.
First, establish order and color key for all taxonomic visuals.
<- c("Alveolata-Ciliophora", "Alveolata-Dinoflagellata", "Protalveolata", "Other Alveolata", "Amoebozoa", "Apusozoa", "Excavata", "Hacrobia", "Archaeplastida", "Rhizaria", "Rhizaria-Radiolaria", "Rhizaria-Cercozoa", "Stramenopiles", "Stramenopiles-Opalozoa", "Stramenopiles-Sagenista", "Stramenopiles-Ochrophyta", "Opisthokonta", "Unknown Eukaryota")
all_taxa_order # length(all_taxa_order)
= c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c","#cc4c02", "#c7e9b4", "#238b45", "#c6dbef", "#99d8c9","#1d91c0", "#253494", "#9e9ac8", "#54278f", "#bdbdbd", "#252525", "#f0f0f0")
all_taxa_color # length(all_taxa_color)
<- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
alv <- c("Deep seawater", "BSW", "Shallow seawater", "Near vent BW")
bkgd <- c("Candelabra Plume", "Mt Edwards Plume", "Plume", "Anemone Plume")
plume
= c("#fa9fb5", "#c51b8a","#f0f0f0", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c","#cc4c02", "#c7e9b4", "#238b45", "#c6dbef", "#99d8c9","#1d91c0", "#253494", "#9e9ac8", "#54278f", "#bdbdbd", "#252525", "#f0f0f0")
all_taxa_color_protalve
<- function(df, selection){
make_bar_relabun <- df %>%
df_out filter(SITE %in% selection) %>%
filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) group_by(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species,
%>%
VENT, SITE, SAMPLETYPE, YEAR, DATASET, SUPERGROUP) summarise(SEQ_AVG_REP = mean(value)) %>%
ungroup()
%>%
df_out group_by(SITE, SAMPLETYPE, VENT, YEAR, SUPERGROUP) %>%
summarise(SEQ_SUM = sum(SEQ_AVG_REP)) %>%
mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
ggplot(aes(x = VENT, y = SEQ_SUM, fill = SUPERGROUP_ORDER)) +
geom_bar(stat = "identity", position = "fill", color = "black", width = 0.9) +
facet_grid(. ~ SITE +YEAR + SAMPLETYPE, scale = "free", space = "free") +
theme_linedraw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, face = "bold", color = "black"),
strip.background = element_blank(),
strip.text = element_text(color = "black", face = "bold", angle = 90),
axis.title = element_text(color = "black", face = "bold"),
axis.text.y = element_text(color = "black", face = "bold"),
legend.position = "right",
legend.title = element_blank()) +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = all_taxa_color_protalve) +
labs(x = "", y = "Relative sequence abundance")
}
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/figs2-barplotall.svg", h = 9, w = 13)
<- c("Axial", "VonDamm", "Piccard", "GordaRidge")
all make_bar_relabun(insitu_asv_wClass, all)
# dev.off()
<- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
alv <- c("Deep seawater", "BSW", "Shallow seawater", "Near vent BW")
bkgd <- c("Candelabra Plume", "Mt Edwards Plume", "Plume", "Anemone Plume")
plume
<- insitu_asv_wClass %>%
tmp_phyla filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
group_by(FeatureID, Taxon, Domain, Supergroup, Phylum, Class,
%>%
SITE, SAMPLETYPE, VENT, YEAR, SUPERGROUP_ORDER) summarise(SEQ_AVG_REP = mean(value)) %>%
ungroup() %>%
group_by(SITE, SAMPLETYPE, VENT, YEAR) %>%
mutate(TOTAL = sum(SEQ_AVG_REP)) %>%
ungroup(SITE, SAMPLETYPE, VENT, YEAR) %>%
group_by(SITE, SAMPLETYPE, VENT, YEAR, SUPERGROUP_ORDER, Phylum, Class) %>%
summarise(SEQ_SUM = sum(SEQ_AVG_REP),
REL_ABUN = (SEQ_SUM/TOTAL)) %>%
distinct()
###
# ggplot(tmp_phyla, aes(x = VENT, y = Class, fill = (REL_ABUN))) +
# geom_tile(stat = "identity", color = "black") +
# facet_grid(SUPERGROUP_ORDER + Phylum ~ SITE +YEAR + SAMPLETYPE, scale = "free", space = "free") +
# theme_linedraw() +
# theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
# strip.background = element_blank(), strip.text = element_text(color = "black"),
# legend.position = "right",
# strip.text.y = element_text(angle = 0)) +
# scale_fill_gradient(low = "white", high = "red") +
# labs(x = "", y = "Relative abundance")
Out of all the ASVs and sequences, what percentage of ASVs were found at all 4 of my vent sites? what % of ASVs were unique to individual sites? what % of sequences?
<- length(unique(insitu_asv_wClass$FeatureID)); totalasvs totalasvs
## [1] 12375
<- sum(insitu_asv_wClass$value); totalseq totalseq
## [1] 3788734
unique(insitu_asv_wClass$SITE_CLASS)
## [1] "Gorda Ridge only" "Axial only"
## [3] "Von Damm only" "Piccard & Von Damm"
## [5] "Piccard only" "MCR & Axial"
## [7] "Axial & Gorda Ridge" "Von Damm & Gorda Ridge"
## [9] "Von Damm, Axial, & Gorda Ridge" "Von Damm & Axial"
## [11] "All sites" "MCR & Gorda Ridge"
## [13] "Piccard, Axial, & Gorda Ridge" "Piccard & Gorda Ridge"
## [15] "Piccard & Axial"
<- filter(insitu_asv_wClass, grepl(" only", SITE_CLASS))
tmp_unique <- filter(insitu_asv_wClass, !(grepl(" only", SITE_CLASS)))
tmp_shared <- filter(insitu_asv_wClass, SITE_CLASS == "All sites")
tmp_allshared
# Total number of ASVs that were unique to an individual vent field, includes "only" in the site classification.
<- length(unique(tmp_unique$FeatureID)); a a
## [1] 10191
100*(a/totalasvs)
## [1] 82.35152
# % of sequences
<- sum(tmp_unique$value); a_sum a_sum
## [1] 1187229
100*(a_sum/totalseq)
## [1] 31.33577
# Total number of ASVs that were found in more than 1 sample
<- length(unique(tmp_shared$FeatureID)); b b
## [1] 2184
100*(b/totalasvs)
## [1] 17.64848
<- sum(tmp_shared$value); b_sum b_sum
## [1] 2601505
100*(b_sum/totalseq)
## [1] 68.66423
# Total number of ASVs at all sites
<- length(unique(tmp_allshared$FeatureID)); c c
## [1] 194
100*(c/totalasvs)
## [1] 1.567677
<- sum(tmp_allshared$value); c_sum c_sum
## [1] 834734
100*(c_sum/totalseq)
## [1] 22.032
# Select ASVs found in 1 field and at vent only
<- filter(insitu_asv_wClass, grepl(" only", SITE_CLASS) & CLASS == "Vent only")
tmp_1field_vent <- length(unique(tmp_1field_vent$FeatureID)); d d
## [1] 7325
100*(d/totalasvs)
## [1] 59.19192
<- sum(tmp_1field_vent$value); d_sum d_sum
## [1] 682645
100*(d_sum/totalseq)
## [1] 18.01776
<- insitu_asv_wClass %>%
df_wide_asv filter(SITE %in% all) %>%
filter(Domain == "Eukaryota") %>%
# Average across replicates
group_by(FeatureID, SAMPLENAME, VENT) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
mutate(REGION = case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
REGION %>% select(-Sample_tmp) %>%
)) unite(SAMPLE, SAMPLETYPE, SITE, VENTNAME, sep = "_", remove = FALSE) %>%
group_by(FeatureID, SAMPLE) %>%
summarise(SUM = sum(AVG)) %>%
pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>%
column_to_rownames(var = "SAMPLE")
## Warning: Expected 4 pieces. Additional pieces discarded in 25500 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# look at eigenvalues
<- prcomp(data.frame(compositions::clr(df_wide_asv)))
pca_lr <- (pca_lr$sdev^2)/sum(pca_lr$sdev^2)
variance_lr ## View bar plot
barplot(variance_lr, main = "Log-Ratio PCA Screeplot", xlab = "PC Axis", ylab = "% Variance",
cex.names = 1.5, cex.axis = 1.5, cex.lab = 1.5, cex.main = 1.5)
## Extract PCR points
data.frame(pca_lr$x, SAMPLE = rownames(pca_lr$x)) %>%
separate(SAMPLE, c("SAMPLETYPE", "REGION", "VENTNAME"), sep = "_", remove = FALSE) %>%
## Generate PCA plot
ggplot(aes(x = PC1, y = PC2, shape = SAMPLETYPE, fill = REGION)) +
geom_hline(yintercept = 0) + geom_vline(xintercept = 0, color = "#525252") +
geom_point(size=3, stroke = 1, aes(fill = REGION)) +
ylab(paste0('PC2 ',round(variance_lr[2]*100,2),'%')) +
xlab(paste0('PC1 ',round(variance_lr[1]*100,2),'%')) +
scale_shape_manual(values = c(21, 23, 24)) +
scale_fill_manual(values = c("#fdbb84", "#31a354", "#ef3b2c", "#02818a")) +
theme_bw() +
theme(axis.text = element_text(color="black", size=12),
legend.title = element_blank(),
axis.title = element_text(color="black", size=14),
legend.text = element_text(color = "black", size = 14),
plot.margin = margin(2, 1, 2, 1, "cm")) +
guides(fill = guide_legend(override.aes = list(shape = 21) ),
shape = guide_legend(override.aes = list(fill = "black")))
<- as.data.frame(t(df_wide_asv))
df names(df)
## [1] "Vent_GordaRidge_Venti Latte"
## [2] "Background_GordaRidge_Shallow seawater"
## [3] "Vent_Axial_Boca 2013"
## [4] "Vent_Axial_Escargot 2014"
## [5] "Background_GordaRidge_Deep seawater"
## [6] "Vent_VonDamm_VonDamm Rav2"
## [7] "Vent_Axial_N3Area 2013"
## [8] "Vent_GordaRidge_Candelabra"
## [9] "Background_GordaRidge_Near vent BW"
## [10] "Plume_GordaRidge_Candelabra Plume"
## [11] "Plume_GordaRidge_Mt Edwards Plume"
## [12] "Vent_GordaRidge_Sir Ventsalot"
## [13] "Vent_Axial_El Guapo 2013"
## [14] "Vent_Axial_Marker33 2014"
## [15] "Plume_VonDamm_VonDamm Plume"
## [16] "Vent_VonDamm_VonDamm OldManTree"
## [17] "Vent_Piccard_Piccard LotsOShrimp"
## [18] "Vent_Piccard_Piccard Shrimpocalypse"
## [19] "Vent_VonDamm_VonDamm ArrowLoop"
## [20] "Vent_Axial_Anemone 2013"
## [21] "Vent_VonDamm_VonDamm ShrimpHole"
## [22] "Vent_VonDamm_VonDamm X18"
## [23] "Vent_Axial_Marker113 2015"
## [24] "Vent_Axial_Marker33 2013"
## [25] "Background_Piccard_Piccard BSW"
## [26] "Plume_Axial_Anemone Plume 2015"
## [27] "Vent_Axial_Marker113 2013"
## [28] "Vent_Axial_Marker113 2014"
## [29] "Vent_GordaRidge_Mt Edwards"
## [30] "Background_VonDamm_VonDamm BSW"
## [31] "Background_Axial_Deep seawater 2015"
## [32] "Plume_Piccard_Piccard Plume"
## [33] "Vent_Axial_Skadi 2013"
## [34] "Vent_VonDamm_VonDamm WhiteCastle"
## [35] "Vent_VonDamm_VonDamm MustardStand"
###
colnames(df) <- gsub(x = names(df), pattern = "_", replacement = " ")
colnames(df) <- gsub(x = names(df), pattern = "Vent Axial", replacement = "Axial")
colnames(df) <- gsub(x = names(df), pattern = "Vent GordaRidge", replacement = "Gorda Ridge")
colnames(df) <- gsub(x = names(df), pattern = "GordaRidge", replacement = "Gorda Ridge")
colnames(df) <- gsub(x = names(df), pattern = "Vent VonDamm VonDamm", replacement = "Von Damm")
colnames(df) <- gsub(x = names(df), pattern = "Vent Piccard Piccard", replacement = "Piccard")
colnames(df) <- gsub(x = names(df), pattern = "Background Gorda Ridge", replacement = "Gorda Ridge")
colnames(df) <- gsub(x = names(df), pattern = "Plume Gorda Ridge", replacement = "Gorda Ridge")
colnames(df) <- gsub(x = names(df), pattern = "VonDamm VonDamm", replacement = "Von Damm")
colnames(df) <- gsub(x = names(df), pattern = "Piccard Piccard", replacement = "Piccard")
colnames(df) <- gsub(x = names(df), pattern = " BSW", replacement = "")
colnames(df) <- gsub(x = names(df), pattern = "Background Axial Deep seawater 2015", replacement = "Background Axial 2015")
colnames(df) <- gsub(x = names(df), pattern = "Plume Axial Anemone Plume 2015", replacement = "Axial Anemone Plume 2015")
colnames(df) <- gsub(x = names(df), pattern = "Rav2", replacement = "Ravelin #2")
colnames(df) <- gsub(x = names(df), pattern = "Plume Von Damm", replacement = "Von Damm")
colnames(df) <- gsub(x = names(df), pattern = "Plume Piccard", replacement = "Piccard")
# names(df)
# Write over same data frame - fix sample names
<- df
dendro_input # head(dendro_input)
Estimate Jaccard distance
<- vegan::vegdist(t(dendro_input), method = "jaccard")
dendro_jacc # dendro_jacc <- vegan::vegdist(t(compositions::clr(dendro_input)), method = "euclidean")
<- hclust(dist(t(dendro_jacc)), method = "average")
cluster_jacc
library(ggdendro)
<- ggdendro::dendro_data(as.dendrogram(cluster_jacc), type = "rectangle")
dendro_plot_df <- as.character(dendro_plot_df$labels$label)
label_dendro_order # label_dendro_order
Check stress
# euc_test <- vegan::vegdist(t(compositions::clr(dendro_input)), method = "euclidean")
#
# jac_test <- vegan::vegdist(t(dendro_input), method = "jaccard")
#
# set.seed(2930) # set seed
# ## So we can compare the relative composition based distance metric to the presence/absence based distance metric
# euc_nmds <- metaMDS(euc_test, k=2, autotransform=FALSE)
# jac_nmds <- metaMDS(jac_test, k=2, autotransform=FALSE)
#
# euc_nmds$stress
# jac_nmds$stress
Plot dendrogram
<- ggplot(segment(dendro_plot_df)) +
dendro_plot_output geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
coord_flip() +
scale_y_reverse(expand = c(0.2, 0.5), breaks = c(0, 0.2, 0.4, 0.6, 0.8)) +
geom_text(aes(x = x, y = y, label = label, angle = 0, hjust = 0), data = label(dendro_plot_df)) +
theme_dendro() +
labs(y = "Dissimilarity", title = "Jaccard distance") +
theme(axis.text.x = element_text(color = "black", size = 14),
axis.line.x = element_line(color = "#252525"),
axis.ticks.x = element_line(), axis.title.x = element_text(color = "black", size = 14))
dendro_plot_output
Add bar plot in the same order to show proportion of resident versus cosmpolitan ASVs in each sample.
# head(insitu_asv_wClass)
# unique(insitu_asv_wClass$SITE_CLASS)
# unique(insitu_asv_wClass$CLASS)
<- insitu_asv_wClass %>%
dendro_bar filter(SITE %in% all) %>%
filter(Domain == "Eukaryota") %>%
# Average across replicates
group_by(FeatureID, SAMPLENAME, VENT, SITE_CLASS) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
mutate(REGION= case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
REGION %>% select(-Sample_tmp) %>%
)) unite(SAMPLE, SAMPLETYPE, SITE, VENTNAME, sep = "_", remove = FALSE) %>%
mutate(SITE_CLASS_2 = case_when(
== "Piccard & Von Damm" ~ "Piccard & Von Damm only",
SITE_CLASS == "Piccard & Axial" ~ "MCR & Axial",
SITE_CLASS == "Piccard & Gorda Ridge" ~ "MCR & Gorda Ridge",
SITE_CLASS == "Von Damm & Axial" ~ "MCR & Axial",
SITE_CLASS == "Von Damm & Gorda Ridge" ~ "MCR & Gorda Ridge",
SITE_CLASS == "Piccard, Axial, & Gorda Ridge" ~ "MCR, Axial, & Gorda Ridge",
SITE_CLASS == "Von Damm, Axial, & Gorda Ridge" ~ "MCR, Axial, & Gorda Ridge",
SITE_CLASS TRUE ~ SITE_CLASS
%>%
)) group_by(SITE_CLASS_2, SAMPLE) %>%
summarise(SEQ_SUM = sum(AVG),
ASV_COUNT = n()) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "_", replacement = " ")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent Axial", replacement = "Axial")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent GordaRidge", replacement = "Gorda Ridge")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "GordaRidge", replacement = "Gorda Ridge")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent VonDamm VonDamm", replacement = "Von Damm")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent Piccard Piccard", replacement = "Piccard")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Background Gorda Ridge", replacement = "Gorda Ridge")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume Gorda Ridge", replacement = "Gorda Ridge")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "VonDamm VonDamm", replacement = "Von Damm")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Piccard Piccard", replacement = "Piccard")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = " BSW", replacement = "")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Background Axial Deep seawater 2015", replacement = "Background Axial 2015")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume Axial Anemone Plume 2015", replacement = "Axial Anemone Plume 2015")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Rav2", replacement = "Ravelin #2")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume Von Damm", replacement = "Von Damm")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume Piccard", replacement = "Piccard"))
## Warning: Expected 4 pieces. Additional pieces discarded in 25500 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# unique(insitu_asv_wClass$CLASS)
<- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")
cosmo
<- insitu_asv_wClass %>%
dendro_res_cos_df filter(SITE %in% all) %>%
filter(Domain == "Eukaryota") %>%
mutate(DISTRIBUTION = case_when(
%in% cosmo ~ "Cosmopolitan",
CLASS TRUE ~ CLASS
%>%
)) # Average across replicates
group_by(FeatureID, SAMPLENAME, VENT, DISTRIBUTION) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
mutate(REGION= case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
REGION %>% select(-Sample_tmp) %>%
)) unite(SAMPLE, SAMPLETYPE, SITE, VENTNAME, sep = "_", remove = FALSE) %>%
group_by(DISTRIBUTION, SAMPLE) %>%
summarise(SEQ_SUM = sum(AVG),
ASV_COUNT = n()) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "_", replacement = " ")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent Axial", replacement = "Axial")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent GordaRidge", replacement = "Gorda Ridge")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "GordaRidge", replacement = "Gorda Ridge")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent VonDamm VonDamm", replacement = "Von Damm")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent Piccard Piccard", replacement = "Piccard")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Background Gorda Ridge", replacement = "Gorda Ridge")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume Gorda Ridge", replacement = "Gorda Ridge")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "VonDamm VonDamm", replacement = "Von Damm")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Piccard Piccard", replacement = "Piccard")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = " BSW", replacement = "")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Background Axial Deep seawater 2015", replacement = "Background Axial 2015")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume Axial Anemone Plume 2015", replacement = "Axial Anemone Plume 2015")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Rav2", replacement = "Ravelin #2")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume Von Damm", replacement = "Von Damm")) %>%
mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume Piccard", replacement = "Piccard"))
## Warning: Expected 4 pieces. Additional pieces discarded in 25500 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
$SAMPLE_ORDER <- factor(dendro_bar$SAMPLE, levels = label_dendro_order)
dendro_bar$SAMPLE_ORDER <- factor(dendro_res_cos_df$SAMPLE, levels = label_dendro_order)
dendro_res_cos_df
$SITE_CLASS_ORDER <- factor(dendro_bar$SITE_CLASS_2, levels = c("All sites","Von Damm only","Piccard only","Piccard & Von Damm only","MCR & Axial","MCR & Gorda Ridge", "MCR, Axial, & Gorda Ridge", "Gorda Ridge only","Axial only","Axial & Gorda Ridge")) dendro_bar
<- ggplot(dendro_bar, aes(x = SAMPLE_ORDER, y = ASV_COUNT, fill = SITE_CLASS_ORDER)) +
dendro_bar_plot geom_bar(stat = "identity", position = "fill", color = "black", width = 0.7, alpha = 0.8) +
coord_flip() +
scale_y_continuous(expand = c(0,0)) +
# scale_fill_manual(values = c("grey", "#e6550d", "#fdbb84", "#31a354", "#1c9099", "#fde0dd", "#c51b8a")) +
scale_fill_manual(values = c("#636363","#bd0026","#fed976","#fd8d3c","#a63603","#fdae6b","#c994c7","#00441b","#ce1256","#addd8e","#f7fcb9","#016c59","#41ab5d","#6a51a3","#3690c0")) +
theme_bw() +
theme(axis.text = element_text(color="black", size=5),
legend.title = element_blank(),
panel.border = element_blank(),
panel.grid = element_blank(),
axis.title = element_text(color="black", size=14),
legend.text = element_text(color = "black", size = 12),
plot.margin = margin(1, 1, 1, 1, "cm"),
legend.position = "top") +
labs(x = "", y = "Proportion of ASVs")
<- ggplot(dendro_res_cos_df, aes(x = SAMPLE_ORDER, y = ASV_COUNT, fill = DISTRIBUTION)) +
dendro_bar_plot_res_cos geom_bar(stat = "identity", position = "fill", color = "black", width = 0.7, alpha = 0.8) +
coord_flip() +
scale_y_continuous(expand = c(0,0)) +
::scale_fill_viridis(discrete=TRUE, option = "H") +
viridistheme_bw() +
theme(axis.text = element_text(color="black", size=5),
legend.title = element_blank(),
panel.border = element_blank(),
panel.grid = element_blank(),
axis.title = element_text(color="black", size=14),
legend.text = element_text(color = "black", size = 12),
plot.margin = margin(1, 1, 1, 1, "cm"),
legend.position = "top") +
labs(x = "", y = "Proportion of ASVs")
# dendro_bar_plot_res_cos
# ?scale_fill_viridis
<- ggplot(dendro_bar, aes(x = SAMPLE_ORDER, y = SEQ_SUM, fill = SITE_CLASS_ORDER)) +
dendro_bar_plot_SEQ geom_bar(stat = "identity", position = "fill", color = "black", width = 0.7, alpha = 0.8) +
coord_flip() +
scale_y_continuous(expand = c(0,0)) +
# scale_fill_manual(values = c("grey", "#e6550d", "#fdbb84", "#31a354", "#1c9099", "#fde0dd", "#c51b8a")) +
scale_fill_manual(values = c("#636363","#bd0026","#fed976","#fd8d3c","#a63603","#fdae6b","#c994c7","#00441b","#ce1256","#addd8e","#f7fcb9","#016c59","#41ab5d","#6a51a3","#3690c0")) +
theme_bw() +
theme(axis.text = element_text(color="black", size=5),
legend.title = element_blank(),
panel.border = element_blank(),
panel.grid = element_blank(),
axis.title = element_text(color="black", size=14),
legend.text = element_text(color = "black", size = 12),
plot.margin = margin(1, 1, 1, 1, "cm"),
legend.position = "top") +
labs(x = "", y = "Proportion of sequences")
<- ggplot(dendro_res_cos_df, aes(x = SAMPLE_ORDER, y = SEQ_SUM, fill = DISTRIBUTION)) +
dendro_bar_plot_res_cos_SEQ geom_bar(stat = "identity", position = "fill", color = "black", width = 0.7, alpha = 0.8) +
coord_flip() +
scale_y_continuous(expand = c(0,0)) +
::scale_fill_viridis(discrete=TRUE, option = "H") +
viridistheme_bw() +
theme(axis.text = element_text(color="black", size=5),
legend.title = element_blank(),
panel.border = element_blank(),
panel.grid = element_blank(),
axis.title = element_text(color="black", size=14),
legend.text = element_text(color = "black", size = 12),
plot.margin = margin(1, 1, 1, 1, "cm"),
legend.position = "t∂p") +
labs(x = "", y = "Proportion of sequences")
# dendro_bar_plot
Combine all plots with patchwork.
# library(patchwork)
# svg("dendrogram_wbarplots.svg", w = 18, h = 12)
+ dendro_bar_plot + dendro_bar_plot_res_cos + patchwork::plot_layout(nrow = 1, widths = c(1, 0.2, 0.2),
dendro_plot_output heights = c(1.5, 0.2, 0.2)) + plot_annotation(tag_levels = "a")
# dev.off()
ASV richness with customized color schema
%>%
insitu_asv_wClass filter(SITE %in% all) %>%
filter(Domain == "Eukaryota") %>%
# Average across replicates
group_by(FeatureID, SAMPLENAME, VENT, Supergroup) %>%
summarise(AVG = mean(value)) %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
mutate(REGION = case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
REGION %>% select(-Sample_tmp) %>%
)) unite(SAMPLE, REGION, VENTNAME, sep = " ", remove = FALSE) %>%
ungroup() %>%
group_by(SITE, REGION, SAMPLE, SAMPLETYPE) %>%
summarise(NUM_ASV = n()) %>%
ggplot(aes(x = SAMPLETYPE, y = NUM_ASV, shape = SAMPLETYPE)) +
geom_boxplot(aes(group = SAMPLETYPE), alpha = 0.8, width = 0.4) +
geom_jitter(size=2, width = 0.3, aes(fill = SITE)) +
scale_shape_manual(values = c(21, 23, 24)) +
scale_fill_manual(values = c("#fdbb84", "#31a354", "#ef3b2c", "#02818a")) +
theme_bw() +
theme(axis.text = element_text(color="black", size=12),
legend.title = element_blank(),
axis.title = element_text(color="black", size=14),
legend.text = element_text(color = "black", size = 14)) +
guides(fill = guide_legend(override.aes = list(shape = 21) ),
shape = guide_legend(override.aes = list(fill = NA) ) ) +
labs(x = "", y = "Total number of ASVs")
## Warning: Expected 4 pieces. Additional pieces discarded in 25500 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
Figure S4.
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/figs4-ASVrichness-bytax.svg", h = 8, w = 12)
%>%
insitu_asv_wClass filter(SITE %in% all) %>%
filter(Domain == "Eukaryota") %>%
filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) # Average across replicates
group_by(FeatureID, SAMPLENAME, VENT, SUPERGROUP) %>%
summarise(AVG = mean(value)) %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
mutate(REGION = case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
REGION %>% select(-Sample_tmp) %>%
)) unite(SAMPLE, REGION, VENTNAME, sep = " ", remove = FALSE) %>%
ungroup() %>%
group_by(SITE, REGION, SAMPLE, SAMPLETYPE, SUPERGROUP) %>%
summarise(NUM_ASV = n()) %>%
mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
ggplot(aes(x = SAMPLETYPE, y = NUM_ASV, shape = SAMPLETYPE)) +
geom_boxplot(aes(group = SAMPLETYPE), alpha = 0.8, width = 0.4) +
geom_jitter(size=2, width = 0.3, aes(fill = SUPERGROUP_ORDER)) +
facet_wrap(.~ SUPERGROUP_ORDER, scales = "free_y") +
scale_shape_manual(values = c(21, 23, 24)) +
scale_fill_manual(values = all_taxa_color_protalve) +
theme_bw() +
theme(axis.text = element_text(color="black", size=12),
legend.title = element_blank(),
axis.title = element_text(color="black", size=14),
legend.text = element_text(color = "black", size = 14),
strip.background = element_blank(),
axis.text.x = element_text(color="black", size=12, angle = 90, hjust = 1, vjust = 0.5)) +
guides(fill = guide_legend(override.aes = list(shape = 21) ),
shape = guide_legend(override.aes = list(fill = NA) ) ) +
labs(x = "", y = "Total number of ASVs")
## Warning: Expected 4 pieces. Additional pieces discarded in 23244 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# dev.off()
# set.seed(1234)
<- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
alv # svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/fig2b-upsetR-all_revised.svg", h=8, w=12)
%>%
insitu_asv_wClass filter(SITE %in% all) %>%
filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum == "Protalveolata" ~ "Other Alveolata",
Supergroup TRUE ~ Supergroup
%>%
)) # Taxa to supergroup
mutate(SupergroupPhylum = SUPERGROUP) %>%
# Average across replicates
group_by(FeatureID, SAMPLENAME, VENT, SupergroupPhylum) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
mutate(REGION = case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
REGION %>% select(-Sample_tmp) %>%
)) unite(SAMPLE, SITE, SAMPLETYPE, sep = " ", remove = FALSE) %>%
group_by(FeatureID, SupergroupPhylum, SAMPLE) %>%
summarise(SUM = sum(AVG)) %>%
ungroup() %>%
mutate(SUPERGROUP_ORDER = factor(SupergroupPhylum, levels = all_taxa_order)) %>%
distinct(FeatureID, SUPERGROUP_ORDER, SUM, SAMPLE, .keep_all = TRUE) %>%
group_by(FeatureID, SUPERGROUP_ORDER) %>%
summarise(SAMPLE = list(SAMPLE)) %>%
ggplot(aes(x = SAMPLE)) +
geom_hline(yintercept = 200, linetype = "dashed", alpha = 0.6) +
geom_bar(color = NA, width = 0.7, aes(fill = SUPERGROUP_ORDER)) +
scale_x_upset(order_by = "freq", n_intersections = 25) +
scale_y_continuous(expand = c(0,0)) +
labs(x = "", y = "Shared ASVs") +
theme_linedraw() +
theme(axis.text.y = element_text(color="black", size=14, face = "bold"),
axis.text.x = element_text(color="black", size=14, face = "bold"),
axis.title = element_text(color="black", size=14, face = "bold"),
legend.text = element_text(color = "black", size = 12),
plot.margin = margin(1, 1, 1, 5, "cm"),
legend.title = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
legend.position = "none") +
scale_fill_manual(values = all_taxa_color) +
theme_combmatrix(
combmatrix.panel.striped_background.color.two = "#878686",
combmatrix.panel.point.size = 3.5
)
## Warning: Expected 4 pieces. Additional pieces discarded in 23244 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Removed 1425 rows containing non-finite values (stat_count).
# dev.off()
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/fig-revised-200_upset.svg", h=8, w=12)
%>%
insitu_asv_wClass filter(SITE %in% all) %>%
filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum == "Protalveolata" ~ "Other Alveolata",
Supergroup TRUE ~ Supergroup
%>%
)) # Taxa to supergroup
mutate(SupergroupPhylum = SUPERGROUP) %>%
# Average across replicates
group_by(FeatureID, SAMPLENAME, VENT, SupergroupPhylum) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
mutate(REGION = case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
REGION %>% select(-Sample_tmp) %>%
)) unite(SAMPLE, SITE, SAMPLETYPE, sep = " ", remove = FALSE) %>%
group_by(FeatureID, SupergroupPhylum, SAMPLE) %>%
summarise(SUM = sum(AVG)) %>%
# filter(SUM > 200) %>%
ungroup() %>%
distinct(FeatureID, SupergroupPhylum, SUM, SAMPLE, .keep_all = TRUE) %>%
group_by(FeatureID, SupergroupPhylum) %>%
summarise(SAMPLE = list(SAMPLE)) %>%
mutate(samples = sapply(SAMPLE, paste, collapse = " ")) %>%
group_by(samples) %>%
filter(n() < 250) %>%
ungroup() %>%
mutate(SUPERGROUP_ORDER = factor(SupergroupPhylum, levels = all_taxa_order)) %>%
#how to order samples in the same way as above?
# mutate(SAMPLE_ORDER = factor(SAMPLE, levels = c("Axial Vent", "GordaRidge Vent", "VonDamm Vent", "GordaRidge Background", "Piccard Vent", "VonDamm Plume", "GordaRidge Plume", "VonDamm Background", "Axial Plume", "Piccard Background", "Axial Background", "Piccard Plume"))) %>%
ggplot(aes(x = SAMPLE)) +
geom_hline(yintercept = 200, linetype = "dashed", alpha = 0.6) +
geom_bar(color = NA, width = 0.7, aes(fill = SUPERGROUP_ORDER)) +
scale_x_upset(order_by = "freq", n_intersections = 25) +
# axis_combmatrix(sep = "-") +
scale_y_continuous(expand = c(0,0)) +
labs(x = "", y = "Shared ASVs") +
theme_linedraw() +
theme(axis.text.y = element_text(color="black", size=14, face = "bold"),
axis.text.x = element_text(color="black", size=14, face = "bold"),
axis.title = element_text(color="black", size=14, face = "bold"),
legend.text = element_text(color = "black", size = 12),
plot.margin = margin(1, 1, 1, 5, "cm"),
legend.title = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
legend.position = "none") + #when revising this figure, always check color legend
scale_fill_manual(values = all_taxa_color) +
theme_combmatrix(
combmatrix.panel.striped_background.color.two = "#878686",
combmatrix.panel.point.size = 3.5
)
## Warning: Expected 4 pieces. Additional pieces discarded in 23244 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Removed 1254 rows containing non-finite values (stat_count).
# dev.off()
Observations regarding above plot: - Axial and Gorda Ridge vent sites have more shared ASVs than any other pairwise comparison. After this, there were also many ASVs shared throughout MCR (vent, plume, + background). May be a reflection of sample size, as MCR had more vent sites - a small subset of ASVs were found at all vent sites or all samples. - ASVs within the vents had much higher unique # of ASVs (not shared with another habitat type) than any other sample type/location (furtherest left bars).
Repeat upsetR plot, but summarize at genus level, rather than “species” or “strain”
<- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
alv # svg("upsetR-bysite-sampletype-nov2.svg", h=9, w=15)
<- insitu_asv_wClass %>%
genus_upset filter(SITE %in% all) %>%
filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) # Taxa to supergroup
mutate(SupergroupPhylum = SUPERGROUP) %>%
unite(GENUS, Domain:Genus, sep = ";") %>%
# Average across replicates
group_by(GENUS, SAMPLENAME, VENT, SupergroupPhylum) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
mutate(REGION = case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
REGION %>% select(-Sample_tmp) %>%
)) unite(SAMPLE, SITE, SAMPLETYPE, sep = " ", remove = FALSE) %>%
group_by(GENUS, SupergroupPhylum, SAMPLE) %>%
summarise(SUM = sum(AVG)) %>%
# filter(SUM > 200) %>%
ungroup() %>%
distinct(GENUS, SupergroupPhylum, SUM, SAMPLE, .keep_all = TRUE) %>%
group_by(GENUS, SupergroupPhylum) %>%
summarise(SAMPLE = list(SAMPLE)) %>%
ggplot(aes(x = SAMPLE)) +
geom_bar(color = "black", width = 0.7, aes(fill = SupergroupPhylum)) +
scale_x_upset(n_intersections = 25) +
scale_y_continuous(expand = c(0,0)) +
labs(x = "", y = "Shared at Genus level") +
theme_linedraw() +
theme(axis.text.y = element_text(color="black", size=14, face = "bold"),
axis.text.x = element_text(color="black", size=14, face = "bold"),
axis.title = element_text(color="black", size=14, face = "bold"),
legend.text = element_text(color = "black", size = 12, face = "bold"),
legend.title = element_blank(),
panel.grid.minor = element_blank(),
plot.margin = margin(1, 1, 1, 5, "cm")) +
scale_fill_manual(values = all_taxa_color)
## Warning: Expected 4 pieces. Additional pieces discarded in 8229 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
#
genus_upset
## Warning: Removed 316 rows containing non-finite values (stat_count).
Isolate list of genus level that are shared at all sites.
<- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
alv # svg("upsetR-bysite-sampletype-nov2.svg", h=9, w=15)
<- insitu_asv_wClass %>%
shared_genus filter(SITE %in% all) %>%
filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) # Taxa to supergroup
mutate(SupergroupPhylum = SUPERGROUP) %>%
unite(GENUS, Domain:Genus, sep = ";") %>%
# Average across replicates
group_by(GENUS, SAMPLENAME, VENT, SupergroupPhylum) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
mutate(REGION = case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
REGION %>% select(-Sample_tmp) %>%
)) unite(SAMPLE, SITE, SAMPLETYPE, sep = " ", remove = FALSE) %>%
select(GENUS, SAMPLE, AVG) %>%
pivot_wider(names_from = SAMPLE, values_from = "AVG", values_fn = sum) %>%
drop_na()
## Warning: Expected 4 pieces. Additional pieces discarded in 8229 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# head(shared_genus)
From the vent-only ASVs, what percentage of them appear only at 1 site? what percent also appear at other vent sites?
<- filter(insitu_asv_wClass, CLASS == "Vent only")
resident_only
<- length(unique(resident_only$FeatureID)); totalasvs totalasvs
## [1] 8107
<- sum(resident_only$value); totalseq totalseq
## [1] 1267675
# resident includes
100*(totalasvs/length(unique(insitu_asv_wClass$FeatureID))) #%ASVs
## [1] 65.51111
100*(totalseq/sum(insitu_asv_wClass$value)) #%sequences
## [1] 33.45907
# unique(resident_only$SITE_CLASS)
<- filter(resident_only, grepl(" only", SITE_CLASS))
tmp_unique <- filter(resident_only, !(grepl(" only", SITE_CLASS)))
tmp_shared <- filter(resident_only, SITE_CLASS == "All sites")
tmp_allshared
# Of the ASVs found ONLY within diffuse venting fluid, just over 90% were unique to only the individual vent site - makes up 53% of the vent only sequences
<- length(unique(tmp_unique$FeatureID)); a a
## [1] 7325
100*(a/totalasvs)
## [1] 90.35402
100*(a/length(unique(insitu_asv_wClass$FeatureID)))
## [1] 59.19192
<- sum(tmp_unique$value); a_sum a_sum
## [1] 682645
100*(a_sum/totalseq)
## [1] 53.85016
# Of the ASVs from vent only, 9.6% were also found at other vent sites, totals to 46% of the sequences.
<- length(unique(tmp_shared$FeatureID)); b b
## [1] 782
100*(b/totalasvs)
## [1] 9.645985
<- sum(tmp_shared$value); b_sum b_sum
## [1] 585030
100*(b_sum/totalseq)
## [1] 46.14984
# Total number of ASVs designated to be found at all sites
<- length(unique(tmp_allshared$FeatureID)); c c
## [1] 7
100*(c/totalasvs)
## [1] 0.08634513
<- sum(tmp_allshared$value); c_sum c_sum
## [1] 3022
100*(c_sum/totalseq)
## [1] 0.2383892
<- tmp_allshared
vent_only_allsites # View(vent_only_allsites)
table(vent_only_allsites$Genus)
##
## Chrysochromulina Chrysophyceae_Clade-C_X
## 21 8
## Prymnesiophyceae_Clade_B4_X Stephanoecidae_Group_D_X
## 7 8
unique(vent_only_allsites$Taxon)
## [1] "Eukaryota;Hacrobia;Haptophyta;Prymnesiophyceae;Prymnesiales;Chrysochromulinaceae;Chrysochromulina;Chrysochromulina_sp.;"
## [2] "Eukaryota;Stramenopiles;Ochrophyta;Chrysophyceae;Chrysophyceae_X;Chrysophyceae_Clade-C;Chrysophyceae_Clade-C_X;Chrysophyceae_Clade-C_X_sp.;"
## [3] "Eukaryota;Opisthokonta;Choanoflagellida;Choanoflagellatea;Acanthoecida;Stephanoecidae_Group_D;Stephanoecidae_Group_D_X;Stephanoecidae_Group_D_X_sp.;"
## [4] "Eukaryota;Hacrobia;Haptophyta;Prymnesiophyceae;Prymnesiales;Prymnesiophyceae_Clade_B4;Prymnesiophyceae_Clade_B4_X;Prymnesiophyceae_Clade_B4_X_sp.;"
## [5] "Unassigned"
# unique(vent_only_allsites$FeatureID)
Isolate ASVs that were found in all habitat types! (Vent, plume, & background)
# unique(insitu_asv_wClass$CLASS)
<- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")
cosmo # cosmo <- c("Vent, plume, & background")
<- filter(insitu_asv_wClass, CLASS %in% cosmo)
cosmo_only
<- length(unique(cosmo_only$FeatureID)); totalasvs totalasvs
## [1] 2133
<- sum(cosmo_only$value); totalseq totalseq
## [1] 2424982
100*(totalasvs/length(unique(insitu_asv_wClass$FeatureID)))
## [1] 17.23636
100*(totalseq/sum(insitu_asv_wClass$value))
## [1] 64.00507
# unique(cosmo_only$SITE_CLASS)
<- filter(cosmo_only, grepl(" only", SITE_CLASS))
tmp_unique <- filter(cosmo_only, !(grepl(" only", SITE_CLASS)))
tmp_shared <- filter(cosmo_only, SITE_CLASS == "All sites")
tmp_allshared
# Of the xoamopolitan ASVs, the ASVs found unique to a vent field
<- length(unique(tmp_unique$FeatureID)); a a
## [1] 761
100*(a/totalasvs)
## [1] 35.67745
100*(a/length(unique(insitu_asv_wClass$FeatureID)))
## [1] 6.149495
<- sum(tmp_unique$value); a_sum a_sum
## [1] 414955
100*(a_sum/totalseq)
## [1] 17.11167
# Of the ASVs found in all habitat types, 77% of them were also found at another vent site, consisting of 92% of the sequences
<- length(unique(tmp_shared$FeatureID)); b b
## [1] 1372
100*(b/totalasvs)
## [1] 64.32255
<- sum(tmp_shared$value); b_sum b_sum
## [1] 2010027
100*(b_sum/totalseq)
## [1] 82.88833
# Of the ASVS found in all habitat types, 16% of them were found at all vent sites, which made up 40% of the sequences.
<- length(unique(tmp_allshared$FeatureID)); c c
## [1] 187
100*(c/totalasvs)
## [1] 8.766995
<- sum(tmp_allshared$value); c_sum c_sum
## [1] 831712
100*(c_sum/totalseq)
## [1] 34.29766
<- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
alv <- c("Deep seawater", "BSW", "Shallow seawater", "Near vent BW")
bkgd <- c("Candelabra Plume", "Mt Edwards Plume", "Plume", "Anemone Plume")
plume
<- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")
cosmo <- c("Vent only")
res # head(insitu_asv_wClass)
# unique(insitu_asv_wClass$CLASS)
<- insitu_asv_wClass %>%
to_supergroup_CLASS filter(SITE %in% all) %>%
mutate(CATEGORY = case_when(
%in% cosmo ~ "Cosmopolitan",
CLASS %in% res ~ "Resident",
CLASS TRUE ~ "Other"
%>%
)) filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) mutate(SAMPLETYPEORDER = case_when(
%in% bkgd ~ "Background",
VENT %in% plume ~ "Plume",
VENT TRUE ~ "Vent"
%>%
)) group_by(FeatureID, Taxon, SUPERGROUP,
%>%
CATEGORY) summarise(SEQ_AVG_REP = mean(value)) %>%
ungroup() %>%
group_by(CATEGORY, SUPERGROUP) %>%
summarise(ASV_COUNT = n(),
SEQ_SUM = sum(SEQ_AVG_REP))
# head(to_supergroup_CLASS)
%>%
to_supergroup_CLASS filter(CATEGORY != "Other") %>%
pivot_longer(cols = ASV_COUNT:SEQ_SUM) %>%
mutate(NAME = case_when(
== "SEQ_SUM" ~ "Proportion of sequences",
name TRUE ~ "Proportion of ASVs"
%>%
)) mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
ggplot(aes(area = value, fill = SUPERGROUP_ORDER, subgroup = SUPERGROUP_ORDER)) +
geom_treemap(color = "white") +
geom_treemap_subgroup_border(colour = "white", size = 2) +
facet_grid(CATEGORY ~ NAME) +
theme_linedraw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
strip.background = element_blank(), strip.text = element_text(color = "black"),
legend.position = "right",
legend.title = element_blank(),
panel.border = element_blank()) +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = all_taxa_color) +
labs(x = "", y = "")
<- insitu_asv_wClass %>%
supergroup_distri_bubble filter(SITE %in% all) %>%
mutate(CATEGORY = case_when(
%in% cosmo ~ "Cosmopolitan",
CLASS %in% res ~ "Resident",
CLASS TRUE ~ "Other"
%>%
)) filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) mutate(SAMPLETYPEORDER = case_when(
%in% bkgd ~ "Background",
VENT %in% plume ~ "Plume",
VENT TRUE ~ "Vent"
%>%
)) group_by(SUPERGROUP, CATEGORY) %>%
summarise(SEQ_TOTAL = sum(value),
ASV_TOTAL = n()) %>%
filter(SEQ_TOTAL > 200) %>%
ungroup() %>%
pivot_wider(names_from = CATEGORY, values_from = c(SEQ_TOTAL, ASV_TOTAL), values_fill = 0) %>%
mutate(SUPERGROUP_SEQ_TOTAL = SEQ_TOTAL_Cosmopolitan + SEQ_TOTAL_Other + SEQ_TOTAL_Resident,
cosmo_seq_perc = 100*(SEQ_TOTAL_Cosmopolitan/SUPERGROUP_SEQ_TOTAL),
res_seq_perc = 100*(SEQ_TOTAL_Resident/SUPERGROUP_SEQ_TOTAL),
other_seq_perc = 100*(SEQ_TOTAL_Other/SUPERGROUP_SEQ_TOTAL),
SUPERGROUP_ASV_TOTAL = ASV_TOTAL_Cosmopolitan + ASV_TOTAL_Other + ASV_TOTAL_Resident,
cosmo_asv_perc = 100*(ASV_TOTAL_Cosmopolitan/SUPERGROUP_ASV_TOTAL),
res_asv_perc = 100*(ASV_TOTAL_Resident/SUPERGROUP_ASV_TOTAL),
other_asv_perc = 100*(ASV_TOTAL_Other/SUPERGROUP_ASV_TOTAL))
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/fig5a-bubble-wlegend.svg", h = 4, w = 4)
%>%
supergroup_distri_bubble mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
ggplot(aes(y = res_asv_perc, x = cosmo_asv_perc, size = SUPERGROUP_SEQ_TOTAL, fill = SUPERGROUP_ORDER)) +
# geom_abline(slope = 1, intercept = 0, alpha = 0.5) +
geom_point(shape = 21, stat = "identity", color = "#525252", stroke = 1) +
scale_size_continuous(range = c(5,25)) +
scale_y_continuous(limits = c(0,100)) +
scale_x_continuous(limits = c(0,100)) +
theme_linedraw() +
scale_fill_manual(values = all_taxa_color) +
labs(x = "Proportion of cosmopolitan ASVs", y = "Proportion of resident ASVs") +
theme(legend.title = element_blank(),
# legend.position = "none",
legend.position = "right", # color key is the same
axis.text = element_text(color = "black", face = "bold", size = 12),
axis.title = element_text(color = "black", face = "bold", size = 12),
panel.background = element_blank())
dev.off()
## null device
## 1
# View(select(supergroup_distri_bubble, SUPERGROUP, SUPERGROUP_SEQ_TOTAL)) # check legend using these values
%>%
supergroup_distri_bubble mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
ggplot(aes(x = ASV_TOTAL_Cosmopolitan, y = ASV_TOTAL_Resident, size = SUPERGROUP_SEQ_TOTAL, fill = SUPERGROUP_ORDER)) +
# geom_abline(slope = 1, intercept = 0, alpha = 0.5) +
geom_point(shape = 21, stat = "identity", color = "black") +
scale_size_continuous(range = c(4,20)) +
scale_y_continuous(limits = c(0,4500)) +
scale_x_continuous(limits = c(0,4500)) +
theme_linedraw() +
scale_fill_manual(values = all_taxa_color) +
labs(x = "Total number of cosmopolitan ASVs", y = "Total number of resident ASVs") +
theme(legend.title = element_blank())
<- insitu_asv_wClass %>%
seq_res_cosmo_bar # filter(SITE %in% all) %>%
mutate(CATEGORY = case_when(
%in% cosmo ~ "Cosmopolitan",
CLASS %in% res ~ "Resident",
CLASS TRUE ~ "Other"
%>%
)) filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) mutate(SAMPLETYPEORDER = case_when(
%in% bkgd ~ "Background",
VENT %in% plume ~ "Plume",
VENT TRUE ~ "Vent"
%>%
)) group_by(SUPERGROUP, CATEGORY, SITE) %>%
summarise(SEQ_TOTAL = sum(value),
ASV_TOTAL = n()) %>%
filter(CATEGORY != "Other") %>%
mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
ggplot(aes(x = SITE, y = SEQ_TOTAL, fill = SUPERGROUP_ORDER)) +
geom_bar(stat = "identity", position = "stack", width = 0.4) +
facet_grid(. ~ CATEGORY) +
theme_linedraw() +
scale_fill_manual(values = all_taxa_color_protalve) +
labs(x = "", y = "Total sequences") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
strip.background = element_blank(),
strip.text = element_text(color = "black"))
%>%
insitu_asv_wClass mutate(CATEGORY = case_when(
%in% cosmo ~ "Cosmopolitan",
CLASS %in% res ~ "Resident",
CLASS TRUE ~ "Other"
%>%
)) filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) mutate(SAMPLETYPEORDER = case_when(
%in% bkgd ~ "Background",
VENT %in% plume ~ "Plume",
VENT TRUE ~ "Vent"
%>%
)) group_by(SUPERGROUP, CATEGORY, SITE) %>%
summarise(SEQ_TOTAL = sum(value),
ASV_TOTAL = n()) %>%
filter(CATEGORY != "Other") %>%
mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
ggplot(aes(x = SITE, y = SEQ_TOTAL, fill = SUPERGROUP_ORDER)) +
geom_bar(stat = "identity", position = "fill", width = 0.4) +
facet_grid(. ~ CATEGORY) +
theme_linedraw() +
scale_fill_manual(values = all_taxa_color_protalve) +
labs(x = "", y = "Sequence relative abundance") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
strip.background = element_blank(),
strip.text = element_text(color = "black"),
legend.title = element_blank())
<- insitu_asv_wClass %>%
asv_res_cosmo_bar # filter(SITE %in% all) %>%
mutate(CATEGORY = case_when(
%in% cosmo ~ "Cosmopolitan",
CLASS %in% res ~ "Resident",
CLASS TRUE ~ "Other"
%>%
)) filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) mutate(SAMPLETYPEORDER = case_when(
%in% bkgd ~ "Background",
VENT %in% plume ~ "Plume",
VENT TRUE ~ "Vent"
%>%
)) group_by(SUPERGROUP, CATEGORY, SITE) %>%
summarise(SEQ_TOTAL = sum(value),
ASV_TOTAL = n()) %>%
filter(CATEGORY != "Other") %>%
mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
ggplot(aes(x = SITE, y = ASV_TOTAL, fill = SUPERGROUP_ORDER)) +
geom_bar(stat = "identity", position = "stack", width = 0.4) +
facet_grid(. ~ CATEGORY) +
theme_linedraw() +
scale_fill_manual(values = all_taxa_color_protalve) +
labs(x = "", y = "Total ASVs") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
strip.background = element_blank(),
strip.text = element_text(color = "black"))
Figure S5
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/figs5-res_cosmo_barplot.svg", w = 9, h = 5)
+ theme(legend.position = "none")) + (asv_res_cosmo_bar + theme(legend.position = "none")) + patchwork::plot_layout(nrow = 1) + plot_annotation(tag_levels = "a") (seq_res_cosmo_bar
# dev.off()
<- c("Excavata", "Apusozoa", "Amoebozoa")
vent_only_resident %>%
insitu_asv_wClass # filter(SITE %in% all) %>%
mutate(CATEGORY = case_when(
%in% cosmo ~ "Cosmopolitan",
CLASS %in% res ~ "Resident",
CLASS TRUE ~ "Other"
%>%
)) filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) mutate(SAMPLETYPEORDER = case_when(
%in% bkgd ~ "Background",
VENT %in% plume ~ "Plume",
VENT TRUE ~ "Vent"
%>%
)) filter(SUPERGROUP %in% vent_only_resident) %>%
group_by(SUPERGROUP, CATEGORY, SITE) %>%
summarise(SEQ_TOTAL = sum(value),
ASV_TOTAL = n()) %>%
filter(CATEGORY != "Other") %>%
mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
ggplot(aes(x = SITE, y = SEQ_TOTAL, fill = SUPERGROUP_ORDER)) +
geom_bar(stat = "identity", position = "stack", width = 0.4) +
facet_grid(. ~ CATEGORY) +
theme_linedraw() +
scale_fill_manual(values = all_taxa_color) +
labs(x = "", y = "Total sequences") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
strip.background = element_blank(),
strip.text = element_text(color = "black"))
<- c("Excavata", "Apusozoa", "Amoebozoa")
vent_only_resident <- insitu_asv_wClass %>%
resident_subset # filter(SITE %in% all) %>%
mutate(CATEGORY = case_when(
%in% cosmo ~ "Cosmopolitan",
CLASS %in% res ~ "Resident",
CLASS TRUE ~ "Other"
%>%
)) filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) mutate(SAMPLETYPEORDER = case_when(
%in% bkgd ~ "Background",
VENT %in% plume ~ "Plume",
VENT TRUE ~ "Vent"
%>%
)) filter(SUPERGROUP %in% vent_only_resident) %>%
filter(CLASS == "Vent only")
dim(resident_subset)
## [1] 346 35
table(resident_subset$Class)
##
## Apusomonadidae_Group-1 Breviatea_X Carpediemonadea
## 55 7 30
## Discoba_XX Discosea-Flabellinia Fornicata_X
## 29 1 1
## Lobosa_X Mycetozoa-Myxogastrea NAMAKO-1-lineage
## 16 1 1
## Planomonadida Preaxostyla Tubulinea
## 84 17 78
## Variosea YS16Ec34-lineage
## 13 1
# Not sure if I need this
<- insitu_asv_wClass %>%
tax_key select(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species, CLASS, SITE_CLASS) %>%
distinct()
# head(insitu_asv_wClass)
<- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
alv
<- insitu_asv_wClass %>%
tmp filter(Domain == "Eukaryota") %>%
filter(!is.na(Supergroup)) %>%
# Average across replicates
group_by(FeatureID, SAMPLENAME, VENT) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
mutate(REGION = case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
REGION %>% select(-Sample_tmp) %>%
)) unite(SAMPLE, REGION, VENTNAME, sep = " ", remove = FALSE) %>%
group_by(FeatureID, SAMPLE) %>%
summarise(SUM = sum(AVG)) %>%
pivot_wider(names_from = "SAMPLE", values_from = SUM, values_fill = 0) %>%
column_to_rownames(var = "FeatureID") %>%
mutate(PREVALENCE = rowSums(. > 0),
SEQ_TOTAL = rowSums(.)) %>%
rownames_to_column(var = "FeatureID") %>%
left_join(tax_key) %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order))
## Warning: Expected 4 pieces. Additional pieces discarded in 25354 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# head(tmp)
%>%
tmp filter(SEQ_TOTAL > 0) %>%
ggplot(aes(x = PREVALENCE, y = SEQ_TOTAL, fill = SUPERGROUP_ORDER)) +
geom_jitter(stat = "identity", shape = 21) +
scale_y_log10() +
facet_wrap(SUPERGROUP_ORDER ~ .) +
scale_fill_manual(values = all_taxa_color) +
theme_linedraw() +
labs(x = "Number of samples ASV appears in", y = "Total sequences (log)")
Get CLR transformed data for all ASVs
head(insitu_asv_wClass)
## # A tibble: 6 × 32
## FeatureID SAMPLE value Taxon Domain Super…¹ Phylum Class Order Family Genus
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 000562094e0… Gorda… 8 Euka… Eukar… Strame… Sagen… <NA> <NA> <NA> <NA>
## 2 000562094e0… Gorda… 13 Euka… Eukar… Strame… Sagen… <NA> <NA> <NA> <NA>
## 3 00096455166… Gorda… 91 Euka… Eukar… Rhizar… Radio… Acan… <NA> <NA> <NA>
## 4 000ee37747b… Axial… 282 Euka… Eukar… Alveol… Cilio… Nass… Nass… Disco… NASS…
## 5 000ee37747b… Axial… 32 Euka… Eukar… Alveol… Cilio… Nass… Nass… Disco… NASS…
## 6 00165708dab… Gorda… 1 Euka… Eukar… Strame… Ochro… Pela… Pela… Pelag… Pela…
## # … with 21 more variables: Species <chr>, Consensus <dbl>, SAMPLENAME <chr>,
## # VENT <chr>, SITE <chr>, SAMPLEID <chr>, DEPTH <chr>, SAMPLETYPE <chr>,
## # YEAR <chr>, TEMP <chr>, pH <chr>, PercSeawater <chr>, Mg <chr>, H2S <chr>,
## # CH4 <chr>, ProkConc <chr>, Sample_or_Control <chr>, DATASET <chr>,
## # DECONTAM <chr>, CLASS <chr>, SITE_CLASS <chr>, and abbreviated variable
## # name ¹Supergroup
<- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
alv <- c("Ciliophora")
ciliate
<- insitu_asv_wClass %>%
avg_insitu_asv # isolate euks only, remove opisthokonta
filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
# Modify supergroup level information
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) mutate(PHYLUM = case_when(
== "Unknown" ~ paste(SUPERGROUP, "Other"),
Phylum grepl("_X", Phylum) ~ paste(SUPERGROUP, "Other"),
is.na(Phylum) ~ paste(SUPERGROUP, "Other"),
TRUE ~ Phylum
%>%
)) mutate(CLASS = case_when(
== "Unknown" ~ PHYLUM,
Class grepl("_X", Class) ~ PHYLUM,
is.na(Class) ~ Phylum,
grepl("MAST-", Class) ~ "MAST",
TRUE ~ Class
%>%
)) # filter(value > 200) %>%
# Average across replicates
group_by(FeatureID, SITE, VENT, SAMPLETYPE, YEAR, Domain, SUPERGROUP, PHYLUM, CLASS, Order, Family, Genus, Species) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
# isolate sample name and vent
unite(SAMPLENAME_2, SITE, VENT, SAMPLETYPE, YEAR, sep = "_") %>%
pivot_wider(names_from = SAMPLENAME_2, values_from = AVG, values_fill = 0)
###
# names(avg_insitu_asv)
Perform CLR transformation
# Isolate key for ASV ID and taxonomic information
<- select(avg_insitu_asv, FeatureID, Domain:Species)
tax_key
# CLR, and re-format data frame
<- data.frame(compositions::clr(avg_insitu_asv %>%
clr_trans_insitu_asv select(-(Domain:Species)) %>%
column_to_rownames(var = "FeatureID"))) %>%
rownames_to_column(var = "FeatureID") %>%
pivot_longer(cols = starts_with(all), values_to = "CLR", names_to = "SAMPLENAME_2") %>%
left_join(tax_key) %>%
separate(SAMPLENAME_2, c("SITE", "VENT", "SAMPLETYPE", "YEAR"), sep = "_") %>%
mutate(VENT = str_replace_all(VENT, "\\.", " ")) %>%
mutate(REGION = case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE == "VonDamm" ~ "Von Damm",
SITE == "Piccard" ~ "Piccard",
SITE == "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Axial" ~ paste(VENT, YEAR, sep = " "),
REGION TRUE ~ VENT)) %>%
unite(SAMPLE, REGION, VENTNAME, SAMPLETYPE, sep = " ", remove = FALSE) %>%
filter(CLR != 0) %>%
unite(sampletype_tax, SAMPLETYPE, SUPERGROUP, sep = " ", remove = FALSE) %>%
add_column(PRESENCE = 1)
# check output
# head(clr_trans_insitu_asv)
# hist(clr_trans_insitu_asv$CLR)
<- c("Axial", "GordaRidge", "Piccard", "VonDamm")
sites_order $SITE_ORDER <- factor(clr_trans_insitu_asv$SITE, levels = rev(sites_order))
clr_trans_insitu_asv##
##
%>%
clr_trans_insitu_asv mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
group_by(SAMPLETYPE, SUPERGROUP_ORDER, SITE_ORDER) %>%
summarize(COUNT_SUM = sum(PRESENCE)) %>%
ggplot(aes(x = SAMPLETYPE, y = SITE_ORDER, fill = SUPERGROUP_ORDER)) +
geom_point(shape = 21, aes(size = COUNT_SUM), stat = "identity") +
facet_wrap(SUPERGROUP_ORDER ~ .) +
scale_size_continuous(range = c(1,10)) +
theme(legend.position = "top",
panel.grid.major = element_line(),
panel.grid.minor = element_line(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.line = element_line(),
axis.text = element_text(color = "black", size = 8),
strip.background = element_blank(),
strip.text = element_blank(),
legend.title = element_blank(),
axis.ticks = element_blank(),
strip.placement = "outside") +
labs(x = "", y = "") +
scale_fill_manual(values = all_taxa_color)
Filter data to reduce noise and show sample type to vent ecosystem variability.
<- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
alv <- c("Deep seawater", "BSW", "Shallow seawater", "Near vent BW")
bkgd <- c("Candelabra Plume", "Mt Edwards Plume", "Plume", "Anemone Plume")
plume
<- insitu_asv_wClass %>%
to_supergroup filter(SITE %in% all) %>%
filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) mutate(SAMPLETYPEORDER = case_when(
%in% bkgd ~ "Background",
VENT %in% plume ~ "Plume",
VENT TRUE ~ "Vent"
%>%
)) group_by(FeatureID, Taxon, SUPERGROUP,
%>%
VENT, SITE, SAMPLETYPE, YEAR, DATASET, SAMPLETYPEORDER) summarise(SEQ_AVG_REP = mean(value)) %>%
ungroup() %>%
group_by(SITE, SUPERGROUP, SAMPLETYPEORDER) %>%
summarise(ASV_COUNT = n(),
SEQ_SUM = sum(SEQ_AVG_REP))
# Order sample type
$SAMPLETYPEORDER <- factor(to_supergroup$SAMPLETYPEORDER, levels = c("Background", "Plume", "Vent"))
to_supergroup
# Remove ASVs with fewer than 200 sequences
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/fig2a-supergroup-treemap.svg", h = 7, w = 8)
%>%
to_supergroup mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
filter(SEQ_SUM > 200) %>%
ggplot(aes(area = SEQ_SUM, fill = SUPERGROUP_ORDER, subgroup = SUPERGROUP)) +
geom_treemap(color = "white") +
geom_treemap_subgroup_border(colour = "white", size = 2) +
facet_grid(SITE ~ SAMPLETYPEORDER) +
theme_linedraw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, face = "bold", size = 12),
strip.background = element_blank(), strip.text = element_text(color = "black", face = "bold", vjust = 0.5, hjust = 0, size = 12),
legend.position = "right",
legend.title = element_blank(),
legend.text = element_text(color = "black", size = 12),
panel.border = element_blank(),
axis.title = element_text(face = "bold", size = 12)) +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = all_taxa_color) +
labs(x = "", y = "Sequence proportion by Supergroup")
# dev.off()
# head(to_supergroup)
<- sum(to_supergroup$SEQ_SUM)
totalsum_supergroup
<- to_supergroup %>%
table_perc_supergroup group_by(SITE) %>%
mutate(SUM_SITE = sum(SEQ_SUM)) %>%
ungroup() %>%
group_by(SITE, SAMPLETYPEORDER) %>%
mutate(SUM_SITE_TYPE = sum(SEQ_SUM)) %>%
ungroup() %>%
mutate(Perc_seq_total = (100*(SEQ_SUM/totalsum_supergroup)),
Perc_seq_site = (100*(SEQ_SUM/SUM_SITE)),
Perc_seq_site_type = (100*(SEQ_SUM/SUM_SITE_TYPE))) %>%
select(-starts_with("SUM_")) %>%
gt(
groupname_col = c("SITE", "SAMPLETYPEORDER")
# rowname_col = "SAMPLENAME"
%>%
) fmt_number(columns = starts_with("Perc_"), decimals = 2) %>%
fmt_number(columns = SEQ_SUM, decimals = 0) %>%
fmt_number(columns = ASV_COUNT, decimals = 0)
# gtsave(table_perc_supergroup, filename = "output-tables/perc_seq_asv_bysupergroup.html")
library(geosphere)
# Attach better coordinates (fixed in 'metadata-compilation-table.R')
<- read.delim("data-input/tableS1-geochem-params.txt") %>%
coords_input select(VENT, SITE, SAMPLETYPE, LONG, LAT) %>%
unite(SAMPLENAME, VENT, SITE, SAMPLETYPE, sep = "_") %>%
distinct(SAMPLENAME, .keep_all = TRUE) %>%
column_to_rownames(var = "SAMPLENAME")
<- row.names(coords_input)
vents_all <- as.data.frame(distm(coords_input, fun = distHaversine))
distances_m colnames(distances_m) <- vents_all
rownames(distances_m) <- vents_all
# Distinction with "_all" here is that some samples are not included in below analysis.
Calculate distances and create long format dataframe
<- distances_m %>%
dist_m rownames_to_column(var = "start") %>%
pivot_longer(cols = vents_all, names_to = "end") %>%
filter(!(start == end))
# Widen an numeric the ASVs for the distance calculations
# vents # use this list of samples
<- insitu_asv_wClass %>%
df_wide_num group_by(FeatureID, SAMPLENAME, VENT, SITE, SAMPLETYPE) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
select(-SAMPLENAME) %>%
unite(SAMPLENAME, VENT, SITE, SAMPLETYPE, sep = "_") %>%
group_by(FeatureID, SAMPLENAME) %>%
summarise(SUM = sum(AVG)) %>%
pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>%
column_to_rownames(var = "SAMPLENAME")
<- row.names(df_wide_num) vents
Inclusion of three metrics below is because I want to see if the answer varies across the
<- as.data.frame(as.matrix(vegdist(df_wide_num, method = "bray"))) %>%
dist_bray rownames_to_column(var = "start") %>%
pivot_longer(cols = vents, names_to = "end") %>%
select(start, end, Bray_Curtis_metric = value)
<- as.data.frame(as.matrix(vegdist(df_wide_num, method = "jaccard"))) %>%
dist_jacc rownames_to_column(var = "start") %>%
pivot_longer(cols = vents, names_to = "end") %>%
select(start, end, Jaccard_metric = value)
<- as.data.frame(as.matrix(vegdist(compositions::clr(df_wide_num), method = "euclidean"))) %>%
dist_euc_clr rownames_to_column(var = "start") %>%
pivot_longer(cols = vents, names_to = "end") %>%
select(start, end, Euclidean_CLR_metric = value)
<- dist_m %>%
dist_compiled_output select(start, end, meters = value) %>%
left_join(dist_jacc) %>%
left_join(dist_bray) %>%
left_join(dist_euc_clr) %>%
pivot_longer(cols = ends_with("_metric"), names_to = "comm_dist") %>%
filter(!(start == end)) %>%
mutate(km = (meters/1000),
dist0 = gsub("_", " ", comm_dist),
dist = gsub(" metric", "", dist0))
Compare several distance metrics with distance.
# ggplot(dist_compiled_output, aes(x = km, y = value)) +
# geom_jitter() +
# scale_x_log10(labels = scales::comma) +
# facet_grid(dist ~ ., scales = "free") +
# theme_linedraw() +
# geom_smooth(method="lm") +
# stat_regline_equation(aes(label = ..rr.label..), color = "black", label.y.npc = 0.09, label.x.npc = 0.7) +
# theme(strip.background = element_blank(),
# strip.text = element_text(color = "black")) +
# labs(y = "Distance metric")
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/fig4a-distdecay.svg", h = 4, w = 5)
%>%
dist_compiled_output filter(dist == "Jaccard") %>%
ggplot(aes(x = km, y = value)) +
geom_jitter() +
scale_x_log10(labels = scales::comma) +
facet_grid(dist ~ ., scales = "free") +
# geom_smooth(method="lm") +
theme_linedraw() +
# stat_regline_equation(aes(label = ..rr.label..), face = "bold", color = "black", label.y.npc = 0.09, label.x.npc = 0.7) +
scale_y_continuous(limits = c(0.7,1)) +
theme(strip.background = element_blank(),
strip.text = element_text(color = "black", face = "bold", size = 12),
axis.text = element_text(color = "black", face = "bold", size = 12),
axis.title = element_text(color = "black", face = "bold", size = 12)) +
labs(y = "Jaccard Dissimilarity")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 198 rows containing missing values (geom_point).
# dev.off()
# labs(y = "Distance metric", title = "Distance-Decay: all ASVs")
# head(dist_compiled_output)
# # ?var()
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/figS4-var-dist.svg", h = 4, w = 5)
<- dist_compiled_output %>%
var_km filter(dist == "Jaccard") %>%
# separate(start, into = c("NAME_start", "SITE_start"), sep = "_", remove = FALSE, extra = "merge") %>%
# separate(end, into = c("NAME_end", "SITE_end"), sep = "_", remove = FALSE, extra = "merge") %>%
separate(start, into = c("NAME_start", "SITE_start", "TYPE_start"), sep = "_", remove = FALSE, extra = "merge") %>%
separate(end, into = c("NAME_end", "SITE_end", "TYPE_end"), sep = "_", remove = FALSE, extra = "merge") %>%
drop_na() %>%
# group_by(start, end) %>%
group_by(SITE_start, SITE_end) %>%
mutate(VARIANCE = var(value)) %>%
ggplot(aes(x = km, y = VARIANCE)) +
scale_x_log10(labels = scales::comma) +
geom_jitter(shape = 21, size = 2, color = "black", fill = "black") +
geom_smooth(method="lm") +
theme_linedraw() +
stat_regline_equation(aes(label = ..rr.label..), color = "black", label.y.npc = 0.0007, label.x.npc = 0.08) +
# scale_y_continuous(limits = c(0.7,1)) +
theme(strip.background = element_blank(),
strip.text = element_text(color = "black"),
axis.text = element_text(color = "black"),
axis.title = element_text(color = "black")) +
labs(y = "Variance of Jaccard Dissimilarity", x = "")
# dev.off()
Sample to vent sites only
# head(insitu_asv_wClass)
<- insitu_asv_wClass %>%
df_wide_vent filter(SAMPLETYPE == "Vent") %>%
group_by(FeatureID, SAMPLENAME, VENT, SITE, SAMPLETYPE) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
select(-SAMPLENAME) %>%
unite(SAMPLENAME, VENT, SITE, SAMPLETYPE, sep = "_") %>%
group_by(FeatureID, SAMPLENAME) %>%
summarise(SUM = sum(AVG)) %>%
pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>%
column_to_rownames(var = "SAMPLENAME")
<- rownames(df_wide_vent) vents
<- as.data.frame(as.matrix(vegdist(df_wide_vent, method = "bray"))) %>%
dist_bray rownames_to_column(var = "start") %>%
pivot_longer(cols = vents, names_to = "end") %>%
select(start, end, Bray_Curtis_metric = value)
<- as.data.frame(as.matrix(vegdist(df_wide_vent, method = "jaccard"))) %>%
dist_jacc rownames_to_column(var = "start") %>%
pivot_longer(cols = vents, names_to = "end") %>%
select(start, end, Jaccard_metric = value)
<- as.data.frame(as.matrix(vegdist(compositions::clr(df_wide_vent), method = "euclidean"))) %>%
dist_euc_clr rownames_to_column(var = "start") %>%
pivot_longer(cols = vents, names_to = "end") %>%
select(start, end, Euclidean_CLR_metric = value)
<- dist_m %>%
dist_compiled_output_vents select(start, end, meters = value) %>%
left_join(dist_jacc) %>%
left_join(dist_bray) %>%
left_join(dist_euc_clr) %>%
pivot_longer(cols = ends_with("_metric"), names_to = "comm_dist") %>%
filter(!(start == end)) %>%
mutate(km = (meters/1000),
dist0 = gsub("_", " ", comm_dist),
dist = gsub(" metric", "", dist0))
%>%
dist_compiled_output_vents # filter(grepl("Vent", start) & grepl("Vent", end)) %>%
ggplot(aes(x = km, y = value)) +
geom_jitter() +
scale_x_log10(labels = scales::comma) +
facet_grid(dist ~ ., scales = "free") +
geom_smooth(method="lm") +
theme_linedraw() +
stat_regline_equation(aes(label = ..rr.label..), color = "black", label.y.npc = 0.09, label.x.npc = 0.7) +
theme(strip.background = element_blank(),
strip.text = element_text(color = "black")) +
labs(y = "Distance metric")
## Warning: Transformation introduced infinite values in continuous x-axis
## Transformation introduced infinite values in continuous x-axis
## Transformation introduced infinite values in continuous x-axis
## Warning: Removed 2310 rows containing non-finite values (stat_smooth).
## Warning: Removed 2310 rows containing non-finite values (stat_regline_equation).
## Warning: Removed 2310 rows containing missing values (geom_point).
<- dist_compiled_output_vents %>%
vent_dist_decay filter(dist == "Jaccard") %>%
ggplot(aes(x = km, y = value)) +
geom_jitter() +
scale_x_log10(labels = scales::comma) +
facet_grid(dist ~ ., scales = "free") +
geom_smooth(method="lm") +
theme_linedraw() +
stat_regline_equation(aes(label = ..rr.label..), color = "black", label.y.npc = 0.09, label.x.npc = 0.7) +
theme(strip.background = element_blank(),
strip.text = element_text(color = "black")) +
labs(y = "Jaccard Dissimilarity", title = "", x = "")
Sample to resident only and cosmopolitan only
<- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")
cosmo <- c("Vent only")
res
<- insitu_asv_wClass %>%
df_wide_res filter(CLASS %in% res) %>%
group_by(FeatureID, SAMPLENAME, VENT, SITE, SAMPLETYPE) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
select(-SAMPLENAME) %>%
unite(SAMPLENAME, VENT, SITE, SAMPLETYPE, sep = "_") %>%
group_by(FeatureID, SAMPLENAME) %>%
summarise(SUM = sum(AVG)) %>%
pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>%
column_to_rownames(var = "SAMPLENAME")
#
<- row.names(df_wide_res)
vents_res #
<- as.data.frame(as.matrix(vegdist(df_wide_res, method = "bray"))) %>%
dist_bray rownames_to_column(var = "start") %>%
pivot_longer(cols = vents_res, names_to = "end") %>%
select(start, end, Bray_Curtis_metric = value)
<- as.data.frame(as.matrix(vegdist(df_wide_res, method = "jaccard"))) %>%
dist_jacc rownames_to_column(var = "start") %>%
pivot_longer(cols = vents_res, names_to = "end") %>%
select(start, end, Jaccard_metric = value)
<- as.data.frame(as.matrix(vegdist(compositions::clr(df_wide_res), method = "euclidean"))) %>%
dist_euc_clr rownames_to_column(var = "start") %>%
pivot_longer(cols = vents_res, names_to = "end") %>%
select(start, end, Euclidean_CLR_metric = value)
<- dist_m %>%
dist_compiled_output_res select(start, end, meters = value) %>%
left_join(dist_jacc) %>%
left_join(dist_bray) %>%
left_join(dist_euc_clr) %>%
pivot_longer(cols = ends_with("_metric"), names_to = "comm_dist") %>%
filter(!(start == end)) %>%
mutate(km = (meters/1000),
dist0 = gsub("_", " ", comm_dist),
dist = gsub(" metric", "", dist0))
<- dist_compiled_output_res %>%
d_d_resident filter(dist == "Jaccard") %>%
ggplot(aes(x = km, y = value)) +
geom_jitter() +
scale_x_log10(labels = scales::comma) +
facet_grid(dist ~ ., scales = "free") +
geom_smooth(method="lm") +
theme_linedraw() +
stat_regline_equation(aes(label = ..rr.label..), color = "black", label.y.npc = 0.09, label.x.npc = 0.7) +
theme(strip.background = element_blank(),
strip.text = element_text(color = "black")) +
labs(y = "Distance metric", title = "Distance-Decay: Resident ASVs")
d_d_resident
## Warning: Transformation introduced infinite values in continuous x-axis
## Transformation introduced infinite values in continuous x-axis
## Transformation introduced infinite values in continuous x-axis
## Warning: Removed 770 rows containing non-finite values (stat_smooth).
## Warning: Removed 770 rows containing non-finite values (stat_regline_equation).
## Warning: Removed 770 rows containing missing values (geom_point).
Target cosmpolitan
<- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")
cosmo <- c("Vent only")
res
<- insitu_asv_wClass %>%
df_wide_cosmo filter(CLASS %in% cosmo) %>%
group_by(FeatureID, SAMPLENAME, VENT, SITE, SAMPLETYPE) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
select(-SAMPLENAME) %>%
unite(SAMPLENAME, VENT, SITE, SAMPLETYPE, sep = "_") %>%
group_by(FeatureID, SAMPLENAME) %>%
summarise(SUM = sum(AVG)) %>%
pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>%
column_to_rownames(var = "SAMPLENAME")
#
<- row.names(df_wide_cosmo)
vents_cosmo #
<- as.data.frame(as.matrix(vegdist(df_wide_cosmo, method = "bray"))) %>%
dist_bray rownames_to_column(var = "start") %>%
pivot_longer(cols = vents_cosmo, names_to = "end") %>%
select(start, end, Bray_Curtis_metric = value)
<- as.data.frame(as.matrix(vegdist(df_wide_cosmo, method = "jaccard"))) %>%
dist_jacc rownames_to_column(var = "start") %>%
pivot_longer(cols = vents_cosmo, names_to = "end") %>%
select(start, end, Jaccard_metric = value)
<- as.data.frame(as.matrix(vegdist(compositions::clr(df_wide_cosmo), method = "euclidean"))) %>%
dist_euc_clr rownames_to_column(var = "start") %>%
pivot_longer(cols = vents_cosmo, names_to = "end") %>%
select(start, end, Euclidean_CLR_metric = value)
<- dist_m %>%
dist_compiled_output_cosmoselect(start, end, meters = value) %>%
left_join(dist_jacc) %>%
left_join(dist_bray) %>%
left_join(dist_euc_clr) %>%
pivot_longer(cols = ends_with("_metric"), names_to = "comm_dist") %>%
filter(!(start == end)) %>%
mutate(km = (meters/1000),
dist0 = gsub("_", " ", comm_dist),
dist = gsub(" metric", "", dist0))
<- dist_compiled_output_cosmo %>%
cosmo_dist_decay filter(dist == "Jaccard") %>%
ggplot(aes(x = km, y = value)) +
geom_jitter() +
scale_x_log10(labels = scales::comma) +
facet_grid(dist ~ ., scales = "free") +
geom_smooth(method="lm") +
theme_linedraw() +
stat_regline_equation(aes(label = ..rr.label..), color = "black", label.y.npc = 0.09, label.x.npc = 0.7) +
theme(strip.background = element_blank(),
strip.text = element_text(color = "black")) +
labs(y = "Jaccard Dissimilarity", title = "", x = "")
Figure S4
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/supp_dist_decay_figS4a-c.svg", h = 10, w = 5)
# var_km + vent_dist_decay + cosmo_dist_decay +patchwork::plot_layout(nrow = 3) + plot_annotation(tag_levels = 'a')
# Execute all commands in clustered-ASV-analysis markdown file to get dist_decay for OTU clustering
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/supp_dist_decay_figS4_all.svg", h = 10, w = 9)
# ((var_km / vent_dist_decay / cosmo_dist_decay + plot_layout(guides = 'keep')) | dist_decay) + plot_layout(guides = 'collect') + plot_annotation(tag_levels = 'a')
# dev.off()
# dev.off()
Isolate taxa key, and manually curated taxonomic groupings.
<- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
alv <- c("Ciliophora")
ciliate
<- insitu_asv_wClass %>%
tax_key_curated select(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species) %>%
distinct() %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) mutate(PHYLUM = case_when(
== "Unknown" ~ paste(SUPERGROUP, "Other"),
Phylum grepl("_X", Phylum) ~ paste(SUPERGROUP, "Other"),
is.na(Phylum) ~ paste(SUPERGROUP, "Other"),
TRUE ~ Phylum
%>%
)) mutate(CLASS = case_when(
== "Unknown" ~ PHYLUM,
Class grepl("_X", Class) ~ PHYLUM,
is.na(Class) ~ Phylum,
grepl("MAST-", Class) ~ "MAST",
TRUE ~ Class
%>%
)) mutate(CLASS2 = case_when(
== Order ~ Order,
CLASS is.na(Order) ~ paste(CLASS, "Other"),
TRUE ~ Order)) %>%
filter(!is.na(Order)) %>%
select(FeatureID, SUPERGROUP, PHYLUM, CLASS, CLASS2) %>%
mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order))
# head(tax_key_curated)
# unique(tax_key_curated$CLASS_2)
<- insitu_asv_wClass %>%
tmp filter(Domain == "Eukaryota") %>%
# filter(CLASS %in% slice) %>%
# Average across replicates
group_by(FeatureID, SAMPLENAME, VENT, distribution = CLASS) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
left_join(tax_key_curated) %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp")) %>%
mutate(VENTNAME = case_when(
== "Axial" ~ paste(SITE, VENT, YEAR, sep = " "),
SITE == "GordaRidge" ~ paste(SITE, VENT, sep = " "),
SITE %in% mcr ~ paste(SITE, VENT, sep = " ")
SITE %>% select(-Sample_tmp) %>%
)) # filter(SAMPLETYPE == "Vent") %>%
group_by(VENTNAME, distribution, SUPERGROUP, PHYLUM, CLASS2) %>%
summarise(TOTAL_SEQ = sum(AVG),
COUNT = n()) # %>% #IF you use this please modify location
## Warning: Expected 4 pieces. Additional pieces discarded in 25500 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# filter(!is.na(SUPERGROUP))
head(tmp)
## # A tibble: 6 × 7
## # Groups: VENTNAME, distribution, SUPERGROUP, PHYLUM [2]
## VENTNAME distribution SUPERGROUP PHYLUM CLASS2 TOTAL…¹ COUNT
## <chr> <chr> <chr> <chr> <chr> <dbl> <int>
## 1 Axial Anemone 2013 Vent & background Alveolata-Ci… Cilio… Apost… 24 1
## 2 Axial Anemone 2013 Vent & background Alveolata-Ci… Cilio… Hypot… 20 1
## 3 Axial Anemone 2013 Vent & background Alveolata-Ci… Cilio… Strom… 38 2
## 4 Axial Anemone 2013 Vent & background Alveolata-Di… Dinof… Dino-… 20 1
## 5 Axial Anemone 2013 Vent & background Alveolata-Di… Dinof… Dino-… 96 5
## 6 Axial Anemone 2013 Vent & background Alveolata-Di… Dinof… Dino-… 8 1
## # … with abbreviated variable name ¹TOTAL_SEQ
%>%
tmp filter(distribution == "Vent only") %>%
ggplot(aes(x = VENTNAME, y = CLASS2)) +
geom_point(aes(fill = log(TOTAL_SEQ), size = COUNT), shape = 21) +
scale_size_continuous(range = c(1,18)) +
# scale_shape_manual(values = c(21, 23, 22, 25)) +
facet_grid(.~SUPERGROUP, space = "free", scales = "free") +
theme(axis.text.x = element_text(angle = 90)) +
coord_flip()
<- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")
cosmo <- c("Vent only")
res <- as.character(unique(insitu_asv_wClass$CLASS))
all_classes
<- c("Axial")
axial <- c("VonDamm", "Piccard")
mcr <- c("GordaRidge")
gr <- c("Axial", "VonDamm", "Piccard", "GordaRidge") all
<- function(df, slice){
isolate_calc_jaccard <- df %>%
df_out_wide filter(Domain == "Eukaryota") %>%
filter(CLASS %in% slice) %>%
# Average across replicates
group_by(FeatureID, SAMPLENAME, VENT) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp")) %>%
mutate(VENTNAME = case_when(
== "Axial" ~ paste(SITE, VENT, YEAR, sep = " "),
SITE == "GordaRidge" ~ paste(SITE, VENT, sep = " "),
SITE %in% mcr ~ paste(SITE, VENT, sep = " ")
SITE %>% select(-Sample_tmp) %>%
)) unite(SAMPLEID, SITE, VENT, YEAR, SAMPLETYPE, sep = "-") %>%
unite(sample_full, SAMPLEID, VENTNAME, sep = "--") %>%
group_by(FeatureID, sample_full) %>%
summarise(SUM = sum(AVG)) %>%
pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>%
column_to_rownames(var = "sample_full")
}
Apply function to resident population, cosmopolitan population, and whole population
<- isolate_calc_jaccard(insitu_asv_wClass, res) resident
## Warning: Expected 4 pieces. Additional pieces discarded in 10510 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
<- isolate_calc_jaccard(insitu_asv_wClass, cosmo) cosmopolitan
## Warning: Expected 4 pieces. Additional pieces discarded in 12990 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
<- isolate_calc_jaccard(insitu_asv_wClass, all_classes) all_jacc
## Warning: Expected 4 pieces. Additional pieces discarded in 25500 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# head(resident)
Plot resident
<- as.data.frame(as.matrix(vegdist(compositions::clr(resident), method = "euclidean"))) %>%
dist_jacc_clr_res rownames_to_column(var = "start") %>%
pivot_longer(cols = row.names(resident), names_to = "end") %>%
select(start, end, Jaccard_CLR_metric = value) %>%
# mutate(VARS = purrr::map2_chr(start, end, ~toString(sort(c(.x, .y))))) %>%
# distinct(VARS, .keep_all = TRUE) %>%
# separate(VARS, c("X", "Y"), sep = ", ") %>%
# select(-start, -end) %>%
# add_column(SUBSET = "RESIDENT") %>%
separate(start, c("Y_FULL", "Y"), sep = "--", remove = FALSE) %>%
separate(Y_FULL, c("Y_SITE", "Y_VENT", "Y_YEAR"), sep = "-", remove = FALSE) %>%
separate(end, c("X_FULL", "X"), sep = "--", remove = FALSE) %>%
separate(X_FULL, c("X_SITE", "X_VENT", "X_YEAR"), sep = "-", remove = FALSE) %>%
ggplot(aes(x = X, y = Y, fill = Jaccard_CLR_metric)) +
geom_tile(color = "black") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
axis.title = element_blank()) +
# scale_fill_gradientn(colors = c("#badbdb", "#dedad2", "#e4bcad", "#df979e", "#d7658b", "#c80064")) +
# scale_fill_gradientn(colors = c("#fff7f3", "#fde0dd", "#fcc5c0", "#fa9fb5", "#f768a1", "#dd3497", "#ae017e", "#7a0177", "#49006a"))
scale_fill_gradientn(colors = c("#ffffff","#f0f0f0","#d9d9d9","#bdbdbd","#969696","#737373","#525252","#252525","#000000")) +
coord_fixed(ratio = 1)
## Warning: Expected 3 pieces. Additional pieces discarded in 576 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Expected 3 pieces. Additional pieces discarded in 576 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
dist_jacc_clr_res
Plot cosmopolitan
<- as.data.frame(as.matrix(vegdist(compositions::clr(cosmopolitan), method = "euclidean"))) %>%
dist_jacc_clr_cos rownames_to_column(var = "start") %>%
pivot_longer(cols = row.names(cosmopolitan), names_to = "end") %>%
select(start, end, Jaccard_CLR_metric = value) %>%
separate(start, c("Y_FULL", "Y"), sep = "--", remove = FALSE) %>%
separate(Y_FULL, c("Y_SITE", "Y_VENT", "Y_YEAR"), sep = "-", remove = FALSE) %>%
separate(end, c("X_FULL", "X"), sep = "--", remove = FALSE) %>%
separate(X_FULL, c("X_SITE", "X_VENT", "X_YEAR"), sep = "-", remove = FALSE) %>%
ggplot(aes(x = X, y = Y, fill = Jaccard_CLR_metric)) +
geom_tile(color = "black") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
axis.title = element_blank()) +
# scale_fill_gradientn(colors = c("#badbdb", "#dedad2", "#e4bcad", "#df979e", "#d7658b", "#c80064")) +
# scale_fill_gradientn(colors = c("#fff7f3", "#fde0dd", "#fcc5c0", "#fa9fb5", "#f768a1", "#dd3497", "#ae017e", "#7a0177", "#49006a"))
scale_fill_gradientn(colors = c("#ffffff","#f0f0f0","#d9d9d9","#bdbdbd","#969696","#737373","#525252","#252525","#000000")) +
coord_fixed(ratio = 1)
## Warning: Expected 3 pieces. Additional pieces discarded in 1225 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Expected 3 pieces. Additional pieces discarded in 1225 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
dist_jacc_clr_cos
Plot all ASVs
<- as.data.frame(as.matrix(vegdist(compositions::clr(all_jacc), method = "euclidean"))) %>%
dist_jacc_clr_all rownames_to_column(var = "start") %>%
pivot_longer(cols = row.names(all_jacc), names_to = "end") %>%
select(start, end, Jaccard_CLR_metric = value) %>%
separate(start, c("Y_FULL", "Y"), sep = "--", remove = FALSE) %>%
separate(Y_FULL, c("Y_SITE", "Y_VENT", "Y_YEAR"), sep = "-", remove = FALSE) %>%
separate(end, c("X_FULL", "X"), sep = "--", remove = FALSE) %>%
separate(X_FULL, c("X_SITE", "X_VENT", "X_YEAR"), sep = "-", remove = FALSE) %>%
ggplot(aes(x = X, y = Y, fill = Jaccard_CLR_metric)) +
geom_tile(color = "black") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
axis.title = element_blank()) +
# scale_fill_gradientn(colors = c("#badbdb", "#dedad2", "#e4bcad", "#df979e", "#d7658b", "#c80064")) +
# scale_fill_gradientn(colors = c("#fff7f3", "#fde0dd", "#fcc5c0", "#fa9fb5", "#f768a1", "#dd3497", "#ae017e", "#7a0177", "#49006a"))
scale_fill_gradientn(colors = c("#ffffff","#f0f0f0","#d9d9d9","#bdbdbd","#969696","#737373","#525252","#252525","#000000")) +
coord_fixed(ratio = 1)
## Warning: Expected 3 pieces. Additional pieces discarded in 1225 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Expected 3 pieces. Additional pieces discarded in 1225 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
dist_jacc_clr_all
<- function(df, slice, tax_slice){
clr_to_plot_maintax <- df %>%
df_out_wide filter(Domain == "Eukaryota") %>%
filter(CLASS %in% slice) %>%
# Average across replicates
group_by(FeatureID, SAMPLENAME, VENT) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp")) %>%
mutate(VENTNAME = case_when(
== "Axial" ~ paste(SITE, VENT, YEAR, sep = " "),
SITE == "GordaRidge" ~ paste(SITE, VENT, sep = " "),
SITE %in% mcr ~ paste(SITE, VENT, sep = " ")
SITE %>% select(-Sample_tmp) %>%
)) left_join(tax_key_curated) %>%
# Modify Class 2 level with excess text.
mutate(CLASS2 = str_replace_all(CLASS2, "_X", "")) %>%
mutate(CLASS2 = str_replace_all(CLASS2, "_XX", "")) %>%
mutate(CLASS2 = str_replace_all(CLASS2, "X$", "")) %>%
mutate(CLASS2 = str_replace_all(CLASS2, "_[:alnum:]$", "")) %>%
group_by(VENTNAME, SUPERGROUP, PHYLUM, CLASS, CLASS2) %>%
summarise(SUM_VALUE = sum(AVG)) %>%
filter(SUPERGROUP %in% tax_slice) %>%
unite(TAX, SUPERGROUP, PHYLUM, CLASS, CLASS2, sep = "__") %>%
pivot_wider(names_from = TAX, values_from = SUM_VALUE, values_fill = 0) %>%
column_to_rownames(var = "VENTNAME")
# Plot
as.data.frame(compositions::clr(df_out_wide)) %>%
rownames_to_column(var = "VENTNAME") %>%
pivot_longer(cols = names(df_out_wide), values_to = "CLR", names_to = "TAX") %>%
separate(TAX, into = c("SUPERGROUP", "PHYLUM", "CLASS", "CLASS2"), sep = "__") %>%
separate(VENTNAME, into = c("SITE"), sep = " ", remove = FALSE) %>%
ggplot(aes(x = CLASS2, y = VENTNAME, fill = CLR)) +
geom_tile(aes(fill = CLR), color = "black") +
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, color = "black",size = 10, face = "bold"),
axis.text.y = element_text(color = "black", size = 10, face = "bold", angle = 45, hjust = 1, vjust = 1),
strip.background = element_blank(),
strip.text.y = element_text(hjust = 0, vjust = 0, angle = 45, size = 10, color = "black", face = "bold"),
strip.text.x = element_text(hjust = 1, vjust = 0, angle = 90, size = 10, color = "black", face = "bold"),
legend.title = element_blank(),
strip.placement = "outside",
axis.ticks.y = element_blank()) +
labs(x = "", y = "") +
scale_fill_steps2(
low = "#2166ac",
mid = "white",
high = "#b2182b",
midpoint = 0,
space = "Lab",
na.value = "#4d4d4d",
guide = "coloursteps",
aesthetics = "fill") +
# coord_flip() +
facet_grid(SITE ~ SUPERGROUP + PHYLUM, space = "free", scales = "free", switch = "both")
}
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/fig5b-clr-tile.svg", h = 11, w = 21)
<- c("Alveolata-Ciliophora", "Amoebozoa", "Apusozoa", "Excavata", "Rhizaria-Cercozoa", "Rhizaria-Radiolaria")
maintext_supergroup clr_to_plot_maintax(insitu_asv_wClass, res, maintext_supergroup)
## Warning: Expected 4 pieces. Additional pieces discarded in 10510 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 1 pieces. Additional pieces discarded in 2064 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# dev.off()
unique(tax_key_curated$SUPERGROUP)
## [1] "Alveolata-Ciliophora" "Stramenopiles-Ochrophyta"
## [3] "Rhizaria-Cercozoa" "Stramenopiles-Opalozoa"
## [5] "Opisthokonta" "Rhizaria-Radiolaria"
## [7] "Alveolata-Dinoflagellata" "Stramenopiles-Sagenista"
## [9] "Stramenopiles" "Bacteria_X"
## [11] "Apusozoa" "Archaeplastida"
## [13] "Hacrobia" "Other Alveolata"
## [15] "Amoebozoa" "Excavata"
## [17] "Terrabacteria" "Archaea_X"
## [19] "Protalveolata"
<- c("Stramenopiles-Opalozoa", "Stramenopiles-Sagenista")
mast clr_to_plot_maintax(insitu_asv_wClass, res, mast)
## Warning: Expected 4 pieces. Additional pieces discarded in 10510 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 1 pieces. Additional pieces discarded in 748 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# maintext_supergroup <- c("Alveolata-Ciliophora", "Alveolata-Dinoflagellata", "Rhizaria-Cercozoa", "Rhizaria-Radiolaria")
# clr_to_plot_maintax(insitu_asv_wClass, res, maintext_supergroup)
# maintext_supergroup <- c("Alveolata-Ciliophora", "Other Alveolata", "Amoebozoa", "Apusozoa", "Rhizaria-Cercozoa", "Rhizaria-Radiolaria")
# clr_to_plot_maintax(insitu_asv_wClass, all_classes, maintext_supergroup)
Run on HPC at WHOI.
load("data-input/asv-tables-processed-27012022.RData, verbose = T")
DivNet package - diversity estimation hypothesis testing from Amy Willis’s group. This will also characterize the uncertainty of the richness estimate. Richness estimation is flawed because of sample depth and processing methods.
library(phyloseq); library(breakaway); library(DivNet)
library(tidyverse)
This code block run on HPC.
# Select eukaryotes only and create wide format dataframe
<- asv_insitu_qc %>%
insitu_wide filter(Domain == "Eukaryota") %>%
filter(!grepl("_Plume001_", SAMPLE)) %>% #removing "near vent background", not relevant in other data sets
select(FeatureID, Taxon, SAMPLE, value) %>%
pivot_wider(names_from = SAMPLE, values_from = value, values_fill = 0)
# head(insitu_wide)
<- as.character(colnames(insitu_wide %>% select(-Taxon, -FeatureID)))
insitu_samples # insitu_samples
<- insitu_wide %>%
insitu_tax_matrix select(FeatureID, Taxon) %>%
separate(Taxon, c("Domain", "Supergroup",
"Phylum", "Class", "Order",
"Family", "Genus", "Species"), sep = ";") %>%
column_to_rownames(var = "FeatureID") %>%
as.matrix
## Warning: Expected 8 pieces. Additional pieces discarded in 6222 rows [3, 4, 6,
## 7, 9, 10, 11, 12, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 29, 32, ...].
## Warning: Expected 8 pieces. Missing pieces filled with `NA` in 4264 rows [1, 2,
## 5, 8, 13, 14, 16, 19, 21, 26, 30, 31, 33, 40, 41, 45, 46, 47, 48, 50, ...].
<- insitu_wide %>%
insitu_asv_matrix select(-Taxon) %>%
column_to_rownames(var = "FeatureID") %>%
as.matrix
# Align row names for each matrix
rownames(insitu_tax_matrix) <- row.names(insitu_asv_matrix)
Reformat metadata, but change to numeric.
<- read.csv("data-input/samplelist-metadata.csv", na.strings = "NA")
metadata # ?read.csv()
## Extract relevant metadata information
<- metadata %>%
metadata_formatted mutate_all(as.character) %>%
filter(Sample_or_Control == "Sample") %>%
filter(!(SAMPLETYPE == "Incubation")) %>%
filter(!(SAMPLETYPE == "Microcolonizer")) %>%
select(SAMPLE, VENT, COORDINATES, SITE, SAMPLEID, DEPTH, SAMPLETYPE, YEAR, TEMP = starts_with("TEMP"), pH, PercSeawater = starts_with("Perc"), Mg = starts_with("Mg"), H2 = starts_with("H2."), H2S = starts_with("H2S"), CH4 = starts_with("CH4"), ProkConc, Sample_or_Control)
# Ensure these columns are converted to numeric
<- c("DEPTH", "TEMP", "pH", "PercSeawater", "Mg", "H2", "H2S", "CH4", "ProkConc")
env_params
<- metadata_formatted %>%
metadata_insitu na_if(., "bd") %>% na_if(., "nd") %>% na_if(., "-") %>% na_if(., "") %>% na_if(., "na") %>%
filter(SAMPLE %in% insitu_samples) %>% # from reformatting df above
# select(SAMPLE, VENT, SITE, SAMPLETYPE, YEAR) %>%
unite(SAMPLELABEL, VENT, SITE, SAMPLETYPE, YEAR, sep = "_", remove = FALSE) %>%
unite(TYPE_SITE, SITE, SAMPLETYPE, sep = "_", remove = FALSE) %>%
mutate_at(env_params,as.numeric)
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
rownames(metadata_insitu) <- metadata_insitu$SAMPLE
# str(metadata_insitu)
# View(metadata_insitu)
# head(metadata_insitu)
# row.names(metadata_insitu)
Import taxa and ASV count matrices into phyloseq objects.
# Import asv and tax matrices
= otu_table(insitu_asv_matrix, taxa_are_rows = TRUE)
ASV = tax_table(insitu_tax_matrix)
TAX
<- phyloseq(ASV, TAX)
phylo_obj
# Import metadata as sample data in phyloseq
<- sample_data(metadata_insitu)
samplenames
# join as phyloseq object
= merge_phyloseq(phylo_obj, samplenames)
physeq_insitu
## Check
physeq_insitu
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 10486 taxa and 43 samples ]
## sample_data() Sample Data: [ 43 samples by 19 sample variables ]
## tax_table() Taxonomy Table: [ 10486 taxa by 8 taxonomic ranks ]
# head(insitu_tax_matrix)
# head(metadata_insitu)
save(physeq_insitu, metadata_insitu, file = "phyloseq-objs-180122.RData") #use below for statistical analysis
Focus on genus and species level.
###
<- divnet(tax_glom(physeq_insitu, taxrank = "Genus"), base = 3)
gen_divnet <- divnet(tax_glom(physeq_insitu, taxrank = "Genus"), X = "SAMPLELABEL", base = 3)
gen_divnet_label # Vent vs plume vs background
<- divnet(tax_glom(physeq_insitu, taxrank = "Genus"), X = "SAMPLETYPE", base = 3)
gen_divnet_TYPE # location and vent vs plume vs background
<- divnet(tax_glom(physeq_insitu, taxrank = "Genus"), X = "TYPE_SITE", base = 3)
gen_divnet_TYPE_SITE save(gen_divnet, gen_divnet_label, gen_divnet_TYPE, gen_divnet_TYPE_SITE, file = "GENUS.Rdata")
###
## Analyzed at the ASV level:
<- divnet(tax_glom(physeq_insitu, taxrank = "Species"), base = 3)
spp_divnet
<- divnet(tax_glom(physeq_insitu, taxrank = "Species"), X = "SAMPLELABEL", base = 3)
spp_divnet_label # Vent vs plume vs background
<- divnet(tax_glom(physeq_insitu, taxrank = "Species"), X = "SAMPLETYPE", base = 3)
spp_divnet_TYPE # location and vent vs plume vs background
<- divnet(tax_glom(physeq_insitu, taxrank = "Species"), X = "TYPE_SITE", base = 3)
spp_divnet_TYPE_SITE save(spp_divnet, spp_divnet_label, spp_divnet_TYPE, spp_divnet_TYPE_SITE, file = "SPECIES.Rdata")
Above run on HPC and RData files save so we can look at various levels of species richness.
Function to extract shannon and simpson data from each divnet output.
<- function(df){
fxn_extract_divet $shannon %>% summary %>%
dfpivot_longer(cols = starts_with("estimate"), names_to = "ESTIMATE-shannon", values_to = "Shannon") %>%
pivot_longer(cols = starts_with("error"), names_to = "ERROR-shannon", values_to = "Shannon-error") %>%
pivot_longer(cols = starts_with("lower"), names_to = "LOWER-shannon", values_to = "Shannon-lower") %>%
pivot_longer(cols = starts_with("upper"), names_to = "UPPER-shannon", values_to = "Shannon-upper") %>%
left_join(df$simpson %>% summary %>%
pivot_longer(cols = starts_with("estimate"), names_to = "ESTIMATE-simpson", values_to = "Simpson") %>%
pivot_longer(cols = starts_with("error"), names_to = "ERROR-simpson", values_to = "Simpson-error") %>%
pivot_longer(cols = starts_with("lower"), names_to = "LOWER-simpson", values_to = "Simpson-lower") %>%
pivot_longer(cols = starts_with("upper"), names_to = "UPPER-simpson", values_to = "Simpson-upper"),
by = c("sample_names" = "sample_names")) %>%
left_join(metadata_insitu %>% rownames_to_column(var = "sample_names")) %>%
select(-sample_names, -ends_with("-simpson"), -ends_with("-shannon"), -starts_with("model."), -starts_with("name.")) %>%
distinct()
}
###Function to create plots
### Set colors and symbols for dataset
<- c("Background", "Plume", "Vent")
sampletype_order <- c(21, 23, 24, 22)
site_symbol<- c("Axial", "GordaRidge", "Piccard", "VonDamm")
site_order <- c("#fdbb84", "#31a354", "#ef3b2c", "#02818a")
site_color
###
<- function(df){
plot_sampletype $SITE_ORDER <- factor(df$SITE, levels = site_order)
df$SAMPLETYPE_ORDER <- factor(df$SAMPLETYPE, levels = sampletype_order)
dfnames(site_symbol) <- site_order
plot_grid(df %>%
ggplot(aes(x = SAMPLETYPE_ORDER, y = Shannon, group = SAMPLETYPE_ORDER)) +
geom_violin(color = "#525252", fill = "#525252", alpha = 0.2, width = 0.6, draw_quantiles = c(0.25, 0.5, 0.75)) +
geom_jitter(color = "white", fill = "black", width = 0.2, size = 2, aes(shape = SITE_ORDER)) +
scale_shape_manual(values = site_symbol) +
theme_linedraw() +
theme(axis.text.y = element_text(size = 14),
axis.text.x = element_blank(),
strip.background = element_blank(),
strip.text = element_text(color = "black"),
legend.position = "none",
axis.ticks.x = element_blank()) +
labs(x = "", y = "Shannon"),
%>%
df ggplot(aes(x = SAMPLETYPE_ORDER, y = Simpson, group = SAMPLETYPE_ORDER)) +
geom_violin(color = "#525252", fill = "#525252", alpha = 0.1, width = 0.6, draw_quantiles = c(0.25, 0.5, 0.75)) +
geom_jitter(color = "white", fill = "black", width = 0.2, size = 2.5, aes(shape = SITE_ORDER)) +
scale_shape_manual(values = site_symbol) +
theme_linedraw() +
theme(axis.text.x = element_text(vjust = 1, hjust = 0.5, size = 14),
axis.text = element_text(size = 14),
strip.background = element_blank(),
strip.text = element_blank(),
legend.title = element_blank(),
legend.position = "none") +
labs(x = "", y = "Simpson"),
ncol = 1, axis = c("lrt"), align = c("vh"))
}
Below commands use output from DivNet at different taxonomic levels to determine if pattern of Shannon index is consistent across taxonomic levels, which it appears to be. For final work, will use the species level (ASV level).
load("data-input/SPECIES.Rdata", verbose = T)
## Loading objects:
## spp_divnet
## spp_divnet_label
## spp_divnet_TYPE
## spp_divnet_TYPE_SITE
<- fxn_extract_divet(spp_divnet)
spp_alpha_18s <- fxn_extract_divet(spp_divnet_label)
spp_alpha_label <- fxn_extract_divet(spp_divnet_TYPE)
spp_alpha_TYPE <- fxn_extract_divet(spp_divnet_TYPE_SITE)
spp_alpha_TYPE_SITE
plot_sampletype(spp_alpha_18s)
$SITE_ORDER <- factor(spp_alpha_label$SITE, levels = site_order)
spp_alpha_label$SAMPLETYPE_ORDER <- factor(spp_alpha_label$SAMPLETYPE, levels = sampletype_order)
spp_alpha_labelnames(site_symbol) <- site_order
<- c(21, 23, 24)
sampletype_symbol# names(sampletype_symbol) <- sampletype_order
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/fig4b-shannon.svg", h = 4, w = 5)
%>%
spp_alpha_label ggplot(aes(x = SAMPLETYPE_ORDER, y = Shannon, group = SAMPLETYPE_ORDER)) +
geom_violin(color = "#525252", fill = "#525252", alpha = 0.2, width = 0.6, draw_quantiles = c(0.25, 0.5, 0.75)) +
geom_jitter(color = "white", fill = "black", width = 0.2, size = 2, aes(shape = SITE_ORDER)) +
# scale_fill_manual(values = site_color) +
scale_shape_manual(values = site_symbol) +
theme_linedraw() +
theme(axis.text.y = element_text(size = 12, color = "black", face = "bold"),
axis.text.x = element_text(size = 12, color = "black", face = "bold"),
axis.title = element_text(size = 12, color = "black", face = "bold"),
strip.background = element_blank(),
strip.text = element_text(color = "black", face = "bold"),
legend.position = "none",
# legend.position = "right",
legend.title = element_blank(),
axis.ticks.x = element_blank()) +
labs(x = "", y = "Shannon")
# dev.off()
What vents or samples have the highest Shannon index?
# View(spp_alpha_label)
Visualize with estimator variance included.
# head(spp_alpha_18s)
# View(spp_alpha_label)
# View(spp_alpha_TYPE_SITE)
##
plot_grid(spp_alpha_label %>%
select(VENT, SITE, SAMPLETYPE, starts_with("Shannon"), starts_with("Simpson")) %>%
distinct() %>%
ggplot(aes(x = VENT, y = Shannon, fill = SITE, shape = SAMPLETYPE)) +
geom_errorbar(aes(ymin = `Shannon-lower`, ymax = `Shannon-upper`), color = "#525252", width = 0.2) +
geom_point(color = "#525252", size = 2, aes(fill = SITE, shape = SAMPLETYPE)) +
scale_shape_manual(values = sampletype_symbol) +
scale_fill_manual(values = site_color) +
# coord_flip() +
theme_linedraw() +
theme(axis.text.x = element_text(angle = 0),
panel.background = element_blank(),
strip.background = element_blank(),
strip.text.x = element_text(color = "black", angle = 0, vjust = 0.5, hjust = 0.5),
legend.position = "none",
panel.grid = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank(),
axis.line = element_line()) +
facet_grid(SITE + SAMPLETYPE ~ ., scale = "free", space = "free") +
labs(x = "", y = "Shannon"),
#
%>%
spp_alpha_label select(VENT, SITE, SAMPLETYPE, starts_with("Shannon"), starts_with("Simpson")) %>%
distinct() %>%
ggplot(aes(x = VENT, y = Simpson, fill = SITE, shape = SAMPLETYPE)) +
geom_errorbar(aes(ymin = `Simpson-lower`, ymax = `Simpson-upper`), color = "#525252", width = 0.2) +
geom_point(color = "#525252", size = 2, aes(fill = SITE, shape = SAMPLETYPE)) +
scale_shape_manual(values = sampletype_symbol) +
scale_fill_manual(values = site_color) +
# coord_flip() +
theme_linedraw() +
theme(axis.text.y = element_blank(),
panel.background = element_blank(),
strip.background = element_blank(),
strip.text.x = element_text(color = "black", angle = 0, vjust = 0.5, hjust = 0.5),
legend.position = "none",
panel.grid = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank(),
axis.line.x = element_line()) +
facet_grid(SITE + SAMPLETYPE ~ ., scale = "free", space = "free") +
labs(x = "", y = "Simpson"),
ncol = 2, rel_widths = c(1,0.7))
<- spp_alpha_label %>%
shannon_divnet select(VENT, SITE, SAMPLETYPE, starts_with("Shannon"), starts_with("Simpson")) %>%
distinct() %>%
ggplot(aes(x = VENT, y = Shannon, fill = SITE, shape = SAMPLETYPE)) +
geom_errorbar(aes(ymin = `Shannon-lower`, ymax = `Shannon-upper`), color = "#525252", width = 0.2) +
geom_point(color = "#525252", size = 2, aes(fill = SITE, shape = SAMPLETYPE)) +
scale_shape_manual(values = sampletype_symbol) +
scale_fill_manual(values = site_color) +
# coord_flip() +
theme_linedraw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
panel.background = element_blank(),
strip.background = element_blank(),
strip.text.x = element_text(color = "black", angle = 0, vjust = 0.5, hjust = 0.5),
legend.position = "none",
panel.grid = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank(),
axis.line = element_line()) +
facet_grid(. ~ SITE + SAMPLETYPE, scale = "free", space = "free") +
labs(x = "", y = "Shannon")
Figure S6.
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/figS6.svg", h=4, w = 9)
shannon_divnet
# dev.off()
Import data frames below and format as phyloseq. Subsample to select ASVs that appear in more than 1 sample and have a total of 100 or more sequences. This leaves 2577 total ASVs across 43 samples.
Run below commands on HPC
load("data-input/asv-tables-processed-27012022.RData", verbose = T)
library(phyloseq)
# library(SpiecEasi)
# Select eukaryotes only and create wide format dataframe
<- asv_insitu_qc %>%
insitu_wide_nosingle filter(Domain == "Eukaryota") %>%
filter(!grepl("_Plume001_", SAMPLE)) %>% #removing "near vent background", not relevant in other data sets
select(FeatureID, Taxon, SAMPLE, value) %>%
pivot_wider(names_from = SAMPLE, values_from = value, values_fill = 0) %>%
mutate(PREVALENCE = rowSums(select_if(., is.numeric) > 0),
SEQ_TOTAL = rowSums(select_if(., is.numeric))) %>%
filter(PREVALENCE >= 1) %>%
filter(SEQ_TOTAL >= 100)
#
# ASVs had to appear in more than 1 sample and have at least 100 sequences
length(unique(insitu_wide_nosingle$FeatureID))
length(unique(asv_insitu_qc$FeatureID))
# head(insitu_wide)
<- as.character(colnames(insitu_wide_nosingle %>% select(-Taxon, -FeatureID)))
insitu_samples
# make matrices for phyloseq
<- insitu_wide_nosingle %>%
insitu_tax_matrix select(FeatureID, Taxon) %>%
separate(Taxon, c("Domain", "Supergroup",
"Phylum", "Class", "Order",
"Family", "Genus", "Species"), sep = ";") %>%
column_to_rownames(var = "FeatureID") %>%
as.matrix
<- insitu_wide_nosingle %>%
insitu_asv_matrix select(-Taxon) %>%
column_to_rownames(var = "FeatureID") %>%
as.matrix
# Align row names for each matrix
rownames(insitu_tax_matrix) <- row.names(insitu_asv_matrix)
<- metadata %>%
metadata_insitu filter(SAMPLE %in% insitu_samples) %>% # from reformatting df above
select(SAMPLE, VENT, SITE, SAMPLETYPE, YEAR) %>%
unite(SAMPLELABEL, VENT, SITE, SAMPLETYPE, YEAR, sep = "_", remove = FALSE) %>%
unite(TYPE_SITE, SITE, SAMPLETYPE, sep = "_", remove = FALSE)
rownames(metadata_insitu) <- metadata_insitu$SAMPLE
# Import asv and tax matrices
= otu_table(insitu_asv_matrix, taxa_are_rows = TRUE)
ASV = tax_table(insitu_tax_matrix)
TAX
<- phyloseq(ASV, TAX)
phylo_obj
# Import metadata as sample data in phyloseq
<- sample_data(metadata_insitu)
samplenames
## Check
physeq_insitu
Run SPIEC-EASI with phyloseq object.
# Run spiec easi with glasso
<- list(rep.num = 50, seed = 10010, ncores = 10)
pargs2 <- spiec.easi(physeq_insitu, method = 'glasso', lambda.min.ratio=1e-2, nlambda=20,pulsar.params=pargs2)
spec_glasso_microeuk
# save(spec_glasso_microeuk, file = "spiec-easi-output-03-12-21.RData")
Isolate ASV-ASV pairs of interest
# load("spiec-easi-output-03-12-21.RData", verbose = T) # almost 5GB file!
getStability(spec_glasso_microeuk) # Target == 0.05
# [1] 0.03827056
sum(getRefit(spec_glasso_microeuk))/2
# [1] 45904.5
# spec_glasso_microeuk
# Pulsar-selected refit of sparseiCov
# Path length: 20
# Graph dim: 2577
# Criterion:
# stars... sparsity 0.0138
Extract weighted matrix
# se_beta <- as.matrix(symBeta(getOptBeta(spec_glasso_microeuk)))
# df_beta <- as.data.frame(se_beta)
# Extract weight information
<- cov2cor(as.matrix(getOptCov(spec_glasso_microeuk)))
glasso_weight
colnames(glasso_weight) <- rownames(glasso_weight)# <- colnames(Networ_taxa_DF_WideMat) #here i may be able to give the verticies the taxa names if i feed it the vector of names from the levels.
<- glasso_weight*getRefit(spec_glasso_microeuk)
weighted_adj_mat <- as.data.frame(as.matrix(weighted_adj_mat))
df_weighted # Assign column and row names - from original glasso output matrix data
colnames(df_weighted) <- colnames(spec_glasso_microeuk$est$data)
row.names(df_weighted) <- colnames(spec_glasso_microeuk$est$data)
Work with weighted dataframe
<- insitu_asv_wClass %>%
key select(FeatureID, Taxon, Domain:Species, CLASS, SITE_CLASS) %>%
distinct()
# head(key)
<- df_weighted %>%
df_spieceasi rownames_to_column(var = "sideA") %>%
pivot_longer(cols = -sideA, names_to = "sideB") %>%
left_join(key, by = c(sideA = "FeatureID")) %>%
left_join(key, by = c(sideB = "FeatureID"), suffix = c("_sideA", "_sideB")) %>%
distinct()
# 6640929 total interactions
## 6 million edged?
<- df_spieceasi %>%
df_spieceasi_filtered filter(abs(value) > 0.01) %>%
mutate(Interaction = case_when(
< 0 ~ "negative",
value > 0 ~ "positive"
value
))# Leaves 91,288 interactions
## Interaction type
# negative positive
# 3363 87925
save(df_spieceasi_filtered, file = "filtered-spieceasi-result-08122021.RData")
load("data-input/filtered-spieceasi-result-08122021.RData", verbose = TRUE)
## Loading objects:
## df_spieceasi_filtered
H0: the majority of protist-protist pairs will reveal host-parasite interactions, and then predator-prey.
Questions to ask regarding the SPIEC EASI results. - What is the overall taxonomic composition of negative and positive co-occurring ASVs? - What percentage of putative interactions include likely parasitic protists?
Format, get summary stats from network analyss.
# head(df_spieceasi_filtered)
<- df_spieceasi_filtered %>%
spieceasi_rm_reps mutate(TMP_ASV_REP = purrr::map2_chr(sideA, sideB, ~toString(sort(c(.x, .y))))) %>%
select(TMP_ASV_REP, value, Interaction) %>%
group_by(TMP_ASV_REP, value, Interaction) %>%
distinct() %>%
ungroup() %>%
separate(TMP_ASV_REP, c("sideA", "sideB"), sep = ", ") %>%
left_join((select(df_spieceasi_filtered, ends_with("sideA")) %>% distinct())) %>%
left_join((select(df_spieceasi_filtered, ends_with("sideB")) %>% distinct()))
head(spieceasi_rm_reps)
## # A tibble: 6 × 26
## sideA sideB value Inter…¹ Taxon…² Domai…³ Super…⁴ Phylu…⁵ Class…⁶ Order…⁷
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 000ee3774… 0413… 0.449 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 2 000ee3774… 0769… 0.395 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 3 000ee3774… 15d8… 0.449 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 4 000ee3774… 1849… 0.432 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 5 000ee3774… 1a9b… 0.443 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 6 000ee3774… 1c51… 0.416 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## # … with 16 more variables: Family_sideA <chr>, Genus_sideA <chr>,
## # Species_sideA <chr>, CLASS_sideA <chr>, SITE_CLASS_sideA <chr>,
## # Taxon_sideB <chr>, Domain_sideB <chr>, Supergroup_sideB <chr>,
## # Phylum_sideB <chr>, Class_sideB <chr>, Order_sideB <chr>,
## # Family_sideB <chr>, Genus_sideB <chr>, Species_sideB <chr>,
## # CLASS_sideB <chr>, SITE_CLASS_sideB <chr>, and abbreviated variable names
## # ¹Interaction, ²Taxon_sideA, ³Domain_sideA, ⁴Supergroup_sideA, …
Look at highest percentage of ASV-ASV pairs by various categories.
# Get stats
<- dim(spieceasi_rm_reps)[1]
totaloccur
%>%
spieceasi_rm_reps unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
unite(SITE_CLASS_joined, SITE_CLASS_sideA, SITE_CLASS_sideB, sep = "-") %>%
unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
group_by(CLASS_joined, Interaction) %>%
summarise(COUNT = n(),
PERC = 100*(COUNT/totaloccur)) %>%
arrange(desc(PERC))
## # A tibble: 63 × 4
## # Groups: CLASS_joined [43]
## CLASS_joined Interaction COUNT PERC
## <chr> <chr> <int> <dbl>
## 1 Vent only-Vent only positive 16727 36.6
## 2 Vent, plume, & background-Vent, plume, & background positive 7076 15.5
## 3 Vent only-Vent & background positive 1518 3.32
## 4 Vent & background-Vent only positive 1435 3.14
## 5 Vent only-Vent, plume, & background positive 1319 2.89
## 6 Vent, plume, & background-Vent only positive 1180 2.58
## 7 Plume only-Plume only positive 1099 2.41
## 8 Vent & plume-Vent only positive 1090 2.39
## 9 Vent only-Vent & plume positive 932 2.04
## 10 Vent & plume-Vent, plume, & background positive 897 1.96
## # … with 53 more rows
ASV-ASV pairs were primarily among ASVs classified as ‘Vent only’ - both ASVs were resident. 36%
Secondly, 15% of the ASV pairs were from ASVs specifically found in all sample types.
Again, mostly all positive. The highest occurence of negative interactions were between Vent only- Vent, plume, and background.
<- c("Vent only-Vent only", "Vent, plume, & background-Vent, plume, & background")
asv_asv
# Get stats
<- dim(spieceasi_rm_reps)[1]
totaloccur
%>%
spieceasi_rm_reps unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
unite(SITE_CLASS_joined, SITE_CLASS_sideA, SITE_CLASS_sideB, sep = "-") %>%
unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
filter(CLASS_joined %in% asv_asv) %>%
group_by(CLASS_joined) %>%
mutate(TOTALCOUNT = n()) %>%
group_by(CLASS_joined, PHYLUM_joined, Interaction, TOTALCOUNT) %>%
summarise(COUNT = n(),
PERC = 100*(COUNT/TOTALCOUNT)) %>%
ungroup() %>%
arrange(desc(PERC)) %>%
distinct() %>%
select(CLASS_joined, PHYLUM_joined, PERC, Interaction) %>%
pivot_wider(names_from = CLASS_joined, values_from = PERC)
## # A tibble: 721 × 4
## PHYLUM_joined Interaction `Vent only-Vent only` Vent, plume…¹
## <chr> <chr> <dbl> <dbl>
## 1 Ciliophora-Ciliophora positive 15.0 2.80
## 2 Dinoflagellata-Dinoflagellata positive 2.95 12.6
## 3 Dinoflagellata-Radiolaria positive 0.837 5.56
## 4 Radiolaria-Dinoflagellata positive 0.705 5.41
## 5 Radiolaria-Radiolaria positive 0.209 3.74
## 6 Dinoflagellata-Ciliophora positive 3.34 3.39
## 7 Ciliophora-Dinoflagellata positive 2.86 2.87
## 8 Cercozoa-Ciliophora positive 2.43 0.0519
## 9 Ciliophora-Cercozoa positive 2.20 0.182
## 10 Haptophyta-Haptophyta positive 1.97 0.441
## # … with 711 more rows, and abbreviated variable name
## # ¹`Vent, plume, & background-Vent, plume, & background`
Within ‘Vent only-Vent only’ ASV pairs, almost 15% were among ciliates (ciliate-ciliate), while for the ‘Vent, plume, & backgroun’ ASV pairs, almost 13% were between dinoflagellates.
Isolate the ASV-ASV pairs that appeared most frequently. These include ciliates, dinoflagellates, and radiolaria within the resident and cosmopolitan (the latter includes ASVs that appears at least once ALL sample types).
<- c("Dinoflagellata-Radiolaria", "Radiolaria-Dinoflagellata", "Radiolaria-Radiolaria")
rad_dino
<- spieceasi_rm_reps %>%
ciliate_ciliate unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
unite(SITE_CLASS_joined, SITE_CLASS_sideA, SITE_CLASS_sideB, sep = "-") %>%
unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
filter(CLASS_joined %in% asv_asv) %>%
filter(PHYLUM_joined == "Ciliophora-Ciliophora")
<- spieceasi_rm_reps %>%
dino_dino unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
unite(SITE_CLASS_joined, SITE_CLASS_sideA, SITE_CLASS_sideB, sep = "-") %>%
unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
filter(CLASS_joined %in% asv_asv) %>%
filter(PHYLUM_joined == "Dinoflagellata-Dinoflagellata")
<- spieceasi_rm_reps %>%
rad_dino unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
unite(SITE_CLASS_joined, SITE_CLASS_sideA, SITE_CLASS_sideB, sep = "-") %>%
unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
filter(CLASS_joined %in% asv_asv) %>%
filter(PHYLUM_joined %in% rad_dino)
<- dim(spieceasi_rm_reps)[1]
total head(dino_dino)
## # A tibble: 6 × 25
## sideA sideB value Inter…¹ Taxon…² Domai…³ Super…⁴ PHYLU…⁵ Phylu…⁶ Class…⁷
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 00878368… 6e00… 0.0964 positi… Eukary… Eukary… Alveol… Dinofl… Dinofl… Syndin…
## 2 00878368… 7fc5… 0.0947 positi… Eukary… Eukary… Alveol… Dinofl… Dinofl… Syndin…
## 3 00d8341a… 0177… 0.450 positi… Eukary… Eukary… Alveol… Dinofl… Dinofl… Syndin…
## 4 00d8341a… 1ab8… 0.449 positi… Eukary… Eukary… Alveol… Dinofl… Dinofl… Syndin…
## 5 00d8341a… 4029… 0.449 positi… Eukary… Eukary… Alveol… Dinofl… Dinofl… Syndin…
## 6 00d8341a… 60d4… 0.449 positi… Eukary… Eukary… Alveol… Dinofl… Dinofl… Syndin…
## # … with 15 more variables: Order_sideA <chr>, Family_sideA <chr>,
## # Genus_sideA <chr>, Species_sideA <chr>, CLASS_joined <chr>,
## # SITE_CLASS_joined <chr>, Taxon_sideB <chr>, Domain_sideB <chr>,
## # Supergroup_sideB <chr>, Phylum_sideB <chr>, Class_sideB <chr>,
## # Order_sideB <chr>, Family_sideB <chr>, Genus_sideB <chr>,
## # Species_sideB <chr>, and abbreviated variable names ¹Interaction,
## # ²Taxon_sideA, ³Domain_sideA, ⁴Supergroup_sideA, ⁵PHYLUM_joined, …
dim(dino_dino)[1]/total) * 100 (
## [1] 3.346547
dim(ciliate_ciliate)[1]/total) * 100 (
## [1] 5.959395
<- (spieceasi_rm_reps %>% unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-")) by_phylum
Isolate associations with ciliates
# head(ciliate_ciliate)
# table(ciliate_ciliate$SITE_CLASS_joined)
# table(ciliate_ciliate
%>%
ciliate_ciliate filter(CLASS_joined == "Vent only-Vent only") %>%
# filter(!is.na(Phylum_sideA) | !is.na(Phylum_sideB)) %>%
ggplot(aes(x = Taxon_sideA, y = Taxon_sideB, fill = value)) +
geom_tile(color = "black") +
scale_fill_gradient(low = "#bfd3e6", high = "#810f7c") +
theme_linedraw() +
facet_grid(Class_sideB + Order_sideB ~ Class_sideA + Order_sideA, space = "free", scales = "free") +
theme(axis.text = element_blank(),
panel.grid.minor = element_blank(),
strip.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
strip.text.y = element_text(angle = 0, hjust = 0, vjust = 0.5),
strip.text = element_text(color = "black", face = "bold"),
strip.background = element_blank())
Isolate functional traits from these groups of ciliates
# order <- c("Plagiopylea", "Nassophorea", "Litostomatea")
# family <- c("Scuticociliatia", "Euplotia", "Suctoria", "Peritrichia")
# genus <- c("Strombidiidae")
Tile plot of vent-only (resident) ciliate-ciliate ASV co-occurrences.
# head(ciliate_ciliate)
# hist(ciliate_ciliate$value)
%>%
ciliate_ciliate filter(value >= 0.4) %>%
select(starts_with(c("Class_", "Order_")), PHYLUM_joined) %>%
select(-CLASS_joined) %>%
# select(Taxon_sideA, Taxon_sideB, PHYLUM_joined) %>%
unite(SIDEA, Class_sideA, Order_sideA, sep = ";", remove = FALSE) %>%
unite(SIDEB, Class_sideB, Order_sideB, sep = ";", remove = FALSE) %>%
group_by(SIDEA, SIDEB, Class_sideA, Class_sideB) %>%
summarise(COUNT = n()) %>%
ggplot(aes(x = SIDEA, y = SIDEB, fill = COUNT)) +
geom_tile(color = "black") +
scale_fill_gradient(low = "#bfd3e6", high = "#810f7c") +
theme_linedraw() +
facet_grid(Class_sideB ~ Class_sideA, space = "free", scales = "free") +
theme(panel.grid.minor = element_blank(),
strip.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
strip.text.y = element_text(angle = 0, hjust = 0, vjust = 0.5),
strip.text = element_text(color = "black", face = "bold"),
strip.background = element_blank(),
axis.title = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5))
%>%
dino_dino filter(CLASS_joined == "Vent only-Vent only") %>%
# filter(!is.na(Phylum_sideA) | !is.na(Phylum_sideB)) %>%
ggplot(aes(x = Taxon_sideA, y = Taxon_sideB, fill = value)) +
geom_tile(color = "black") +
scale_fill_gradient(low = "#bfd3e6", high = "#810f7c") +
theme_linedraw() +
facet_grid(Class_sideB + Order_sideB ~ Class_sideA + Order_sideA, space = "free", scales = "free") +
theme(axis.text = element_blank(),
panel.grid.minor = element_blank(),
strip.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
strip.text.y = element_text(angle = 0, hjust = 0, vjust = 0.5),
strip.text = element_text(color = "black", face = "bold"),
strip.background = element_blank())
Dinos and others Get stats on Ciliate-other ASV pairs for a table
%>%
rad_dino filter(CLASS_joined == "Vent only-Vent only") %>%
# filter(!is.na(Phylum_sideA) | !is.na(Phylum_sideB)) %>%
ggplot(aes(x = Taxon_sideA, y = Taxon_sideB, fill = value)) +
geom_tile(color = "black") +
scale_fill_gradient(low = "#bfd3e6", high = "#810f7c") +
theme_linedraw() +
facet_grid(Class_sideB + Order_sideB ~ Class_sideA + Order_sideA, space = "free", scales = "free") +
theme(axis.text = element_blank(),
panel.grid.minor = element_blank(),
strip.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
strip.text.y = element_text(angle = 0, hjust = 0, vjust = 0.5),
strip.text = element_text(color = "black", face = "bold"),
strip.background = element_blank())
Information for table 3.
head(spieceasi_rm_reps)
## # A tibble: 6 × 26
## sideA sideB value Inter…¹ Taxon…² Domai…³ Super…⁴ Phylu…⁵ Class…⁶ Order…⁷
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 000ee3774… 0413… 0.449 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 2 000ee3774… 0769… 0.395 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 3 000ee3774… 15d8… 0.449 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 4 000ee3774… 1849… 0.432 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 5 000ee3774… 1a9b… 0.443 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 6 000ee3774… 1c51… 0.416 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## # … with 16 more variables: Family_sideA <chr>, Genus_sideA <chr>,
## # Species_sideA <chr>, CLASS_sideA <chr>, SITE_CLASS_sideA <chr>,
## # Taxon_sideB <chr>, Domain_sideB <chr>, Supergroup_sideB <chr>,
## # Phylum_sideB <chr>, Class_sideB <chr>, Order_sideB <chr>,
## # Family_sideB <chr>, Genus_sideB <chr>, Species_sideB <chr>,
## # CLASS_sideB <chr>, SITE_CLASS_sideB <chr>, and abbreviated variable names
## # ¹Interaction, ²Taxon_sideA, ³Domain_sideA, ⁴Supergroup_sideA, …
<- spieceasi_rm_reps %>%
freq unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
filter(!(is.na(Phylum_sideA)) | !is.na(Phylum_sideB)) %>%
unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
group_by(CLASS_joined, PHYLUM_joined, Taxon_sideA, Taxon_sideB) %>%
summarise(FREQUENCY = n()) %>%
arrange(desc(FREQUENCY)) %>%
filter(FREQUENCY > 2)
# View(freq %>% filter(grepl("Cercozoa-", PHYLUM_joined)))
# View(freq %>% filter(grepl("Haptophyta-", PHYLUM_joined)))
# View(freq %>% filter(grepl("Opalozoa-", PHYLUM_joined) | grepl("Pseudofungi-", PHYLUM_joined)))
# View(freq %>% filter(grepl("Radiolaria-", PHYLUM_joined)))
# range(freq$FREQUENCY)
Import phyloseq-saved RObjects.
# library(tidyverse); library(phyloseq); library(compositions); library(vegan)
load("phyloseq-objs-180122.RData", verbose = TRUE)
## Loading objects:
## physeq_insitu
## metadata_insitu
Isolate ASV count table using phyloseq commands and perform a CLR transformation.
# Extract phyloseq object & CLR transform
<- compositions::clr(otu_table(physeq_insitu))
count_table_clr # sample_data(physeq_insitu)
clr()
?<- otu_table(physeq_insitu) count_table_notrans
Check all samples considered and estimate distance matrix (Euclidean)
# transpose and make dist matrix
sample_data(physeq_insitu)
## SAMPLE
## GordaRidge_Vent039_SUPRS1_2019 GordaRidge_Vent039_SUPRS1_2019
## GordaRidge_Vent040_SUPRS2_2019 GordaRidge_Vent040_SUPRS2_2019
## GordaRidge_BSW081_sterivex_2019 GordaRidge_BSW081_sterivex_2019
## Axial_Boca_FS905_2013 Axial_Boca_FS905_2013
## Axial_Skadi_FS910_2014 Axial_Skadi_FS910_2014
## GordaRidge_BSW056_sterivex_2019_REPb GordaRidge_BSW056_sterivex_2019_REPb
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021
## Axial_N3Area_FS898_2013 Axial_N3Area_FS898_2013
## GordaRidge_Vent088_SUPRS3_2019 GordaRidge_Vent088_SUPRS3_2019
## GordaRidge_Plume036_sterivex_2019_REPb GordaRidge_Plume036_sterivex_2019_REPb
## GordaRidge_Plume096_sterivex_2019 GordaRidge_Plume096_sterivex_2019
## GordaRidge_Vent105_SUPRS9_2019 GordaRidge_Vent105_SUPRS9_2019
## Axial_ElGuapo_FS896_2013 Axial_ElGuapo_FS896_2013
## Axial_Marker33_FS908_2014 Axial_Marker33_FS908_2014
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021
## GordaRidge_Vent041_SUPRS3_2019 GordaRidge_Vent041_SUPRS3_2019
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021
## Axial_Anemone_FS891_2013 Axial_Anemone_FS891_2013
## Axial_Marker113_FS915_2015 Axial_Marker113_FS915_2015
## Axial_Marker33_FS904_2013 Axial_Marker33_FS904_2013
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021
## GordaRidge_Vent086_SUPRS1_2019 GordaRidge_Vent086_SUPRS1_2019
## Axial_AnemonePlume_AnemonePlume_2015 Axial_AnemonePlume_AnemonePlume_2015
## Axial_Marker113_FS903_2013 Axial_Marker113_FS903_2013
## Axial_Marker113_FS906_2014 Axial_Marker113_FS906_2014
## GordaRidge_Vent009_SUPRS1_2019 GordaRidge_Vent009_SUPRS1_2019
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021
## GordaRidge_Vent011_SUPRS3_2019 GordaRidge_Vent011_SUPRS3_2019
## Axial_BSW1500m_BSW1500m_2015 Axial_BSW1500m_BSW1500m_2015
## GordaRidge_Vent106_SUPRS10_2019 GordaRidge_Vent106_SUPRS10_2019
## GordaRidge_Vent107_SUPRS11_2019 GordaRidge_Vent107_SUPRS11_2019
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021
## GordaRidge_Vent010_SUPRS2_2019 GordaRidge_Vent010_SUPRS2_2019
## GordaRidge_Vent087_SUPRS2_2019 GordaRidge_Vent087_SUPRS2_2019
## Axial_Skadi_FS902_2013 Axial_Skadi_FS902_2013
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021
## SAMPLELABEL
## GordaRidge_Vent039_SUPRS1_2019 Venti Latte_GordaRidge_Vent_2019
## GordaRidge_Vent040_SUPRS2_2019 Venti Latte_GordaRidge_Vent_2019
## GordaRidge_BSW081_sterivex_2019 Shallow seawater_GordaRidge_Background_2019
## Axial_Boca_FS905_2013 Boca_Axial_Vent_2013
## Axial_Skadi_FS910_2014 Escargot_Axial_Vent_2014
## GordaRidge_BSW056_sterivex_2019_REPb Deep seawater_GordaRidge_Background_2019
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021 Rav2_VonDamm_Vent_2020
## Axial_N3Area_FS898_2013 N3Area_Axial_Vent_2013
## GordaRidge_Vent088_SUPRS3_2019 Candelabra_GordaRidge_Vent_2019
## GordaRidge_Plume036_sterivex_2019_REPb Candelabra Plume_GordaRidge_Plume_2019
## GordaRidge_Plume096_sterivex_2019 Mt Edwards Plume_GordaRidge_Plume_2019
## GordaRidge_Vent105_SUPRS9_2019 Sir Ventsalot_GordaRidge_Vent_2019
## Axial_ElGuapo_FS896_2013 El Guapo_Axial_Vent_2013
## Axial_Marker33_FS908_2014 Marker33_Axial_Vent_2014
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021 Rav2_VonDamm_Vent_2020
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021 Plume_VonDamm_Plume_2020
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021 OldManTree_VonDamm_Vent_2020
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021 ArrowLoop_VonDamm_Vent_2020
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 Shrimpocalypse_Piccard_Vent_2020
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021 LotsOShrimp_Piccard_Vent_2020
## GordaRidge_Vent041_SUPRS3_2019 Venti Latte_GordaRidge_Vent_2019
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021 ShrimpHole_VonDamm_Vent_2020
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021 X18_VonDamm_Vent_2020
## Axial_Anemone_FS891_2013 Anemone_Axial_Vent_2013
## Axial_Marker113_FS915_2015 Marker113_Axial_Vent_2015
## Axial_Marker33_FS904_2013 Marker33_Axial_Vent_2013
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021 BSW_Piccard_Background_2020
## GordaRidge_Vent086_SUPRS1_2019 Candelabra_GordaRidge_Vent_2019
## Axial_AnemonePlume_AnemonePlume_2015 Anemone Plume_Axial_Plume_2015
## Axial_Marker113_FS903_2013 Marker113_Axial_Vent_2013
## Axial_Marker113_FS906_2014 Marker113_Axial_Vent_2014
## GordaRidge_Vent009_SUPRS1_2019 Mt Edwards_GordaRidge_Vent_2019
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021 BSW_VonDamm_Background_2020
## GordaRidge_Vent011_SUPRS3_2019 Mt Edwards_GordaRidge_Vent_2019
## Axial_BSW1500m_BSW1500m_2015 Deep seawater_Axial_Background_2015
## GordaRidge_Vent106_SUPRS10_2019 Sir Ventsalot_GordaRidge_Vent_2019
## GordaRidge_Vent107_SUPRS11_2019 Sir Ventsalot_GordaRidge_Vent_2019
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021 Plume_Piccard_Plume_2020
## GordaRidge_Vent010_SUPRS2_2019 Mt Edwards_GordaRidge_Vent_2019
## GordaRidge_Vent087_SUPRS2_2019 Candelabra_GordaRidge_Vent_2019
## Axial_Skadi_FS902_2013 Skadi_Axial_Vent_2013
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021 WhiteCastle_VonDamm_Vent_2020
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021 MustardStand_VonDamm_Vent_2020
## VENT
## GordaRidge_Vent039_SUPRS1_2019 Venti Latte
## GordaRidge_Vent040_SUPRS2_2019 Venti Latte
## GordaRidge_BSW081_sterivex_2019 Shallow seawater
## Axial_Boca_FS905_2013 Boca
## Axial_Skadi_FS910_2014 Escargot
## GordaRidge_BSW056_sterivex_2019_REPb Deep seawater
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021 Rav2
## Axial_N3Area_FS898_2013 N3Area
## GordaRidge_Vent088_SUPRS3_2019 Candelabra
## GordaRidge_Plume036_sterivex_2019_REPb Candelabra Plume
## GordaRidge_Plume096_sterivex_2019 Mt Edwards Plume
## GordaRidge_Vent105_SUPRS9_2019 Sir Ventsalot
## Axial_ElGuapo_FS896_2013 El Guapo
## Axial_Marker33_FS908_2014 Marker33
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021 Rav2
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021 Plume
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021 OldManTree
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021 ArrowLoop
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 Shrimpocalypse
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021 LotsOShrimp
## GordaRidge_Vent041_SUPRS3_2019 Venti Latte
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021 ShrimpHole
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021 X18
## Axial_Anemone_FS891_2013 Anemone
## Axial_Marker113_FS915_2015 Marker113
## Axial_Marker33_FS904_2013 Marker33
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021 BSW
## GordaRidge_Vent086_SUPRS1_2019 Candelabra
## Axial_AnemonePlume_AnemonePlume_2015 Anemone Plume
## Axial_Marker113_FS903_2013 Marker113
## Axial_Marker113_FS906_2014 Marker113
## GordaRidge_Vent009_SUPRS1_2019 Mt Edwards
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021 BSW
## GordaRidge_Vent011_SUPRS3_2019 Mt Edwards
## Axial_BSW1500m_BSW1500m_2015 Deep seawater
## GordaRidge_Vent106_SUPRS10_2019 Sir Ventsalot
## GordaRidge_Vent107_SUPRS11_2019 Sir Ventsalot
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021 Plume
## GordaRidge_Vent010_SUPRS2_2019 Mt Edwards
## GordaRidge_Vent087_SUPRS2_2019 Candelabra
## Axial_Skadi_FS902_2013 Skadi
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021 WhiteCastle
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021 MustardStand
## COORDINATES
## GordaRidge_Vent039_SUPRS1_2019 42.7548145 N 126.7088945 W
## GordaRidge_Vent040_SUPRS2_2019 42.754858 N 126.708922 W
## GordaRidge_BSW081_sterivex_2019 42.7546 N 126.743 W
## Axial_Boca_FS905_2013 45.927692 N 129.982482 W
## Axial_Skadi_FS910_2014 45.9264 N, 129.9791 W
## GordaRidge_BSW056_sterivex_2019_REPb 42.76060928 N 126.7047891 W
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021 18.375112, -81.797180
## Axial_N3Area_FS898_2013 45.943716 N 129.985163 W
## GordaRidge_Vent088_SUPRS3_2019 42.75503414 N 126.7094585 W
## GordaRidge_Plume036_sterivex_2019_REPb 42.7551105 N 126.709442 W
## GordaRidge_Plume096_sterivex_2019 42.75465646 N 126.7091669 W
## GordaRidge_Vent105_SUPRS9_2019 42.761202 N 126.7054775 W
## Axial_ElGuapo_FS896_2013 45.926575 N 129.979479 W
## Axial_Marker33_FS908_2014 45.9332 N 129.9822 W
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021 18.375254, -81.797176
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021 18.377600, -81.799317
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021 18.375069, -81.797678
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021 18.376659, -81.797986
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 18.546674, -81.717806
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021 18.546789, -81.718356
## GordaRidge_Vent041_SUPRS3_2019 42.754858 N 126.708922 W
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021 18.374893, -81.797441
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021 18.374810, -81.797411
## Axial_Anemone_FS891_2013 45.9332 N 130.0137 W
## Axial_Marker113_FS915_2015 45.9227 N 129.9882 W
## Axial_Marker33_FS904_2013 45.9332 N 129.9822 W
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021 18.547980, -81.718180
## GordaRidge_Vent086_SUPRS1_2019 42.75506794 N 126.709613 W
## Axial_AnemonePlume_AnemonePlume_2015 45.9335667 N 130.013667 W
## Axial_Marker113_FS903_2013 45.9227 N 129.9882 W
## Axial_Marker113_FS906_2014 45.9227 N 129.9882 W
## GordaRidge_Vent009_SUPRS1_2019 42.75464576 N 126.7090451 W
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021 18.374183, -81.781533
## GordaRidge_Vent011_SUPRS3_2019 42.754692 N 126.7090115 W
## Axial_BSW1500m_BSW1500m_2015 46.27389 N 129.79548 W
## GordaRidge_Vent106_SUPRS10_2019 42.76131802 N 126.7054541 W
## GordaRidge_Vent107_SUPRS11_2019 42.76131802 N 126.7054541 W
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021 18.546767, -81.718200
## GordaRidge_Vent010_SUPRS2_2019 42.754692 N 126.7090115 W
## GordaRidge_Vent087_SUPRS2_2019 42.75503414 N 126.7094585 W
## Axial_Skadi_FS902_2013 45.923383 N 129.982853 W
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021 18.377005, -81.798088
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021 18.375130, -81.797488
## TYPE_SITE
## GordaRidge_Vent039_SUPRS1_2019 GordaRidge_Vent
## GordaRidge_Vent040_SUPRS2_2019 GordaRidge_Vent
## GordaRidge_BSW081_sterivex_2019 GordaRidge_Background
## Axial_Boca_FS905_2013 Axial_Vent
## Axial_Skadi_FS910_2014 Axial_Vent
## GordaRidge_BSW056_sterivex_2019_REPb GordaRidge_Background
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021 VonDamm_Vent
## Axial_N3Area_FS898_2013 Axial_Vent
## GordaRidge_Vent088_SUPRS3_2019 GordaRidge_Vent
## GordaRidge_Plume036_sterivex_2019_REPb GordaRidge_Plume
## GordaRidge_Plume096_sterivex_2019 GordaRidge_Plume
## GordaRidge_Vent105_SUPRS9_2019 GordaRidge_Vent
## Axial_ElGuapo_FS896_2013 Axial_Vent
## Axial_Marker33_FS908_2014 Axial_Vent
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021 VonDamm_Vent
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021 VonDamm_Plume
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021 VonDamm_Vent
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021 VonDamm_Vent
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 Piccard_Vent
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021 Piccard_Vent
## GordaRidge_Vent041_SUPRS3_2019 GordaRidge_Vent
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021 VonDamm_Vent
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021 VonDamm_Vent
## Axial_Anemone_FS891_2013 Axial_Vent
## Axial_Marker113_FS915_2015 Axial_Vent
## Axial_Marker33_FS904_2013 Axial_Vent
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021 Piccard_Background
## GordaRidge_Vent086_SUPRS1_2019 GordaRidge_Vent
## Axial_AnemonePlume_AnemonePlume_2015 Axial_Plume
## Axial_Marker113_FS903_2013 Axial_Vent
## Axial_Marker113_FS906_2014 Axial_Vent
## GordaRidge_Vent009_SUPRS1_2019 GordaRidge_Vent
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021 VonDamm_Background
## GordaRidge_Vent011_SUPRS3_2019 GordaRidge_Vent
## Axial_BSW1500m_BSW1500m_2015 Axial_Background
## GordaRidge_Vent106_SUPRS10_2019 GordaRidge_Vent
## GordaRidge_Vent107_SUPRS11_2019 GordaRidge_Vent
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021 Piccard_Plume
## GordaRidge_Vent010_SUPRS2_2019 GordaRidge_Vent
## GordaRidge_Vent087_SUPRS2_2019 GordaRidge_Vent
## Axial_Skadi_FS902_2013 Axial_Vent
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021 VonDamm_Vent
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021 VonDamm_Vent
## SITE SAMPLEID
## GordaRidge_Vent039_SUPRS1_2019 GordaRidge Vent039
## GordaRidge_Vent040_SUPRS2_2019 GordaRidge Vent040
## GordaRidge_BSW081_sterivex_2019 GordaRidge BSW081
## Axial_Boca_FS905_2013 Axial FS905
## Axial_Skadi_FS910_2014 Axial FS910
## GordaRidge_BSW056_sterivex_2019_REPb GordaRidge BSW056
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021 VonDamm <NA>
## Axial_N3Area_FS898_2013 Axial FS898
## GordaRidge_Vent088_SUPRS3_2019 GordaRidge Vent088
## GordaRidge_Plume036_sterivex_2019_REPb GordaRidge Plume036
## GordaRidge_Plume096_sterivex_2019 GordaRidge Plume096
## GordaRidge_Vent105_SUPRS9_2019 GordaRidge Vent105
## Axial_ElGuapo_FS896_2013 Axial FS896
## Axial_Marker33_FS908_2014 Axial FS908
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021 VonDamm <NA>
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021 VonDamm <NA>
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021 VonDamm <NA>
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021 VonDamm <NA>
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 Piccard <NA>
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021 Piccard <NA>
## GordaRidge_Vent041_SUPRS3_2019 GordaRidge Vent041
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021 VonDamm <NA>
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021 VonDamm <NA>
## Axial_Anemone_FS891_2013 Axial FS891
## Axial_Marker113_FS915_2015 Axial FS915
## Axial_Marker33_FS904_2013 Axial FS904
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021 Piccard <NA>
## GordaRidge_Vent086_SUPRS1_2019 GordaRidge Vent086
## Axial_AnemonePlume_AnemonePlume_2015 Axial AnemonePlume
## Axial_Marker113_FS903_2013 Axial FS903
## Axial_Marker113_FS906_2014 Axial FS906
## GordaRidge_Vent009_SUPRS1_2019 GordaRidge Vent009
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021 VonDamm <NA>
## GordaRidge_Vent011_SUPRS3_2019 GordaRidge Vent011
## Axial_BSW1500m_BSW1500m_2015 Axial BSW1500m
## GordaRidge_Vent106_SUPRS10_2019 GordaRidge Vent106
## GordaRidge_Vent107_SUPRS11_2019 GordaRidge Vent107
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021 Piccard <NA>
## GordaRidge_Vent010_SUPRS2_2019 GordaRidge Vent010
## GordaRidge_Vent087_SUPRS2_2019 GordaRidge Vent087
## Axial_Skadi_FS902_2013 Axial FS902
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021 VonDamm <NA>
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021 VonDamm <NA>
## DEPTH SAMPLETYPE YEAR
## GordaRidge_Vent039_SUPRS1_2019 2708.0 Vent 2019
## GordaRidge_Vent040_SUPRS2_2019 2708.0 Vent 2019
## GordaRidge_BSW081_sterivex_2019 150.0 Background 2019
## Axial_Boca_FS905_2013 1516.9 Vent 2013
## Axial_Skadi_FS910_2014 1517.0 Vent 2014
## GordaRidge_BSW056_sterivex_2019_REPb 2010.0 Background 2019
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021 2389.6 Vent 2020
## Axial_N3Area_FS898_2013 1523.0 Vent 2013
## GordaRidge_Vent088_SUPRS3_2019 2730.0 Vent 2019
## GordaRidge_Plume036_sterivex_2019_REPb 2725.0 Plume 2019
## GordaRidge_Plume096_sterivex_2019 2707.0 Plume 2019
## GordaRidge_Vent105_SUPRS9_2019 2732.0 Vent 2019
## Axial_ElGuapo_FS896_2013 1502.0 Vent 2013
## Axial_Marker33_FS908_2014 1514.0 Vent 2014
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021 2388.9 Vent 2020
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021 1979.0 Plume 2020
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021 2375.8 Vent 2020
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021 2309.0 Vent 2020
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 4945.0 Vent 2020
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021 4967.0 Vent 2020
## GordaRidge_Vent041_SUPRS3_2019 2708.0 Vent 2019
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021 2376.0 Vent 2020
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021 2377.0 Vent 2020
## Axial_Anemone_FS891_2013 1542.0 Vent 2013
## Axial_Marker113_FS915_2015 1520.0 Vent 2015
## Axial_Marker33_FS904_2013 1516.0 Vent 2013
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021 4776.0 Background 2020
## GordaRidge_Vent086_SUPRS1_2019 2730.0 Vent 2019
## Axial_AnemonePlume_AnemonePlume_2015 1500.0 Plume 2015
## Axial_Marker113_FS903_2013 1520.3 Vent 2013
## Axial_Marker113_FS906_2014 1518.0 Vent 2014
## GordaRidge_Vent009_SUPRS1_2019 2707.0 Vent 2019
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021 2400.0 Background 2020
## GordaRidge_Vent011_SUPRS3_2019 2707.0 Vent 2019
## Axial_BSW1500m_BSW1500m_2015 1520.0 Background 2015
## GordaRidge_Vent106_SUPRS10_2019 2732.0 Vent 2019
## GordaRidge_Vent107_SUPRS11_2019 2732.0 Vent 2019
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021 4944.0 Plume 2020
## GordaRidge_Vent010_SUPRS2_2019 2707.0 Vent 2019
## GordaRidge_Vent087_SUPRS2_2019 2730.0 Vent 2019
## Axial_Skadi_FS902_2013 1561.6 Vent 2013
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021 2307.0 Vent 2020
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021 2374.0 Vent 2020
## TEMP pH
## GordaRidge_Vent039_SUPRS1_2019 11.000 6.40
## GordaRidge_Vent040_SUPRS2_2019 11.000 6.40
## GordaRidge_BSW081_sterivex_2019 8.600 NA
## Axial_Boca_FS905_2013 6.800 6.88
## Axial_Skadi_FS910_2014 6.600 5.84
## GordaRidge_BSW056_sterivex_2019_REPb 1.800 7.80
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021 94.000 5.81
## Axial_N3Area_FS898_2013 18.900 4.96
## GordaRidge_Vent088_SUPRS3_2019 79.000 5.50
## GordaRidge_Plume036_sterivex_2019_REPb 1.700 NA
## GordaRidge_Plume096_sterivex_2019 1.800 NA
## GordaRidge_Vent105_SUPRS9_2019 72.000 NA
## Axial_ElGuapo_FS896_2013 25.700 5.42
## Axial_Marker33_FS908_2014 18.500 5.60
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021 98.200 5.81
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021 4.208 NA
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021 121.600 5.69
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021 137.000 5.69
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 85.000 5.11
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021 36.000 5.92
## GordaRidge_Vent041_SUPRS3_2019 11.000 6.40
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021 21.000 7.72
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021 48.000 6.99
## Axial_Anemone_FS891_2013 28.200 5.50
## Axial_Marker113_FS915_2015 25.400 6.60
## Axial_Marker33_FS904_2013 27.300 5.50
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021 4.460 NA
## GordaRidge_Vent086_SUPRS1_2019 79.000 5.50
## Axial_AnemonePlume_AnemonePlume_2015 NA NA
## Axial_Marker113_FS903_2013 24.800 6.17
## Axial_Marker113_FS906_2014 24.300 5.80
## GordaRidge_Vent009_SUPRS1_2019 40.000 6.00
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021 4.181 NA
## GordaRidge_Vent011_SUPRS3_2019 40.000 6.00
## Axial_BSW1500m_BSW1500m_2015 2.000 7.80
## GordaRidge_Vent106_SUPRS10_2019 72.000 NA
## GordaRidge_Vent107_SUPRS11_2019 72.000 NA
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021 4.460 NA
## GordaRidge_Vent010_SUPRS2_2019 40.000 6.00
## GordaRidge_Vent087_SUPRS2_2019 79.000 5.50
## Axial_Skadi_FS902_2013 35.600 6.23
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021 108.000 5.49
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021 108.000 5.63
## PercSeawater Mg
## GordaRidge_Vent039_SUPRS1_2019 97.0 52.1
## GordaRidge_Vent040_SUPRS2_2019 97.0 52.1
## GordaRidge_BSW081_sterivex_2019 100.0 53.0
## Axial_Boca_FS905_2013 98.2 53.0
## Axial_Skadi_FS910_2014 97.4 52.6
## GordaRidge_BSW056_sterivex_2019_REPb 100.0 53.0
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021 33.4 18.0
## Axial_N3Area_FS898_2013 98.3 53.0
## GordaRidge_Vent088_SUPRS3_2019 88.0 36.5
## GordaRidge_Plume036_sterivex_2019_REPb 100.0 NA
## GordaRidge_Plume096_sterivex_2019 100.0 NA
## GordaRidge_Vent105_SUPRS9_2019 98.0 52.0
## Axial_ElGuapo_FS896_2013 92.3 49.8
## Axial_Marker33_FS908_2014 91.3 49.2
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021 33.4 18.0
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021 NA NA
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021 25.6 13.7
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021 33.8 18.1
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 81.7 43.9
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021 94.9 51.0
## GordaRidge_Vent041_SUPRS3_2019 97.0 52.1
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021 96.4 51.8
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021 52.0 28.0
## Axial_Anemone_FS891_2013 88.9 47.6
## Axial_Marker113_FS915_2015 95.8 51.4
## Axial_Marker33_FS904_2013 87.1 47.0
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021 100.0 53.7
## GordaRidge_Vent086_SUPRS1_2019 88.0 36.5
## Axial_AnemonePlume_AnemonePlume_2015 100.0 NA
## Axial_Marker113_FS903_2013 96.0 51.5
## Axial_Marker113_FS906_2014 96.4 50.0
## GordaRidge_Vent009_SUPRS1_2019 83.0 43.6
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021 100.0 NA
## GordaRidge_Vent011_SUPRS3_2019 83.0 43.6
## Axial_BSW1500m_BSW1500m_2015 100.0 53.6
## GordaRidge_Vent106_SUPRS10_2019 98.0 52.0
## GordaRidge_Vent107_SUPRS11_2019 98.0 52.0
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021 NA NA
## GordaRidge_Vent010_SUPRS2_2019 83.0 43.6
## GordaRidge_Vent087_SUPRS2_2019 88.0 36.5
## Axial_Skadi_FS902_2013 90.4 48.7
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021 16.6 8.9
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021 36.2 19.4
## H2
## GordaRidge_Vent039_SUPRS1_2019 NA
## GordaRidge_Vent040_SUPRS2_2019 NA
## GordaRidge_BSW081_sterivex_2019 NA
## Axial_Boca_FS905_2013 2.763720e+00
## Axial_Skadi_FS910_2014 4.697134e-02
## GordaRidge_BSW056_sterivex_2019_REPb NA
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021 1.020000e+04
## Axial_N3Area_FS898_2013 5.340191e-02
## GordaRidge_Vent088_SUPRS3_2019 2.190000e+01
## GordaRidge_Plume036_sterivex_2019_REPb NA
## GordaRidge_Plume096_sterivex_2019 NA
## GordaRidge_Vent105_SUPRS9_2019 NA
## Axial_ElGuapo_FS896_2013 2.992534e-01
## Axial_Marker33_FS908_2014 1.500000e+00
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021 1.020000e+04
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021 NA
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021 1.160000e+04
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021 1.150000e+04
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 0.000000e+00
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021 2.270000e+04
## GordaRidge_Vent041_SUPRS3_2019 NA
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021 5.527440e+00
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021 NA
## Axial_Anemone_FS891_2013 1.390000e+01
## Axial_Marker113_FS915_2015 3.000000e-01
## Axial_Marker33_FS904_2013 1.500000e+00
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021 NA
## GordaRidge_Vent086_SUPRS1_2019 2.190000e+01
## Axial_AnemonePlume_AnemonePlume_2015 NA
## Axial_Marker113_FS903_2013 1.400000e+00
## Axial_Marker113_FS906_2014 1.000000e+00
## GordaRidge_Vent009_SUPRS1_2019 1.270000e+02
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021 NA
## GordaRidge_Vent011_SUPRS3_2019 1.270000e+02
## Axial_BSW1500m_BSW1500m_2015 2.000000e-03
## GordaRidge_Vent106_SUPRS10_2019 NA
## GordaRidge_Vent107_SUPRS11_2019 NA
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021 NA
## GordaRidge_Vent010_SUPRS2_2019 1.270000e+02
## GordaRidge_Vent087_SUPRS2_2019 2.190000e+01
## Axial_Skadi_FS902_2013 1.023600e+00
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021 1.450000e+04
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021 9.800000e+03
## H2S
## GordaRidge_Vent039_SUPRS1_2019 NA
## GordaRidge_Vent040_SUPRS2_2019 NA
## GordaRidge_BSW081_sterivex_2019 NA
## Axial_Boca_FS905_2013 0.007664609
## Axial_Skadi_FS910_2014 0.035242393
## GordaRidge_BSW056_sterivex_2019_REPb NA
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021 1.370000000
## Axial_N3Area_FS898_2013 0.512918204
## GordaRidge_Vent088_SUPRS3_2019 NA
## GordaRidge_Plume036_sterivex_2019_REPb NA
## GordaRidge_Plume096_sterivex_2019 NA
## GordaRidge_Vent105_SUPRS9_2019 NA
## Axial_ElGuapo_FS896_2013 0.218077087
## Axial_Marker33_FS908_2014 0.267671400
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021 1.370000000
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021 NA
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021 1.770000000
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021 1.740000000
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 NA
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021 NA
## GordaRidge_Vent041_SUPRS3_2019 NA
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021 NA
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021 2.110000000
## Axial_Anemone_FS891_2013 1.060449600
## Axial_Marker113_FS915_2015 0.591845520
## Axial_Marker33_FS904_2013 0.557862000
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021 NA
## GordaRidge_Vent086_SUPRS1_2019 NA
## Axial_AnemonePlume_AnemonePlume_2015 NA
## Axial_Marker113_FS903_2013 0.746204400
## Axial_Marker113_FS906_2014 0.569121600
## GordaRidge_Vent009_SUPRS1_2019 1.010000000
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021 NA
## GordaRidge_Vent011_SUPRS3_2019 1.010000000
## Axial_BSW1500m_BSW1500m_2015 0.000000000
## GordaRidge_Vent106_SUPRS10_2019 NA
## GordaRidge_Vent107_SUPRS11_2019 NA
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021 NA
## GordaRidge_Vent010_SUPRS2_2019 1.010000000
## GordaRidge_Vent087_SUPRS2_2019 NA
## Axial_Skadi_FS902_2013 0.086783554
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021 1.960000000
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021 1.790000000
## CH4 ProkConc
## GordaRidge_Vent039_SUPRS1_2019 0.9000000 111192.50
## GordaRidge_Vent040_SUPRS2_2019 0.9000000 111192.50
## GordaRidge_BSW081_sterivex_2019 NA NA
## Axial_Boca_FS905_2013 1.6377600 850000.00
## Axial_Skadi_FS910_2014 2.1191968 NA
## GordaRidge_BSW056_sterivex_2019_REPb NA 39100.00
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021 1893.6600000 NA
## Axial_N3Area_FS898_2013 65.5214790 257000.00
## GordaRidge_Vent088_SUPRS3_2019 23.7000000 55076.66
## GordaRidge_Plume036_sterivex_2019_REPb NA 76900.00
## GordaRidge_Plume096_sterivex_2019 NA NA
## GordaRidge_Vent105_SUPRS9_2019 NA 52998.29
## Axial_ElGuapo_FS896_2013 2.8743807 168000.00
## Axial_Marker33_FS908_2014 6.5510400 390000.00
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021 1893.6600000 NA
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021 NA 16478.31
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021 1985.7840000 NA
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021 1893.6600000 10369.79
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 27.5348400 238585.68
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021 11.4643200 53878.14
## GordaRidge_Vent041_SUPRS3_2019 0.9000000 111192.50
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021 218.0268000 NA
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021 1310.2080000 111429.78
## Axial_Anemone_FS891_2013 15.1492800 410000.00
## Axial_Marker113_FS915_2015 22.8262800 1500000.00
## Axial_Marker33_FS904_2013 19.4484000 420000.00
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021 NA 11860.19
## GordaRidge_Vent086_SUPRS1_2019 23.7000000 55076.66
## Axial_AnemonePlume_AnemonePlume_2015 NA NA
## Axial_Marker113_FS903_2013 17.2988400 460000.00
## Axial_Marker113_FS906_2014 38.8968000 680000.00
## GordaRidge_Vent009_SUPRS1_2019 10.1000000 51439.52
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021 NA 34705.92
## GordaRidge_Vent011_SUPRS3_2019 10.1000000 51439.52
## Axial_BSW1500m_BSW1500m_2015 0.0020472 25000.00
## GordaRidge_Vent106_SUPRS10_2019 NA 52998.29
## GordaRidge_Vent107_SUPRS11_2019 NA 52998.29
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021 NA 51429.13
## GordaRidge_Vent010_SUPRS2_2019 10.1000000 51439.52
## GordaRidge_Vent087_SUPRS2_2019 23.7000000 55076.66
## Axial_Skadi_FS902_2013 4.4884860 565000.00
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021 2251.9200000 NA
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021 1819.9608000 56677.00
## Sample_or_Control
## GordaRidge_Vent039_SUPRS1_2019 Sample
## GordaRidge_Vent040_SUPRS2_2019 Sample
## GordaRidge_BSW081_sterivex_2019 Sample
## Axial_Boca_FS905_2013 Sample
## Axial_Skadi_FS910_2014 Sample
## GordaRidge_BSW056_sterivex_2019_REPb Sample
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021 Sample
## Axial_N3Area_FS898_2013 Sample
## GordaRidge_Vent088_SUPRS3_2019 Sample
## GordaRidge_Plume036_sterivex_2019_REPb Sample
## GordaRidge_Plume096_sterivex_2019 Sample
## GordaRidge_Vent105_SUPRS9_2019 Sample
## Axial_ElGuapo_FS896_2013 Sample
## Axial_Marker33_FS908_2014 Sample
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021 Sample
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021 Sample
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021 Sample
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021 Sample
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 Sample
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021 Sample
## GordaRidge_Vent041_SUPRS3_2019 Sample
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021 Sample
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021 Sample
## Axial_Anemone_FS891_2013 Sample
## Axial_Marker113_FS915_2015 Sample
## Axial_Marker33_FS904_2013 Sample
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021 Sample
## GordaRidge_Vent086_SUPRS1_2019 Sample
## Axial_AnemonePlume_AnemonePlume_2015 Sample
## Axial_Marker113_FS903_2013 Sample
## Axial_Marker113_FS906_2014 Sample
## GordaRidge_Vent009_SUPRS1_2019 Sample
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021 Sample
## GordaRidge_Vent011_SUPRS3_2019 Sample
## Axial_BSW1500m_BSW1500m_2015 Sample
## GordaRidge_Vent106_SUPRS10_2019 Sample
## GordaRidge_Vent107_SUPRS11_2019 Sample
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021 Sample
## GordaRidge_Vent010_SUPRS2_2019 Sample
## GordaRidge_Vent087_SUPRS2_2019 Sample
## Axial_Skadi_FS902_2013 Sample
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021 Sample
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021 Sample
# CLR-transformed data, transpose and create euclidean dist matrix
<- vegdist(t(count_table_clr), method = "euclidean")
dist_euc # ?vegdist()
# ?betadisper
# dist_jac <- vegdist(t(count_table_notrans), method = "jaccard")
# ?betadisper()
<- betadisper(dist_euc, sample_data(physeq_insitu)$SITE)
beta_SITE <- betadisper(dist_euc, sample_data(physeq_insitu)$SAMPLETYPE)
beta_SAMPLETYPE <- betadisper(dist_euc, sample_data(physeq_insitu)$TYPE_SITE) beta_TYPE_SITE
SITE
anova(beta_SITE)
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 3 112.43 37.475 2.7555 0.05527 .
## Residuals 39 530.41 13.600
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(beta_SITE)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## GordaRidge-Axial -1.4140515 -5.1091060 2.281003 0.7348370
## Piccard-Axial 0.5897262 -5.0684479 6.247900 0.9922407
## VonDamm-Axial 2.8385758 -1.3238428 7.000994 0.2750595
## Piccard-GordaRidge 2.0037777 -3.5281759 7.535731 0.7660453
## VonDamm-GordaRidge 4.2526273 0.2634789 8.241776 0.0328264
## VonDamm-Piccard 2.2488496 -3.6056198 8.103319 0.7326007
plot(beta_SITE)
# View(beta_SITE$vectors) # to confirm sample that is an outlier
Sampletype
anova(beta_SAMPLETYPE)
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 2 15.19 7.5974 0.4238 0.6574
## Residuals 40 716.99 17.9247
TukeyHSD(beta_SAMPLETYPE)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## Plume-Background 1.5609149 -4.956307 8.078137 0.8299904
## Vent-Background 1.8695977 -3.075585 6.814781 0.6308843
## Vent-Plume 0.3086828 -4.636500 5.253866 0.9873595
plot(beta_SAMPLETYPE)
# View(beta_SAMPLETYPE$vectors)
Based on dispersion testing, it is clear that Boca (Axial, snowblower vent) is an outlier. Secondary to Boca, however are the plume and CTD (Background samples).
Repeat dispersion test with removed samples, Boca and background.
<- compositions::clr(otu_table(subset_samples(physeq_insitu, VENT != "Boca")))
count_table_clr_noboca
<- compositions::clr(otu_table(subset_samples(physeq_insitu, (VENT != "Boca" & SAMPLETYPE == "Vent"))))
count_table_clr_nobocaBSW # names(count_table_clr_nobocaBSW)
# CLR-transformed data, transpose and create euclidean dist matrix
<- vegdist(t(count_table_clr_noboca), method = "euclidean")
dist_euc_noboca
<- vegdist(t(count_table_clr_nobocaBSW), method = "euclidean")
dist_euc_nobocaBSW # ?vegdist()
Re-check dispersion without Boca sample.
<- betadisper(dist_euc_noboca, sample_data(subset_samples(physeq_insitu, VENT != "Boca"))$SITE)
beta_SITE anova(beta_SITE)
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 3 142.05 47.351 5.0841 0.004668 **
## Residuals 38 353.92 9.314
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(beta_SITE)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## GordaRidge-Axial -0.01488429 -3.1457852 3.116017 0.9999992
## Piccard-Axial 2.04455322 -2.6889240 6.778030 0.6550081
## VonDamm-Axial 4.33568392 0.8252432 7.846125 0.0103625
## Piccard-GordaRidge 2.05943751 -2.5237321 6.642607 0.6261842
## VonDamm-GordaRidge 4.35056821 1.0455976 7.655539 0.0057447
## VonDamm-Piccard 2.29113069 -2.5592402 7.141502 0.5878419
plot(beta_SITE)
# View(beta_SITE$vectors) # to confirm samples that are outliers
Check dispersion without Boca or background samples.
<- betadisper(dist_euc_nobocaBSW, sample_data(subset_samples(physeq_insitu, (VENT != "Boca" & SAMPLETYPE == "Vent")))$SITE)
beta_SITE anova(beta_SITE)
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 3 51.705 17.2351 2.1788 0.1128
## Residuals 28 221.495 7.9105
TukeyHSD(beta_SITE)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## GordaRidge-Axial 0.1538572 -3.1341755 3.441890 0.9992363
## Piccard-Axial -0.5993290 -6.5476041 5.348946 0.9925605
## VonDamm-Axial 2.9344569 -0.7081027 6.577017 0.1480828
## Piccard-GordaRidge -0.7531862 -6.6182645 5.111892 0.9848926
## VonDamm-GordaRidge 2.7805997 -0.7244550 6.285654 0.1576322
## VonDamm-Piccard 3.5337860 -2.5371468 9.604719 0.4007670
plot(beta_SITE)
# View(beta_SITE$vectors) # to confirm sample that is an outlier
Legendre and Anderson (1999), RDA that looks for linear relationships on dissimilarity matrices. Constrained ordination with non-Euclidean distance measures. (Legendre P, Anderson MJ (1999) Distance-based redundancy analysis: testing multispecies responses in multifactorial ecological experiments. Ecol Monogr. 69:1–24.)
Below dbrda()
statement inputs all samples as a distance
matrix (Euclidean), with the RH side variables as numeric data following
the ~.
With various outliers depending on the question posed, below function
takes subseted data and runs dbrda()
analysis.
dbrda() analysis input different subsets of phyloseq object, perform Jaccard dissimilarity.
<- function(subset_df){
dbrda_subset # category <- enquo(category)
# subset_df <- subset_samples(physeq_insitu, category %in% selection)
<- compositions::clr(otu_table(subset_df))
clr_out <- vegdist(t(clr_out), method = "euclidean") #CLR with Euclidean is aitchison
euc_out # jac_out <- vegdist(t(otu_table(subset_df)), method = "jaccard")
# Params
# ?vegdist
<- sample_data(subset_df)$TEMP
temp <- sample_data(subset_df)$pH
ph <- sample_data(subset_df)$PercSeawater
persea <- sample_data(subset_df)$Mg
mg <- sample_data(subset_df)$H2
h2 <- sample_data(subset_df)$H2S
h2s <- sample_data(subset_df)$CH4
ch4 <- sample_data(subset_df)$ProkConc
prok <- sample_data(subset_df)$DEPTH
depth # DBRDA
<- dbrda(euc_out ~ temp + persea + prok + h2s + mg + h2 + ch4 + depth + ph, dist = vegdist, na.action = na.omit)
rda_out # Overall significance
# anova(rda_out)
# Print results by margin
anova(rda_out, by = 'margin')
# Plot
# xy <- anova(rda_out, by = 'axis')
# x <- round(xy$Variance[1], 1); y <- round(xy$Variance[2], 1)
# plot(rda_out, xlab = x, ylab = y)
}
Usage of function, input phyloseq object with all data, it will be transformed (CLR) and dist matrix will be estimates (Euclidean). Parameters from the metadata will be formatted and dbrda() will run on all.
CLR transformed data with Euclidean distance estimate is Aitchison’s
Removed Boca outlier and isolate vent-only sites, test for significant parameters.
dbrda_subset(subset_samples(physeq_insitu, (VENT != "Boca" & SAMPLETYPE == "Vent")))
## Permutation test for dbrda under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 999
##
## Model: dbrda(formula = euc_out ~ temp + persea + prok + h2s + mg + h2 + ch4 + depth + ph, distance = vegdist, na.action = na.omit)
## Df Variance F Pr(>F)
## temp 1 21.341 0.9219 0.594
## persea 1 21.864 0.9446 0.575
## prok 1 24.184 1.0448 0.443
## h2s 1 23.247 1.0043 0.476
## mg 1 21.461 0.9271 0.598
## h2 1 23.585 1.0189 0.442
## ch4 1 26.973 1.1653 0.254
## depth 1 24.958 1.0782 0.378
## ph 1 25.775 1.1135 0.338
## Residual 4 92.590
Isolate subsets of data frame by ASV or taxonomic ID.
<- filter(insitu_asv_wClass,
noNAs != "Bacteria_X" |
(Supergroup !is.na(Supergroup) |
!= "Terrabacteria" |
Supergroup != "Archaea_X" |
Supergroup != "Opisthokonta"))
Supergroup <- as.character(noNAs$FeatureID)
list_of_noNAs # length(list_of_noNAs)
<- prune_taxa(list_of_noNAs, physeq_insitu)
noNAs_phy
###
<- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")
cosmo
<- filter(insitu_asv_wClass, CLASS == "Vent only")
res <- filter(insitu_asv_wClass, CLASS %in% cosmo)
cos
<- as.character(unique(res$FeatureID))
list_of_resident <- as.character(unique(cos$FeatureID))
list_of_cos # length(list_of_resident); length(list_of_cos)
<- prune_taxa(list_of_cos, physeq_insitu)
cosmo_only <- prune_taxa(list_of_resident, physeq_insitu)
res_vent
###
# sample_data(insitu_asv_wClass)
<- filter(insitu_asv_wClass, Supergroup == "Alveolata")
alvs <- as.character(alvs$FeatureID)
list_of_alvs <- prune_taxa(list_of_alvs, physeq_insitu)
alvs_phy
##
# sample_data(insitu_asv_wClass)
<- filter(insitu_asv_wClass, (Phylum == "Dinoflagellata" | Class == "Syndiniales"))
dino_syn <- as.character(dino_syn$FeatureID)
list_of_dino_syn <- prune_taxa(list_of_dino_syn, physeq_insitu)
dino_syn_phy ##
# sample_data(insitu_asv_wClass)
<- filter(insitu_asv_wClass, Phylum == "Ciliophora")
cili <- as.character(cili$FeatureID)
list_of_cili <- prune_taxa(list_of_cili, physeq_insitu)
cili_phy
##
# sample_data(insitu_asv_wClass)
<- filter(insitu_asv_wClass, (Supergroup == "Rhizaria"))
rhiz <- as.character(rhiz$FeatureID)
list_of_rhiz <- prune_taxa(list_of_rhiz, physeq_insitu)
rhiz_phy
##
# sample_data(insitu_asv_wClass)
<- filter(insitu_asv_wClass, (Supergroup == "Stramenopiles"))
stram <- as.character(stram$FeatureID)
list_of_stram <- prune_taxa(list_of_stram, physeq_insitu) stram_phy
set.seed(1245)
Test with data set, vents only. Populate Table 2.
# All samples, without Boca as an outlier
<- function(df, inputdata){
dbrda_output <- dbrda_subset(subset_samples(df, (VENT != "Boca")))
allsamples <- data.frame(variables = row.names(allsamples), sig = allsamples$`Pr(>F)`, samples = "All samples") %>% drop_na()
allsamples_df
# vent samples only, Boca removed
<- dbrda_subset(subset_samples(df, (SAMPLETYPE == "Vent" & VENT != "Boca")))
ventonly <- data.frame(variables = row.names(ventonly), sig = ventonly$`Pr(>F)`, samples = "Vent only") %>% drop_na()
ventonly_df
# MCR: Piccard and Von Damm, vents only
<- dbrda_subset(subset_samples(df, (SITE == "VonDamm" | SITE == "Piccard" | SAMPLETYPE == "Vent")))
mcr <- data.frame(variables = row.names(mcr), sig = mcr$`Pr(>F)`, samples = "MCR") %>% drop_na()
mcr_df
# GR and Axial
<- dbrda_subset(subset_samples(df, (SITE == "GordaRidge" | SITE == "Axial" & VENT != "Boca" | SAMPLETYPE == "Vent")))
NEpac <- data.frame(variables = row.names(NEpac), sig = NEpac$`Pr(>F)`, samples = "NE Pacific") %>% drop_na()
NEpac_df
# Von Damm and then Piccard only
<- dbrda_subset(subset_samples(df, (SITE == "VonDamm" | SAMPLETYPE == "Vent")))
vd <- data.frame(variables = row.names(vd), sig = vd$`Pr(>F)`, samples = "Von Damm") %>% drop_na()
vd_df
<- dbrda_subset(subset_samples(df, (SITE == "Piccard" | SAMPLETYPE == "Vent")))
picc <- data.frame(variables = row.names(picc), sig = picc$`Pr(>F)`, samples = "Piccard") %>% drop_na()
picc_df
# Axial and then Gorda only
<- dbrda_subset(subset_samples(df, (VENT != "Boca" & SITE == "Axial" | SAMPLETYPE == "Vent")))
axial <- data.frame(variables = row.names(axial), sig = axial$`Pr(>F)`, samples = "Axial Seamount") %>% drop_na()
axial_df
<- dbrda_subset(subset_samples(df, (SITE == "GordaRidge" | SAMPLETYPE == "Vent"))); gr
gr <- data.frame(variables = row.names(gr), sig = gr$`Pr(>F)`, samples = "Gorda Ridge") %>% drop_na()
gr_df
<- bind_rows(
table2_output %>%
allsamples_df, ventonly_df, mcr_df, NEpac_df, vd_df, picc_df, axial_df, gr_df) add_column(Taxa = inputdata)
return(table2_output)
}
<- bind_rows(
table2_stats dbrda_output(physeq_insitu, "All taxa"),
dbrda_output(noNAs_phy, "No NAs, no metazoa"),
dbrda_output(res_vent, "Resident only"),
dbrda_output(cosmo_only, "Cosmopolitan only"),
dbrda_output(alvs_phy, "Alveolates (all)"),
dbrda_output(dino_syn_phy, "Dinoflagelaltes & Syndiniales"),
dbrda_output(cili_phy, "Ciliates"),
dbrda_output(rhiz_phy, "Rhizaria"),
dbrda_output(stram_phy, "Stramenopiles"),
)# View(table2_stats)
# head(table2_stats)
# unique(table2_stats$Taxa)
# range(table2_output$sig)
<- c("#f7f7f7","#fee5d9", "#fcae91", "#fb6a4a", "#a50f15")
reds
# Factor variables
# rm <- c("Von Damm", "Piccard", "Axial Seamount", "Gorda Ridge")
$SAMPLES <- factor(table2_stats$samples, levels = c("All samples", "Vent only", "MCR", "NE Pacific", "Von Damm", "Piccard", "Axial Seamount", "Gorda Ridge"), labels = c("All samples", "Vent only", "Mid-Cayman Rise (vents)", "NE Pacific (vents)", "Von Damm", "Piccard", "Axial Seamount", "Gorda Ridge"))
table2_stats
<- c("All taxa", "No NAs, no metazoa", "Resident only", "Cosmopolitan only", "Alveolates (all)", "Dinoflagelaltes & Syndiniales", "Ciliates", "Rhizaria", "Stramenopiles")
order_taxa $TAXA <- factor(table2_stats$Taxa, levels = rev(order_taxa))
table2_stats
$VARIABLES <- factor(table2_stats$variables,
table2_statslevels = c("depth", "temp", "prok", "ph", "persea", "mg", "h2", "h2s", "ch4"),
labels = c("Depth (m)", "Temperature (C)", "Cells ml^-1", "pH", "Seawater %", "Mg", "H2", "H2S", "CH4"))
# svg("dbrda-outcome-heatmap.svg", h = 4, w = 14)
%>%
table2_stats # filter(!(samples %in% rm)) %>%
ggplot(aes(x = VARIABLES, y = TAXA, fill = sig)) +
geom_tile(stat = "identity", color = "white") +
facet_grid(. ~ SAMPLES) +
scale_fill_stepsn(
limits = c(0,0.25),
breaks = c(0.01, 0.05, 0.1),
show.limits = TRUE,
colors = rev(reds)) +
theme_bw() +
scale_x_discrete(position = "top") +
theme(axis.text.x = element_text(angle = 90, vjust = 0, hjust = 0, color = "black"),
strip.background = element_blank(),
panel.border = element_blank(),
axis.text.y = element_text(color = "black"),
axis.ticks = element_blank(),
panel.grid = element_blank(),
strip.placement = "outside") +
labs(x = "Parameters by dataset", y = "Taxa")
# dev.off()
<- c("Axial")
axial <- c("VonDamm", "Piccard")
mcr <- c("GordaRidge")
gr <- c("Axial", "VonDamm", "Piccard", "GordaRidge") all
Create a bar plot showing the relative sequence abundance of 18S results to the Supergroup and Phylum level. Function averages across replicates and then sums to the phylum and supergroup level. Bar plot shows the relative sequence abundance.
<- function(df, selection){
make_bar_relabun <- df %>%
df_out filter(SITE %in% selection) %>%
filter(Domain == "Eukaryota") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
group_by(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species,
%>%
VENT, SITE, SAMPLETYPE, YEAR, DATASET) summarise(SEQ_AVG_REP = mean(value)) %>%
ungroup()
<- df_out %>%
supergroup group_by(SITE, SAMPLETYPE, VENT, YEAR, Supergroup) %>%
summarise(SEQ_SUM = sum(SEQ_AVG_REP)) %>%
ggplot(aes(x = VENT, y = SEQ_SUM, fill = Supergroup)) +
geom_bar(stat = "identity", position = "fill", color = "black", width = 0.9) +
facet_grid(. ~ SITE +YEAR + SAMPLETYPE, scale = "free", space = "free") +
theme_linedraw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
strip.background = element_blank(), strip.text = element_text(color = "black"),
legend.position = "right") +
scale_y_continuous(expand = c(0,0)) +
# scale_fill_brewer(palette = "Set2") +
scale_fill_manual(values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525", "#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525")) +
labs(x = "", y = "Relative abundance")
<- df_out %>%
phylum unite(SupergroupPhylum, Supergroup, Phylum, sep = "-") %>%
group_by(SITE, SAMPLETYPE, VENT, YEAR, SupergroupPhylum) %>%
summarise(SEQ_SUM = sum(SEQ_AVG_REP)) %>%
ggplot(aes(x = VENT, y = SEQ_SUM, fill = SupergroupPhylum)) +
geom_bar(stat = "identity", position = "fill", color = "black", width = 0.9) +
facet_grid(. ~ SITE +YEAR + SAMPLETYPE, scale = "free", space = "free") +
theme_linedraw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
strip.background = element_blank(), strip.text = element_text(color = "black"),
legend.position = "right") +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = c("#f1eef6", "#d7b5d8", "#df65b0", "#ce1256", "#fc9272", "#ef3b2c",
"#800026", "#fff7bc", "#fec44f", "#d95f0e", "#a63603", "#74c476", "#238b45",
"#00441b", "#7fcdbb", "#084081", "#c6dbef", "#2b8cbe", "#016c59", "#bcbddc",
"#807dba", "#54278f", "#bdbdbd", "black", "white", "#969696", "#525252", "#f1eef6", "#d7b5d8", "#df65b0", "#ce1256", "#fc9272", "#ef3b2c",
"#800026", "#fff7bc", "#fec44f", "#d95f0e", "#a63603", "#74c476", "#238b45",
"#00441b", "#7fcdbb", "#084081", "#c6dbef", "#2b8cbe", "#016c59", "#bcbddc",
"#807dba", "#54278f", "#bdbdbd", "black", "white")) +
labs(x = "", y = "Relative abundance")
+ phylum + patchwork::plot_layout(ncol = 1)
supergroup
}
# make_bar_relabun(insitu_asv_wClass, axial)
Relative abundance plots are misleading, as this tag-sequence data is compositional. To combat this, we can also perform a center log-ratio transformation of the sequence counts. This tile plot (or heat map) will show the relationship from the data mean. Positive values thus demonstrate an increase in the taxa, while negative values illustrate the opposite.
Ahead of the CLR transformation, average across replicates, then sum to the Class level. THEN perform CLR transformation and plot as heat map.
<- function(df, selection){
make_clr_trans_tile <- df %>%
df_wide filter(SITE %in% selection) %>%
# df_wide <- insitu_asv_wClass %>%
# filter(SITE %in% axial) %>%
filter(Domain == "Eukaryota") %>%
# Average across replicates
group_by(FeatureID, SAMPLENAME, VENT, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
# Sum to the Order taxonomic classification
unite(SAMPLENAME_2, SAMPLENAME, VENT, sep = "_") %>%
group_by(SAMPLENAME_2, Supergroup, Phylum, Class) %>%
summarise(CLASS_SUM = sum(AVG)) %>%
unite(CLASS, Supergroup, Phylum, Class, sep = " ") %>%
select(CLASS, SAMPLENAME_2, CLASS_SUM) %>%
pivot_wider(names_from = SAMPLENAME_2, values_from = CLASS_SUM, values_fill = 0) %>%
column_to_rownames(var = "CLASS")
## Take wide data frame and CLR transform, pivot to wide, and plot
data.frame(compositions::clr(df_wide)) %>%
rownames_to_column(var = "CLASS") %>%
pivot_longer(cols = starts_with(selection), values_to = "CLR", names_to = "SAMPLENAME_2") %>%
separate(SAMPLENAME_2, c("SAMPLENAME", "VENT"), sep = "_") %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
mutate(VENT = str_replace_all(VENT, "\\.", " ")) %>%
mutate(REGION = case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE # SITE == "Piccard" ~ "Piccard",
# SITE == "VonDamm" ~ "Von Damm",
== "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
REGION %>% select(-Sample_tmp) %>%
)) unite(SAMPLE, REGION, VENTNAME, sep = " ", remove = FALSE) %>%
separate(CLASS, c("Supergroup", "Phylum", "Class"), sep = " ", remove = FALSE) %>%
ggplot(aes(x = SAMPLE, y = Class, fill = CLR)) +
geom_tile(color = "#252525") +
theme(legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, color = "black",size = 8),
axis.text.y = element_text(color = "black", size = 8),
strip.background = element_blank(),
strip.text.y = element_text(hjust = 0, vjust = 0.5, angle = 0),
# strip.text.x = element_blank(),
legend.title = element_blank()) +
labs(x = "", y = "") +
scale_fill_gradient2(low = "#4575b4", mid = "white", high = "#d73027", na.value = "grey50") +
facet_grid(Supergroup + Phylum ~ SAMPLETYPE, space = "free", scales = "free")
}
Similar to aove, the first step in this function transforms data using CLR (to ASV level though). First plot will show eigen values (scree plot to determine if 2 vs. 3 dimensions is best for data). Then function extracts data points and creates PCA plot.
<- function(df, selection){
make_pca <- df %>%
df_wide_asv # df_wide_asv <- insitu_asv_wClass %>%
filter(SITE %in% selection) %>%
# filter(SITE %in% axial) %>%
filter(Domain == "Eukaryota") %>%
# Average across replicates
group_by(FeatureID, SAMPLENAME, VENT) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
mutate(REGION = case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
REGION %>% select(-Sample_tmp) %>%
)) unite(SAMPLE, SAMPLETYPE, REGION, VENTNAME, sep = "_", remove = FALSE) %>%
group_by(FeatureID, SAMPLE) %>%
summarise(SUM = sum(AVG)) %>%
pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>%
column_to_rownames(var = "SAMPLE")
# look at eigenvalues
<- prcomp(data.frame(compositions::clr(df_wide_asv)))
pca_lr <- (pca_lr$sdev^2)/sum(pca_lr$sdev^2)
variance_lr ## View bar plot
barplot(variance_lr, main = "Log-Ratio PCA Screeplot", xlab = "PC Axis", ylab = "% Variance",
cex.names = 1.5, cex.axis = 1.5, cex.lab = 1.5, cex.main = 1.5)
## Extract PCR points
data.frame(pca_lr$x, SAMPLE = rownames(pca_lr$x)) %>%
separate(SAMPLE, c("SAMPLETYPE", "REGION", "VENTNAME"), sep = "_", remove = FALSE) %>%
## Generate PCA plot
ggplot(aes(x = PC1, y = PC2, shape = SAMPLETYPE, fill = VENTNAME)) +
geom_hline(yintercept = 0) + geom_vline(xintercept = 0, color = "#525252") +
geom_point(size=4, stroke = 1, aes(fill = VENTNAME)) +
ylab(paste0('PC2 ',round(variance_lr[2]*100,2),'%')) +
xlab(paste0('PC1 ',round(variance_lr[1]*100,2),'%')) +
scale_shape_manual(values = c(21, 23, 24)) +
scale_fill_viridis(discrete = TRUE, option = "turbo") +
# scale_fill_manual(values = fill_color) +
# scale_color_manual(values = color_color) +
theme_bw() +
theme(axis.text = element_text(color="black", size=12),
legend.title = element_blank(),
axis.title = element_text(color="black", size=14),
legend.text = element_text(color = "black", size = 14),
plot.margin = margin(2, 1, 2, 1, "cm")) +
guides(fill = guide_legend(override.aes = list(shape = 21) ),
shape = guide_legend(override.aes = list(fill = "black")))
}
From complete dataset, average across replicates, then sum the total number of ASVs in each sample. Then plot a data point for total number of ASVs (ASV richness) by sample type - where sample type represents the vent, plume, vs. background. Box plots show the median and range.
<- function(df, selection){
make_asv_rich %>%
df filter(SITE %in% selection) %>%
filter(Domain == "Eukaryota") %>%
# Average across replicates
group_by(FeatureID, SAMPLENAME, VENT, Supergroup) %>%
summarise(AVG = mean(value)) %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
mutate(REGION = case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
REGION %>% select(-Sample_tmp) %>%
)) unite(SAMPLE, REGION, VENTNAME, sep = " ", remove = FALSE) %>%
ungroup() %>%
group_by(SITE, REGION, SAMPLE, SAMPLETYPE) %>%
summarise(NUM_ASV = n()) %>%
ggplot(aes(x = SAMPLETYPE, y = NUM_ASV, shape = SAMPLETYPE)) +
geom_boxplot(aes(group = SAMPLETYPE), alpha = 0.8, width = 0.4) +
geom_jitter(size=2, aes(fill = SITE)) +
scale_shape_manual(values = c(21, 23, 24)) +
theme_bw() +
theme(axis.text = element_text(color="black", size=12),
legend.title = element_blank(),
axis.title = element_text(color="black", size=14),
legend.text = element_text(color = "black", size = 14)) +
guides(fill = guide_legend(override.aes = list(shape = 21) ),
shape = guide_legend(override.aes = list(fill = "black") ) ) +
labs(x = "", y = "Total number of ASVs")
}
Bar plot (colors correspond to Supergroup) represents the number of ASVs shared or unique to each sample. Combination matrix below bars shows which samples are considered for the bar plot.
<- function(df, selection){
make_upset_plot %>%
df filter(SITE %in% selection) %>%
filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) # Taxa to supergroup
mutate(SupergroupPhylum = SUPERGROUP) %>%
# Average across replicates
group_by(FeatureID, SAMPLENAME, VENT, SupergroupPhylum) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
mutate(REGION = case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
REGION %>% select(-Sample_tmp) %>%
)) unite(SAMPLE, SITE, SAMPLETYPE, VENT, sep = " ", remove = FALSE) %>%
group_by(FeatureID, SupergroupPhylum, SAMPLE) %>%
summarise(SUM = sum(AVG)) %>%
# filter(SUM > 200) %>%
ungroup() %>%
distinct(FeatureID, SupergroupPhylum, SUM, SAMPLE, .keep_all = TRUE) %>%
group_by(FeatureID, SupergroupPhylum) %>%
summarise(SAMPLE = list(SAMPLE)) %>%
ggplot(aes(x = SAMPLE)) +
geom_bar(color = "black", width = 0.7, aes(fill = SupergroupPhylum)) +
scale_x_upset(n_intersections = 25) +
scale_y_continuous(expand = c(0,0)) +
labs(x = "", y = "Shared at ASV level") +
theme_linedraw() +
theme(axis.text.y = element_text(color="black", size=14, face = "bold"),
axis.text.x = element_text(color="black", size=14, face = "bold"),
axis.title = element_text(color="black", size=14, face = "bold"),
legend.text = element_text(color = "black", size = 12, face = "bold"),
legend.title = element_blank(),
panel.grid.minor = element_blank(),
plot.margin = margin(1, 1, 1, 5, "cm")) +
scale_fill_manual(values = all_taxa_color)
}# all_taxa_color
#values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#deebf7", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525", "#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525")
Filter data to reduce noise and show sample type to vent ecosystem variability.
<- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
alv <- c("Deep seawater", "BSW", "Shallow seawater")
bkgd <- c("Candelabra Plume", "Mt Edwards Plume", "Plume", "Near vent BW")
plume <- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")
cosmo
##
<- function(df, site){
tree_res_v_cosmo <- df %>%
to_supergroup_vent_only filter(SITE %in% site) %>%
mutate(DISTRIBUTION = case_when(
%in% cosmo ~ "Cosmopolitan",
CLASS == "Vent only" ~ "Resident"
CLASS %>%
)) filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) mutate(SAMPLETYPEORDER = case_when(
%in% bkgd ~ "Background",
VENT %in% plume ~ "Plume",
VENT TRUE ~ "Vent"
%>%
)) filter((DISTRIBUTION == "Resident" | DISTRIBUTION == "Cosmopolitan")) %>%
group_by(FeatureID, Taxon, SUPERGROUP,
%>%
VENT, SITE, SAMPLETYPE, YEAR, DATASET, SAMPLETYPEORDER, DISTRIBUTION) summarise(SEQ_AVG_REP = mean(value)) %>%
ungroup() %>%
group_by(SITE, SUPERGROUP, DISTRIBUTION) %>%
summarise(ASV_COUNT = n(),
SEQ_SUM = sum(SEQ_AVG_REP))
##
%>%
to_supergroup_vent_only ggplot(aes(area = SEQ_SUM, fill = SUPERGROUP, subgroup = SUPERGROUP)) +
geom_treemap(color = "white") +
geom_treemap_subgroup_border(colour = "white", size = 2) +
# geom_bar(stat = "identity", position = "fill", color = "black", width = 0.9) +
facet_grid(SITE ~ DISTRIBUTION) +
theme_linedraw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
strip.background = element_blank(), strip.text = element_text(color = "black"),
legend.position = "right",
legend.title = element_blank(),
panel.border = element_blank()) +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525", "#737373", "#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525")) +
labs(x = "", y = "Sequence proportion by Supergroup")
}
Isolate the top 15 taxa
# head(insitu_asv_wClass)
<- as.character(unique(insitu_asv_wClass$CLASS))
all_class
<- as.character(unique(insitu_asv_wClass$SITE_CLASS))
all_class_site
<- function(df, NUM, site, level, taxa, class, category, plot_tax){
top15 <- enquo(level)
level <- enquo(class)
class <- enquo(plot_tax)
plot_tax <- as.character(unique(insitu_asv_wClass$CLASS))
all_class <- as.character(unique(insitu_asv_wClass$SITE_CLASS))
all_class_site <- df %>%
out_table # filter(SITE %in% site) %>%
filter(!!level == taxa) %>%
# filter(Domain %in% "Eukaryota") %>%
# filter(CLASS %in% all_class) %>%
filter(!!class %in% category) %>%
# Average across replicates
group_by(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species,
!!class) %>%
VENT, SITE, SAMPLETYPE, YEAR, DATASET, summarise(SEQ_AVG_REP = mean(value)) %>%
ungroup() %>%
# Select top 15 ASVs
group_by(VENT, SITE, SAMPLETYPE, YEAR, DATASET) %>%
top_n(NUM, wt = SEQ_AVG_REP) %>%
unite(SAMPLENAME, SITE, SAMPLETYPE, VENT, YEAR, sep = " ", remove = FALSE)
#
<- ggplot(out_table, aes(x = SAMPLENAME, fill = !!plot_tax)) +
plot geom_bar(stat = "count", color = "black", width = 0.7) +
coord_flip() +
facet_grid(SAMPLETYPE + SITE ~ ., space = "free", scales = "free") +
theme_classic() +
scale_fill_viridis(discrete = TRUE, option = "turbo")
##
::ggplotly(plot)
plotly
}
# Function usage:
# top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax)
# top15(insitu_asv_wClass, 10, all, Phylum, "Ciliophora", CLASS, all_class, Class)
# all_class
make_bar_relabun(insitu_asv_wClass, axial)
Axial Seamount samples from archived material - span 2013, 2014, and 2015. First, the background and plume (from 2015 only, and from plume associated with the Anemone vent) are different from the vent samples - overwhelmingly stramneopile and rhizaria. For the background and plume, the stramenopiles appear to be associated with ochrophyta or opalozoa. For the plume, the rhizaria population was associated with cercozoa, while the background seawater was identified as belonging to radiolaria.
The major difference between the background/plume and vent sites was the higher relative sequence abundance of ciliates and opisthokonta. For the opisthokonta, these are primarily metazoa - and I will need to investigate this further. Exceptions for this include the ‘Dependable’ vent from 2013, which had a completely different composition, and ‘Marker 113’ in 2015, which the opisthokonta sequences were assigned choanoflagellate and fungi.
Further questions to consider
Any geochemical changes to Marker 113 from 2013/2014 to 2015? Could attribute difference of opisthokonta colonization.
# make_clr_trans_tile(insitu_asv_wClass, axial)
Tile plot goes to the Class taxonomic level. Here at Axial, mostly the ciliate class had higher CLR values (more enriched relative to the data mean). Second to ciliates were cercozoa. Also noticing how Marker 113 2013 and 2015 are more similar to each other than 2014?
make_pca(insitu_asv_wClass, axial)
## Warning: Expected 4 pieces. Additional pieces discarded in 7717 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
While we only have 1 plume and bsw each for Axial, they are grouping together - away from vents. So that is an expected signature and likely consistent with the other sites. These colors are a little confusing, it does look like Boca is an outlier.
# make_asv_rich(insitu_asv_wClass, axial)
We only have 1 sample for background and plume from Axial Seamount. But this shows that the vent sites have varied ASV richness,
make_upset_plot(insitu_asv_wClass, axial)
## Warning: Expected 4 pieces. Additional pieces discarded in 6856 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Removed 723 rows containing non-finite values (stat_count).
<- make_upset_plot(insitu_asv_wClass, axial) axial_upset
## Warning: Expected 4 pieces. Additional pieces discarded in 6856 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
<- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
alv # svg("upsetR-bysite-sampletype-nov2.svg", h=9, w=15)
<- insitu_asv_wClass %>%
axial_loc_yr filter(SITE %in% axial) %>%
filter(Domain == "Eukaryota") %>%
filter(Supergroup != "Opisthokonta") %>%
mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>%
mutate(SUPERGROUP = case_when(
%in% alv ~ "Other Alveolata",
Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
Supergroup == "Cercozoa" ~ "Rhizaria-Cercozoa",
Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
Phylum TRUE ~ Supergroup
%>%
)) # Taxa to supergroup
mutate(SupergroupPhylum = SUPERGROUP) %>%
# Average across replicates
group_by(FeatureID, SAMPLENAME, VENT, SupergroupPhylum) %>%
summarise(AVG = mean(value)) %>%
ungroup() %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
mutate(REGION = case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
REGION %>% select(-Sample_tmp) %>%
)) filter(REGION == "Axial") %>%
unite(SAMPLE, SITE, SAMPLETYPE, sep = " ", remove = FALSE) %>%
unite(YR_LOC, SAMPLETYPE, YEAR, sep = " ", remove = FALSE) %>%
group_by(FeatureID, SupergroupPhylum, YR_LOC) %>%
summarise(SUM = sum(AVG)) %>%
# filter(SUM > 200) %>%
ungroup()
## Warning: Expected 4 pieces. Additional pieces discarded in 6856 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
##
%>%
axial_loc_yr distinct(FeatureID, SupergroupPhylum, SUM, YR_LOC, .keep_all = TRUE) %>%
group_by(FeatureID, SupergroupPhylum) %>%
summarise(SAMPLE = list(YR_LOC)) %>%
ggplot(aes(x = SAMPLE)) +
geom_bar(color = "black", width = 0.5, aes(fill = SupergroupPhylum)) +
scale_x_upset(n_intersections = 25) +
scale_y_continuous(expand = c(0,0)) +
labs(x = "", y = "Shared ASVs") +
theme_linedraw() +
theme(axis.text.y = element_text(color="black", size=14),
axis.text.x = element_text(color="black", size=14),
axis.title = element_text(color="black", size=14),
legend.text = element_text(color = "black", size = 12),
plot.margin = margin(1, 1, 1, 5, "cm")) +
scale_fill_manual(values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#deebf7", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525", "#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525"))
## Warning: Removed 13 rows containing non-finite values (stat_count).
# dev.off()
Extract total number of ASVs from Axial data, and those that were found within the vent fluid only.
length(unique(axial_loc_yr$FeatureID))
## [1] 3586
<- axial_loc_yr %>%
tmp filter(grepl("Vent", YR_LOC)) %>%
pivot_wider(names_from = YR_LOC, values_from = SUM, values_fill = NA) %>%
drop_na()
head(tmp)
## # A tibble: 6 × 5
## FeatureID SupergroupPhylum Vent …¹ Vent …² Vent …³
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 00b72d1a5fefb03bc39edbfba4566d03 Alveolata-Ciliophora 11403 1817 721
## 2 01a71ee728b1597b04d339505a58ed04 Stramenopiles 368 51 224
## 3 04f5a1d4ab104eeb74573c3b79de10e0 Stramenopiles 273 120 200
## 4 05b6d079805b2bb389fee9b9a7e0feba Stramenopiles-Sageni… 133 50 10
## 5 068cf0d76352f3978c0e4f1dd9457ba0 Stramenopiles 988 40 172
## 6 06cb67e2036452e926aaf2edeef3acc6 Alveolata-Dinoflagel… 544 240 140
## # … with abbreviated variable names ¹`Vent 2013`, ²`Vent 2014`, ³`Vent 2015`
length(unique(tmp$FeatureID))
## [1] 177
%>%
tmp pivot_longer(starts_with("Vent ")) %>%
group_by(name, SupergroupPhylum) %>%
summarise(VALUE = sum(value)) %>%
ggplot(aes(x = name, y = VALUE, fill = SupergroupPhylum)) +
geom_bar(stat = "identity")
Plot only cosmopolitan vs. resident ASV relative abundances by Supergroup.
tree_res_v_cosmo(insitu_asv_wClass, axial)
# Function usage:
# top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax)
top15(insitu_asv_wClass, 15, axial, Domain, "Eukaryota", CLASS, all_class, Phylum)
unique(insitu_asv_wClass$Phylum)
## [1] "Sagenista" "Radiolaria" "Ciliophora"
## [4] "Ochrophyta" "Cercozoa" "Opalozoa"
## [7] "Metazoa" NA "Fungi"
## [10] "Dinoflagellata" "Stramenopiles_X" "Patescibacteria"
## [13] "Apusomonadidae" "Pseudofungi" "Haptophyta"
## [16] "Chlorophyta" "Perkinsea" "Telonemia"
## [19] "Choanoflagellida" "Lobosa" "Conosa"
## [22] "Centroheliozoa" "Cryptophyta" "Streptophyta"
## [25] "Picozoa" "Opisthokonta_X" "Metamonada"
## [28] "Hilomonadea" "Mesomycetozoa" "Chrompodellids"
## [31] "Prasinodermophyta" "Apicomplexa" "Katablepharidophyta"
## [34] "Breviatea" "Epsilonbacteraeota" "Proteobacteria"
## [37] "Aquificae" "Cyanobacteria" "Euryarchaeota"
## [40] "Alveolata_X" "Bacteroidetes" "Rhodophyta"
## [43] "Protalveolata_X" "Discoba"
<- c("Vent only")
vent_only # Isolate top 15 vent-only ASVs from axial - at the phylum level
top15(insitu_asv_wClass, 15, axial, Domain, "Eukaryota", CLASS, vent_only, Phylum)
# Top 10 ciliate only taxa, as most were ciliates
top15(insitu_asv_wClass, 10, axial, Phylum, "Ciliophora", CLASS, vent_only, Class)
# Top 10 ciliate only taxa, as most were ciliates
top15(insitu_asv_wClass, 10, axial, Phylum, "Opalozoa", CLASS, vent_only, Class)
# make_bar_relabun(insitu_asv_wClass, mcr)
# make_clr_trans_tile(insitu_asv_wClass, mcr)
make_pca(insitu_asv_wClass, mcr)
## Warning: Expected 4 pieces. Additional pieces discarded in 8327 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# make_asv_rich(insitu_asv_wClass, mcr)
make_upset_plot(insitu_asv_wClass, mcr)
## Warning: Expected 4 pieces. Additional pieces discarded in 7678 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Removed 1000 rows containing non-finite values (stat_count).
<- make_upset_plot(insitu_asv_wClass, mcr) mcr_upset
## Warning: Expected 4 pieces. Additional pieces discarded in 7678 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
Repeat presence-absence plot, but with a lower resolution.
%>%
insitu_asv_wClass filter(SITE %in% mcr) %>%
filter(Domain == "Eukaryota") %>%
# Average across replicates
group_by(FeatureID, SAMPLENAME, VENT, Supergroup) %>%
summarise(AVG = mean(value)) %>%
separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>%
mutate(REGION = case_when(
== "GordaRidge" ~ "Gorda Ridge",
SITE %in% mcr ~ "Mid-Cayman Rise",
SITE == "Axial" ~ "Axial")) %>%
SITE mutate(VENTNAME = case_when(
== "Gorda Ridge" ~ VENT,
REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
REGION %>% select(-Sample_tmp) %>%
)) unite(SAMPLE, SITE, SAMPLETYPE, sep = " ", remove = FALSE) %>%
group_by(FeatureID, Supergroup, SAMPLE) %>%
summarise(SUM = sum(AVG)) %>%
ungroup() %>%
distinct(FeatureID, Supergroup, SUM, SAMPLE, .keep_all = TRUE) %>%
group_by(FeatureID, Supergroup) %>%
summarise(SAMPLE = list(SAMPLE)) %>%
ggplot(aes(x = SAMPLE)) +
geom_bar(color = "black", width = 0.5, aes(fill = Supergroup)) +
scale_x_upset(n_intersections = 15) +
scale_y_continuous(expand = c(0,0)) +
labs(x = "", y = "Shared ASVs") +
theme_linedraw() +
theme(axis.text = element_text(color="black", size=10),
axis.title = element_text(color="black", size=10),
legend.text = element_text(color = "black", size = 10),
plot.margin = margin(1, 1, 1, 5, "cm")) +
scale_fill_manual(values = c("#f1eef6", "#d7b5d8", "#df65b0", "#ce1256", "#fc9272", "#ef3b2c",
"#800026", "#fff7bc", "#fec44f", "#d95f0e", "#a63603", "#74c476", "#238b45",
"#00441b", "#7fcdbb", "#084081", "#c6dbef", "#2b8cbe", "#016c59", "#bcbddc",
"#807dba", "#54278f", "#bdbdbd", "black"))
## Warning: Expected 4 pieces. Additional pieces discarded in 8327 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Removed 331 rows containing non-finite values (stat_count).
# Function usage:
# top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax)
top15(insitu_asv_wClass, 15, mcr, Domain, "Eukaryota", CLASS, all_class, Phylum)
# unique(insitu_asv_wClass$CLASS)
<- c("Vent only")
vent_only # Isolate top 15 vent-only ASVs from MCR - at the phylum level
top15(insitu_asv_wClass, 15, mcr, Domain, "Eukaryota", CLASS, vent_only, Phylum)
# Top 10 ciliate only taxa, as most were ciliates
top15(insitu_asv_wClass, 10, mcr, Phylum, "Ciliophora", CLASS, vent_only, Class)
## Bar plot relative abundance: GR
make_bar_relabun(insitu_asv_wClass, gr)
# make_clr_trans_tile(insitu_asv_wClass, gr)
make_pca(insitu_asv_wClass, gr)
## Warning: Expected 4 pieces. Additional pieces discarded in 9456 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# make_asv_rich(insitu_asv_wClass, gr)
make_upset_plot(insitu_asv_wClass, gr)
## Warning: Expected 4 pieces. Additional pieces discarded in 8710 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Removed 731 rows containing non-finite values (stat_count).
<- make_upset_plot(insitu_asv_wClass, gr) gr_upset
## Warning: Expected 4 pieces. Additional pieces discarded in 8710 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/figs3-upsetRpanels.svg", h = 17, w = 20)
+ theme(legend.position = "none")) + (axial_upset + theme(legend.position = "none")) + (gr_upset + theme(legend.position = "none")) + (mcr_upset + theme(legend.position = "none")) + patchwork::plot_layout(nrow = 2, ncol = 2) + plot_annotation(tag_levels = "a") (genus_upset
## Warning: Removed 316 rows containing non-finite values (stat_count).
## Warning: Removed 723 rows containing non-finite values (stat_count).
## Warning: Removed 731 rows containing non-finite values (stat_count).
## Warning: Removed 1000 rows containing non-finite values (stat_count).
# dev.off()
# Function usage:
# top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax)
top15(insitu_asv_wClass, 15, gr, Domain, "Eukaryota", CLASS, all_class, Phylum)
# unique(insitu_asv_wClass$CLASS)
<- c("Vent only")
vent_only # Isolate top 15 vent-only ASVs from GR - at the phylum level
top15(insitu_asv_wClass, 15, gr, Domain, "Eukaryota", CLASS, vent_only, Phylum)
# Top 10 ciliate only taxa, as most were ciliates
top15(insitu_asv_wClass, 10, gr, Phylum, "Ciliophora", CLASS, vent_only, Class)
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS/LAPACK: /Users/sarahhu/anaconda3/envs/r_4.1/lib/libopenblasp-r0.3.15.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] DivNet_0.3.7 breakaway_4.7.3 geosphere_1.5-14 ggdendro_0.1.22
## [5] vegan_2.5-7 lattice_0.20-45 permute_0.9-5 viridis_0.6.2
## [9] viridisLite_0.4.1 plotly_4.10.0 treemapify_2.5.5 gt_0.3.1
## [13] ggupset_0.3.0 ggpubr_0.4.0 patchwork_1.1.1 compositions_2.0-4
## [17] decontam_1.12.0 phyloseq_1.36.0 cowplot_1.1.1 forcats_0.5.1
## [21] stringr_1.4.0 dplyr_1.0.9 purrr_0.3.4 readr_2.1.2
## [25] tidyr_1.2.0 tibble_3.1.7 ggplot2_3.3.6 tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.4.1 plyr_1.8.7
## [4] igraph_1.3.1 lazyeval_0.2.2 sp_1.5-0
## [7] splines_4.1.0 crosstalk_1.2.0 GenomeInfoDb_1.28.4
## [10] digest_0.6.29 foreach_1.5.1 htmltools_0.5.2
## [13] fansi_1.0.3 magrittr_2.0.3 checkmate_2.0.0
## [16] doParallel_1.0.16 cluster_2.1.3 tzdb_0.2.0
## [19] Biostrings_2.60.2 ggfittext_0.9.1 modelr_0.1.8
## [22] bayesm_3.1-4 colorspace_2.0-3 rvest_1.0.2
## [25] haven_2.4.3 xfun_0.29 crayon_1.5.1
## [28] RCurl_1.98-1.3 jsonlite_1.8.0 lme4_1.1-27.1
## [31] survival_3.3-1 iterators_1.0.13 ape_5.6-2
## [34] glue_1.6.2 gtable_0.3.0 zlibbioc_1.38.0
## [37] XVector_0.32.0 car_3.0-12 Rhdf5lib_1.14.2
## [40] BiocGenerics_0.38.0 DEoptimR_1.0-11 abind_1.4-5
## [43] scales_1.2.1 DBI_1.1.2 rstatix_0.7.0
## [46] Rcpp_1.0.8 stats4_4.1.0 htmlwidgets_1.5.4
## [49] httr_1.4.3 ellipsis_0.3.2 pkgconfig_2.0.3
## [52] farver_2.1.0 sass_0.4.0 dbplyr_2.2.1
## [55] utf8_1.2.2 tidyselect_1.1.2 labeling_0.4.2
## [58] rlang_1.0.4 reshape2_1.4.4 polynom_1.4-0
## [61] munsell_0.5.0 cellranger_1.1.0 tools_4.1.0
## [64] cachem_1.0.6 cli_3.3.0 generics_0.1.3
## [67] ade4_1.7-18 broom_1.0.0 evaluate_0.15
## [70] biomformat_1.20.0 fastmap_1.1.0 yaml_2.3.5
## [73] knitr_1.39 fs_1.5.2 robustbase_0.93-9
## [76] nlme_3.1-158 mvnfast_0.2.7 xml2_1.3.3
## [79] compiler_4.1.0 rstudioapi_0.13 ggsignif_0.6.3
## [82] reprex_2.0.1 bslib_0.4.0 stringi_1.7.5
## [85] highr_0.9 Matrix_1.4-1 nloptr_1.2.2.3
## [88] tensorA_0.36.2 multtest_2.48.0 vctrs_0.4.1
## [91] pillar_1.8.1 lifecycle_1.0.1 rhdf5filters_1.4.0
## [94] jquerylib_0.1.4 data.table_1.14.0 bitops_1.0-7
## [97] R6_2.5.1 gridExtra_2.3 IRanges_2.26.0
## [100] codetools_0.2-18 boot_1.3-28 MASS_7.3-57
## [103] assertthat_0.2.1 rhdf5_2.36.0 withr_2.5.0
## [106] S4Vectors_0.30.2 GenomeInfoDbData_1.2.6 mgcv_1.8-36
## [109] parallel_4.1.0 hms_1.1.1 grid_4.1.0
## [112] minqa_1.2.4 rmarkdown_2.13 carData_3.0-5
## [115] Biobase_2.52.0 lubridate_1.8.0