1 Background

Preliminary analyses for microeukaryote tag-sequence survey.

Description: Investigate the diversity of single-celled microbial eukaryotic communities across several deep-sea hydrothermal vent sites (including NE Pacific, Caribbean). We plan to address questions related to the environmental factors that shape protistan community dynamics, and determine if patterns in species diversity and distribution vary at different deep-sea habitats. These questions will be addressed using similarly generated metabarcoding data from several distinct hydrothermal vents. Along with characterizing community structure, we plan to evaluate interactions between protist species (to identify putative predator-prey or parasite-host relationships) and their environment (to explore their relationship to geochemical properties).

Questions to address

What is the general biogeography and distribution of the deep-sea hydrothermal vent microbial eukaryotic community?

What community structure features (i.e., species richness, proportion cosmopolitan versus endemic, species evenness) are shared across or unique to deep-sea hydrothermal vent sites?

What environmental features (i.e., temperature, geochemistry) influence microbial eukaryotic community diversity? Can we identify if certain environmental factors select for putative vent endemics?

1.1 Set up R environment

All upstream processing of sequences from QIIME2 can be found here.

## Warning: package 'ggplot2' was built under R version 4.1.3
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'lattice' was built under R version 4.1.1
axial <- c("Axial")
mcr <- c("VonDamm", "Piccard")
gr <- c("GordaRidge")
all <- c("Axial", "VonDamm", "Piccard", "GordaRidge")
load("data-input/asv-tables-processed-27012022.RData", verbose = TRUE)
## Loading objects:
##   asv_insitu
##   asv_insitu_qc
##   insitu_asv_wClass
metadata <- read.delim("data-input/samplelist-metadata.txt", na.strings = "")

2 Overall community composition

Investigate overall protistan community composition.

2.1 Relative abundance bar plot

First, establish order and color key for all taxonomic visuals.

all_taxa_order <- c("Alveolata-Ciliophora", "Alveolata-Dinoflagellata", "Protalveolata", "Other Alveolata", "Amoebozoa", "Apusozoa", "Excavata", "Hacrobia", "Archaeplastida", "Rhizaria", "Rhizaria-Radiolaria", "Rhizaria-Cercozoa", "Stramenopiles", "Stramenopiles-Opalozoa", "Stramenopiles-Sagenista", "Stramenopiles-Ochrophyta", "Opisthokonta", "Unknown Eukaryota")
# length(all_taxa_order)

all_taxa_color = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c","#cc4c02", "#c7e9b4", "#238b45", "#c6dbef", "#99d8c9","#1d91c0", "#253494", "#9e9ac8", "#54278f", "#bdbdbd", "#252525", "#f0f0f0")
# length(all_taxa_color)
alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
bkgd <- c("Deep seawater", "BSW", "Shallow seawater", "Near vent BW")
plume <- c("Candelabra Plume", "Mt Edwards Plume", "Plume", "Anemone Plume")

all_taxa_color_protalve = c("#fa9fb5", "#c51b8a","#f0f0f0", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c","#cc4c02", "#c7e9b4", "#238b45", "#c6dbef", "#99d8c9","#1d91c0", "#253494", "#9e9ac8", "#54278f", "#bdbdbd", "#252525", "#f0f0f0")

make_bar_relabun <- function(df, selection){
  df_out <- df %>%
  filter(SITE %in% selection) %>% 
  filter(Domain == "Eukaryota") %>%
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  group_by(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species,
           VENT, SITE, SAMPLETYPE, YEAR, DATASET, SUPERGROUP) %>% 
    summarise(SEQ_AVG_REP = mean(value)) %>% 
  ungroup()
df_out %>% 
  group_by(SITE, SAMPLETYPE, VENT, YEAR, SUPERGROUP) %>% 
    summarise(SEQ_SUM = sum(SEQ_AVG_REP)) %>% 
  mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>% 
  ggplot(aes(x = VENT, y = SEQ_SUM, fill = SUPERGROUP_ORDER)) +
    geom_bar(stat = "identity", position = "fill", color = "black", width = 0.9) +
    facet_grid(. ~ SITE +YEAR + SAMPLETYPE, scale = "free", space = "free") +
  theme_linedraw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, face = "bold", color = "black"),
        strip.background = element_blank(), 
        strip.text = element_text(color = "black", face = "bold", angle = 90),
        axis.title = element_text(color = "black", face = "bold"),
        axis.text.y = element_text(color = "black", face = "bold"),
        legend.position = "right",
        legend.title = element_blank()) +
  scale_y_continuous(expand = c(0,0)) +
    scale_fill_manual(values = all_taxa_color_protalve) +
  labs(x = "", y = "Relative sequence abundance")
}
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/figs2-barplotall.svg", h = 9, w = 13)
all <- c("Axial", "VonDamm", "Piccard", "GordaRidge")
make_bar_relabun(insitu_asv_wClass, all)

# dev.off()

2.2 Heat map at class level

alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
bkgd <- c("Deep seawater", "BSW", "Shallow seawater", "Near vent BW")
plume <- c("Candelabra Plume", "Mt Edwards Plume", "Plume", "Anemone Plume")

tmp_phyla <- insitu_asv_wClass %>%
  filter(Domain == "Eukaryota") %>%
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>% 
  group_by(FeatureID, Taxon, Domain, Supergroup, Phylum, Class,
           SITE, SAMPLETYPE, VENT, YEAR, SUPERGROUP_ORDER) %>% 
    summarise(SEQ_AVG_REP = mean(value)) %>% 
  ungroup() %>% 
  group_by(SITE, SAMPLETYPE, VENT, YEAR) %>% 
    mutate(TOTAL = sum(SEQ_AVG_REP)) %>% 
  ungroup(SITE, SAMPLETYPE, VENT, YEAR) %>% 
  group_by(SITE, SAMPLETYPE, VENT, YEAR, SUPERGROUP_ORDER, Phylum, Class) %>% 
    summarise(SEQ_SUM = sum(SEQ_AVG_REP),
              REL_ABUN = (SEQ_SUM/TOTAL)) %>% 
  distinct()

###
# ggplot(tmp_phyla, aes(x = VENT, y = Class, fill = (REL_ABUN))) +
#     geom_tile(stat = "identity", color = "black") +
#     facet_grid(SUPERGROUP_ORDER + Phylum ~ SITE +YEAR + SAMPLETYPE, scale = "free", space = "free") +
#   theme_linedraw() +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
#         strip.background = element_blank(), strip.text = element_text(color = "black"),
#         legend.position = "right",
#         strip.text.y = element_text(angle = 0)) +
#   scale_fill_gradient(low = "white", high = "red") +
#   labs(x = "", y = "Relative abundance")

2.2.1 Stats on sequences and ASVs

Out of all the ASVs and sequences, what percentage of ASVs were found at all 4 of my vent sites? what % of ASVs were unique to individual sites? what % of sequences?

totalasvs <- length(unique(insitu_asv_wClass$FeatureID)); totalasvs
## [1] 12375
totalseq <- sum(insitu_asv_wClass$value); totalseq
## [1] 3788734
unique(insitu_asv_wClass$SITE_CLASS)
##  [1] "Gorda Ridge only"               "Axial only"                    
##  [3] "Von Damm only"                  "Piccard & Von Damm"            
##  [5] "Piccard only"                   "MCR & Axial"                   
##  [7] "Axial & Gorda Ridge"            "Von Damm & Gorda Ridge"        
##  [9] "Von Damm, Axial, & Gorda Ridge" "Von Damm & Axial"              
## [11] "All sites"                      "MCR & Gorda Ridge"             
## [13] "Piccard, Axial, & Gorda Ridge"  "Piccard & Gorda Ridge"         
## [15] "Piccard & Axial"
tmp_unique <- filter(insitu_asv_wClass, grepl(" only", SITE_CLASS))
tmp_shared <- filter(insitu_asv_wClass, !(grepl(" only", SITE_CLASS)))
tmp_allshared <- filter(insitu_asv_wClass, SITE_CLASS == "All sites")

# Total number of ASVs that were unique to an individual vent field, includes "only" in the site classification.
a <- length(unique(tmp_unique$FeatureID)); a
## [1] 10191
100*(a/totalasvs)
## [1] 82.35152
# % of sequences
a_sum <- sum(tmp_unique$value); a_sum
## [1] 1187229
100*(a_sum/totalseq)
## [1] 31.33577
# Total number of ASVs that were found in more than 1 sample
b <- length(unique(tmp_shared$FeatureID)); b
## [1] 2184
100*(b/totalasvs)
## [1] 17.64848
b_sum <- sum(tmp_shared$value); b_sum
## [1] 2601505
100*(b_sum/totalseq)
## [1] 68.66423
# Total number of ASVs  at all sites
c <- length(unique(tmp_allshared$FeatureID)); c
## [1] 194
100*(c/totalasvs)
## [1] 1.567677
c_sum <- sum(tmp_allshared$value); c_sum
## [1] 834734
100*(c_sum/totalseq)
## [1] 22.032
# Select ASVs found in 1 field and at vent only
tmp_1field_vent <- filter(insitu_asv_wClass, grepl(" only", SITE_CLASS) & CLASS == "Vent only")
d <- length(unique(tmp_1field_vent$FeatureID)); d
## [1] 7325
100*(d/totalasvs)
## [1] 59.19192
d_sum <- sum(tmp_1field_vent$value); d_sum
## [1] 682645
100*(d_sum/totalseq)
## [1] 18.01776

2.3 PCA analysis with all samples

df_wide_asv <- insitu_asv_wClass %>%
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>% 
  # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT) %>% 
    summarise(AVG = mean(value)) %>% 
    ungroup() %>% 
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>% 
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>% 
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>% select(-Sample_tmp) %>% 
  unite(SAMPLE, SAMPLETYPE, SITE, VENTNAME, sep = "_", remove = FALSE) %>% 
   group_by(FeatureID, SAMPLE) %>% 
   summarise(SUM = sum(AVG)) %>% 
  pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>% 
  column_to_rownames(var = "SAMPLE")
## Warning: Expected 4 pieces. Additional pieces discarded in 25500 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
  # look at eigenvalues
  pca_lr <- prcomp(data.frame(compositions::clr(df_wide_asv)))
  variance_lr <- (pca_lr$sdev^2)/sum(pca_lr$sdev^2)
  ## View bar plot
  barplot(variance_lr, main = "Log-Ratio PCA Screeplot", xlab = "PC Axis", ylab = "% Variance", 
    cex.names = 1.5, cex.axis = 1.5, cex.lab = 1.5, cex.main = 1.5)

  ## Extract PCR points
  data.frame(pca_lr$x, SAMPLE = rownames(pca_lr$x)) %>% 
      separate(SAMPLE, c("SAMPLETYPE", "REGION", "VENTNAME"), sep = "_", remove = FALSE) %>% 
  ## Generate PCA plot
  ggplot(aes(x = PC1, y = PC2, shape = SAMPLETYPE, fill = REGION)) +
    geom_hline(yintercept = 0) + geom_vline(xintercept = 0, color = "#525252") +
    geom_point(size=3, stroke = 1, aes(fill = REGION)) +
    ylab(paste0('PC2 ',round(variance_lr[2]*100,2),'%')) +
    xlab(paste0('PC1 ',round(variance_lr[1]*100,2),'%')) +
    scale_shape_manual(values = c(21, 23, 24)) +
    scale_fill_manual(values = c("#fdbb84", "#31a354", "#ef3b2c", "#02818a")) +
    theme_bw() +
    theme(axis.text = element_text(color="black", size=12),
          legend.title = element_blank(),
          axis.title = element_text(color="black", size=14),
          legend.text = element_text(color = "black", size = 14),
          plot.margin = margin(2, 1, 2, 1, "cm")) +
    guides(fill = guide_legend(override.aes = list(shape = 21) ),
              shape = guide_legend(override.aes = list(fill = "black")))

2.4 Dendrogram Jaccard dissimilarity

df <- as.data.frame(t(df_wide_asv))
names(df)
##  [1] "Vent_GordaRidge_Venti Latte"           
##  [2] "Background_GordaRidge_Shallow seawater"
##  [3] "Vent_Axial_Boca 2013"                  
##  [4] "Vent_Axial_Escargot 2014"              
##  [5] "Background_GordaRidge_Deep seawater"   
##  [6] "Vent_VonDamm_VonDamm Rav2"             
##  [7] "Vent_Axial_N3Area 2013"                
##  [8] "Vent_GordaRidge_Candelabra"            
##  [9] "Background_GordaRidge_Near vent BW"    
## [10] "Plume_GordaRidge_Candelabra Plume"     
## [11] "Plume_GordaRidge_Mt Edwards Plume"     
## [12] "Vent_GordaRidge_Sir Ventsalot"         
## [13] "Vent_Axial_El Guapo 2013"              
## [14] "Vent_Axial_Marker33 2014"              
## [15] "Plume_VonDamm_VonDamm Plume"           
## [16] "Vent_VonDamm_VonDamm OldManTree"       
## [17] "Vent_Piccard_Piccard LotsOShrimp"      
## [18] "Vent_Piccard_Piccard Shrimpocalypse"   
## [19] "Vent_VonDamm_VonDamm ArrowLoop"        
## [20] "Vent_Axial_Anemone 2013"               
## [21] "Vent_VonDamm_VonDamm ShrimpHole"       
## [22] "Vent_VonDamm_VonDamm X18"              
## [23] "Vent_Axial_Marker113 2015"             
## [24] "Vent_Axial_Marker33 2013"              
## [25] "Background_Piccard_Piccard BSW"        
## [26] "Plume_Axial_Anemone Plume 2015"        
## [27] "Vent_Axial_Marker113 2013"             
## [28] "Vent_Axial_Marker113 2014"             
## [29] "Vent_GordaRidge_Mt Edwards"            
## [30] "Background_VonDamm_VonDamm BSW"        
## [31] "Background_Axial_Deep seawater 2015"   
## [32] "Plume_Piccard_Piccard Plume"           
## [33] "Vent_Axial_Skadi 2013"                 
## [34] "Vent_VonDamm_VonDamm WhiteCastle"      
## [35] "Vent_VonDamm_VonDamm MustardStand"
###
  colnames(df) <- gsub(x = names(df), pattern = "_", replacement = " ")
  colnames(df) <- gsub(x = names(df), pattern = "Vent Axial", replacement = "Axial")
  colnames(df) <- gsub(x = names(df), pattern = "Vent GordaRidge", replacement = "Gorda Ridge")
  colnames(df) <- gsub(x = names(df), pattern = "GordaRidge", replacement = "Gorda Ridge")
  colnames(df) <- gsub(x = names(df), pattern = "Vent VonDamm VonDamm", replacement = "Von Damm")
  colnames(df) <- gsub(x = names(df), pattern = "Vent Piccard Piccard", replacement = "Piccard")
  colnames(df) <- gsub(x = names(df), pattern = "Background Gorda Ridge", replacement = "Gorda Ridge")
  colnames(df) <- gsub(x = names(df), pattern = "Plume Gorda Ridge", replacement = "Gorda Ridge")
  colnames(df) <- gsub(x = names(df), pattern = "VonDamm VonDamm", replacement = "Von Damm")
  colnames(df) <- gsub(x = names(df), pattern = "Piccard Piccard", replacement = "Piccard")
  colnames(df) <- gsub(x = names(df), pattern = " BSW", replacement = "")
  colnames(df) <- gsub(x = names(df), pattern = "Background Axial Deep seawater 2015", replacement = "Background Axial 2015")
  colnames(df) <- gsub(x = names(df), pattern = "Plume Axial Anemone Plume 2015", replacement = "Axial Anemone Plume 2015")
  colnames(df) <- gsub(x = names(df), pattern = "Rav2", replacement = "Ravelin #2")
  colnames(df) <- gsub(x = names(df), pattern = "Plume Von Damm", replacement = "Von Damm")
  colnames(df) <- gsub(x = names(df), pattern = "Plume Piccard", replacement = "Piccard")
# names(df)

# Write over same data frame - fix sample names
dendro_input <- df
# head(dendro_input)

Estimate Jaccard distance

dendro_jacc <- vegan::vegdist(t(dendro_input), method = "jaccard")
# dendro_jacc <- vegan::vegdist(t(compositions::clr(dendro_input)), method = "euclidean")

cluster_jacc <- hclust(dist(t(dendro_jacc)), method = "average")

library(ggdendro)
dendro_plot_df <- ggdendro::dendro_data(as.dendrogram(cluster_jacc), type = "rectangle")
label_dendro_order <- as.character(dendro_plot_df$labels$label)
# label_dendro_order

Check stress

# euc_test <- vegan::vegdist(t(compositions::clr(dendro_input)), method = "euclidean")
# 
# jac_test <- vegan::vegdist(t(dendro_input), method = "jaccard")
# 
# set.seed(2930) # set seed
# ## So we can compare the relative composition based distance metric to the presence/absence based distance metric
# euc_nmds <- metaMDS(euc_test, k=2, autotransform=FALSE)
# jac_nmds <- metaMDS(jac_test, k=2, autotransform=FALSE)
# 
# euc_nmds$stress
# jac_nmds$stress

Plot dendrogram

dendro_plot_output <- ggplot(segment(dendro_plot_df)) + 
  geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + 
  coord_flip() + 
  scale_y_reverse(expand = c(0.2, 0.5), breaks = c(0, 0.2, 0.4, 0.6, 0.8)) + 
  geom_text(aes(x = x, y = y, label = label, angle = 0, hjust = 0), data = label(dendro_plot_df)) + 
  theme_dendro() + 
  labs(y = "Dissimilarity", title = "Jaccard distance") + 
  theme(axis.text.x = element_text(color = "black", size = 14), 
        axis.line.x = element_line(color = "#252525"), 
        axis.ticks.x = element_line(), axis.title.x = element_text(color = "black", size = 14))
dendro_plot_output

Add bar plot in the same order to show proportion of resident versus cosmpolitan ASVs in each sample.

2.4.1 Bar plot to pair with dendrogram

# head(insitu_asv_wClass)
# unique(insitu_asv_wClass$SITE_CLASS)
# unique(insitu_asv_wClass$CLASS)

dendro_bar <- insitu_asv_wClass %>% 
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>% 
  # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT, SITE_CLASS) %>% 
    summarise(AVG = mean(value)) %>% 
    ungroup() %>% 
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>% 
  mutate(REGION= case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>% 
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>% select(-Sample_tmp) %>% 
  unite(SAMPLE, SAMPLETYPE, SITE, VENTNAME, sep = "_", remove = FALSE) %>%
  mutate(SITE_CLASS_2 = case_when(
    SITE_CLASS == "Piccard & Von Damm" ~ "Piccard & Von Damm only",
    SITE_CLASS == "Piccard & Axial" ~ "MCR & Axial",
    SITE_CLASS == "Piccard & Gorda Ridge" ~ "MCR & Gorda Ridge",
    SITE_CLASS == "Von Damm & Axial" ~ "MCR & Axial",
    SITE_CLASS == "Von Damm & Gorda Ridge" ~ "MCR & Gorda Ridge",
    SITE_CLASS == "Piccard, Axial, & Gorda Ridge" ~ "MCR, Axial, & Gorda Ridge",
    SITE_CLASS == "Von Damm, Axial, & Gorda Ridge" ~ "MCR, Axial, & Gorda Ridge",
    TRUE ~ SITE_CLASS
  )) %>% 
  group_by(SITE_CLASS_2, SAMPLE) %>% 
  summarise(SEQ_SUM = sum(AVG),
     ASV_COUNT = n()) %>%  
  mutate(SAMPLE = gsub(SAMPLE, pattern = "_", replacement = " ")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent Axial", replacement = "Axial")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent GordaRidge", replacement = "Gorda Ridge")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "GordaRidge", replacement = "Gorda Ridge")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent VonDamm VonDamm", replacement = "Von Damm")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent Piccard Piccard", replacement = "Piccard")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Background Gorda Ridge", replacement = "Gorda Ridge")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume Gorda Ridge", replacement = "Gorda Ridge")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "VonDamm VonDamm", replacement = "Von Damm")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Piccard Piccard", replacement = "Piccard")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = " BSW", replacement = "")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Background Axial Deep seawater 2015", replacement = "Background Axial 2015")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume Axial Anemone Plume 2015", replacement = "Axial Anemone Plume 2015")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Rav2", replacement = "Ravelin #2")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume Von Damm", replacement = "Von Damm")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume Piccard", replacement = "Piccard"))
## Warning: Expected 4 pieces. Additional pieces discarded in 25500 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# unique(insitu_asv_wClass$CLASS)
cosmo <- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")

dendro_res_cos_df <- insitu_asv_wClass %>% 
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>% 
  mutate(DISTRIBUTION = case_when(
    CLASS %in% cosmo ~ "Cosmopolitan",
    TRUE ~ CLASS
  )) %>% 
  # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT, DISTRIBUTION) %>% 
    summarise(AVG = mean(value)) %>% 
    ungroup() %>% 
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>% 
  mutate(REGION= case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>% 
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>% select(-Sample_tmp) %>% 
  unite(SAMPLE, SAMPLETYPE, SITE, VENTNAME, sep = "_", remove = FALSE) %>%
  group_by(DISTRIBUTION, SAMPLE) %>% 
  summarise(SEQ_SUM = sum(AVG),
     ASV_COUNT = n()) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "_", replacement = " ")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent Axial", replacement = "Axial")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent GordaRidge", replacement = "Gorda Ridge")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "GordaRidge", replacement = "Gorda Ridge")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent VonDamm VonDamm", replacement = "Von Damm")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Vent Piccard Piccard", replacement = "Piccard")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Background Gorda Ridge", replacement = "Gorda Ridge")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume Gorda Ridge", replacement = "Gorda Ridge")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "VonDamm VonDamm", replacement = "Von Damm")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Piccard Piccard", replacement = "Piccard")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = " BSW", replacement = "")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Background Axial Deep seawater 2015", replacement = "Background Axial 2015")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume Axial Anemone Plume 2015", replacement = "Axial Anemone Plume 2015")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Rav2", replacement = "Ravelin #2")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume Von Damm", replacement = "Von Damm")) %>% 
  mutate(SAMPLE = gsub(SAMPLE, pattern = "Plume Piccard", replacement = "Piccard"))
## Warning: Expected 4 pieces. Additional pieces discarded in 25500 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
dendro_bar$SAMPLE_ORDER <- factor(dendro_bar$SAMPLE, levels = label_dendro_order) 
dendro_res_cos_df$SAMPLE_ORDER <- factor(dendro_res_cos_df$SAMPLE, levels = label_dendro_order)

dendro_bar$SITE_CLASS_ORDER <- factor(dendro_bar$SITE_CLASS_2, levels = c("All sites","Von Damm only","Piccard only","Piccard & Von Damm only","MCR & Axial","MCR & Gorda Ridge", "MCR, Axial, & Gorda Ridge", "Gorda Ridge only","Axial only","Axial & Gorda Ridge"))
dendro_bar_plot <- ggplot(dendro_bar, aes(x = SAMPLE_ORDER, y = ASV_COUNT, fill = SITE_CLASS_ORDER)) +
  geom_bar(stat = "identity", position = "fill", color = "black", width = 0.7, alpha = 0.8) +
  coord_flip() +
  scale_y_continuous(expand = c(0,0)) +
  # scale_fill_manual(values = c("grey", "#e6550d", "#fdbb84", "#31a354", "#1c9099", "#fde0dd", "#c51b8a")) +
  scale_fill_manual(values = c("#636363","#bd0026","#fed976","#fd8d3c","#a63603","#fdae6b","#c994c7","#00441b","#ce1256","#addd8e","#f7fcb9","#016c59","#41ab5d","#6a51a3","#3690c0")) +
    theme_bw() +
    theme(axis.text = element_text(color="black", size=5),
          legend.title = element_blank(),
          panel.border = element_blank(),
          panel.grid = element_blank(),
          axis.title = element_text(color="black", size=14),
          legend.text = element_text(color = "black", size = 12),
          plot.margin = margin(1, 1, 1, 1, "cm"),
          legend.position = "top") +
  labs(x = "", y = "Proportion of ASVs")

dendro_bar_plot_res_cos <- ggplot(dendro_res_cos_df, aes(x = SAMPLE_ORDER, y = ASV_COUNT, fill = DISTRIBUTION)) +
  geom_bar(stat = "identity", position = "fill", color = "black", width = 0.7, alpha = 0.8) +
  coord_flip() +
  scale_y_continuous(expand = c(0,0)) +
  viridis::scale_fill_viridis(discrete=TRUE, option = "H") +
    theme_bw() +
    theme(axis.text = element_text(color="black", size=5),
          legend.title = element_blank(),
          panel.border = element_blank(),
          panel.grid = element_blank(),
          axis.title = element_text(color="black", size=14),
          legend.text = element_text(color = "black", size = 12),
          plot.margin = margin(1, 1, 1, 1, "cm"),
          legend.position = "top") +
  labs(x = "", y = "Proportion of ASVs")
# dendro_bar_plot_res_cos
# ?scale_fill_viridis

dendro_bar_plot_SEQ <- ggplot(dendro_bar, aes(x = SAMPLE_ORDER, y = SEQ_SUM, fill = SITE_CLASS_ORDER)) +
  geom_bar(stat = "identity", position = "fill", color = "black", width = 0.7, alpha = 0.8) +
  coord_flip() +
  scale_y_continuous(expand = c(0,0)) +
  # scale_fill_manual(values = c("grey", "#e6550d", "#fdbb84", "#31a354", "#1c9099", "#fde0dd", "#c51b8a")) +
  scale_fill_manual(values = c("#636363","#bd0026","#fed976","#fd8d3c","#a63603","#fdae6b","#c994c7","#00441b","#ce1256","#addd8e","#f7fcb9","#016c59","#41ab5d","#6a51a3","#3690c0")) +
    theme_bw() +
    theme(axis.text = element_text(color="black", size=5),
          legend.title = element_blank(),
          panel.border = element_blank(),
          panel.grid = element_blank(),
          axis.title = element_text(color="black", size=14),
          legend.text = element_text(color = "black", size = 12),
          plot.margin = margin(1, 1, 1, 1, "cm"),
          legend.position = "top") +
  labs(x = "", y = "Proportion of sequences")

dendro_bar_plot_res_cos_SEQ <- ggplot(dendro_res_cos_df, aes(x = SAMPLE_ORDER, y = SEQ_SUM, fill = DISTRIBUTION)) +
  geom_bar(stat = "identity", position = "fill", color = "black", width = 0.7, alpha = 0.8) +
  coord_flip() +
  scale_y_continuous(expand = c(0,0)) +
  viridis::scale_fill_viridis(discrete=TRUE, option = "H") +
    theme_bw() +
    theme(axis.text = element_text(color="black", size=5),
          legend.title = element_blank(),
          panel.border = element_blank(),
          panel.grid = element_blank(),
          axis.title = element_text(color="black", size=14),
          legend.text = element_text(color = "black", size = 12),
          plot.margin = margin(1, 1, 1, 1, "cm"),
          legend.position = "t∂p") +
  labs(x = "", y = "Proportion of sequences")
# dendro_bar_plot

Combine all plots with patchwork.

# library(patchwork)
# svg("dendrogram_wbarplots.svg", w = 18, h = 12)
dendro_plot_output + dendro_bar_plot + dendro_bar_plot_res_cos + patchwork::plot_layout(nrow = 1, widths = c(1, 0.2, 0.2),
                  heights = c(1.5, 0.2, 0.2)) + plot_annotation(tag_levels = "a")

# dev.off()

2.4.2 ASV richness

ASV richness with customized color schema

insitu_asv_wClass %>% 
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>% 
  # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT, Supergroup) %>% 
    summarise(AVG = mean(value)) %>% 
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>% 
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>% 
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>% select(-Sample_tmp) %>% 
  unite(SAMPLE, REGION, VENTNAME, sep = " ", remove = FALSE) %>% 
  ungroup() %>% 
  group_by(SITE, REGION, SAMPLE, SAMPLETYPE) %>% 
    summarise(NUM_ASV = n()) %>% 
  ggplot(aes(x = SAMPLETYPE, y = NUM_ASV, shape = SAMPLETYPE)) +
  geom_boxplot(aes(group = SAMPLETYPE), alpha = 0.8, width = 0.4) +
  geom_jitter(size=2, width = 0.3, aes(fill = SITE)) +
  scale_shape_manual(values = c(21, 23, 24)) +
    scale_fill_manual(values = c("#fdbb84", "#31a354", "#ef3b2c", "#02818a")) +
  theme_bw() +
  theme(axis.text = element_text(color="black", size=12),
        legend.title = element_blank(),
        axis.title = element_text(color="black", size=14),
        legend.text = element_text(color = "black", size = 14)) +
  guides(fill = guide_legend(override.aes = list(shape = 21) ),
            shape = guide_legend(override.aes = list(fill = NA) ) ) +
  labs(x = "", y = "Total number of ASVs")
## Warning: Expected 4 pieces. Additional pieces discarded in 25500 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

2.4.3 ASV richness by sample type & taxa

Figure S4.

# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/figs4-ASVrichness-bytax.svg", h = 8, w = 12)
insitu_asv_wClass %>% 
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>% 
  filter(Domain == "Eukaryota") %>%
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>%
  # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT, SUPERGROUP) %>% 
    summarise(AVG = mean(value)) %>% 
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>% 
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>% 
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>% select(-Sample_tmp) %>% 
  unite(SAMPLE, REGION, VENTNAME, sep = " ", remove = FALSE) %>% 
  ungroup() %>% 
  group_by(SITE, REGION, SAMPLE, SAMPLETYPE, SUPERGROUP) %>% 
    summarise(NUM_ASV = n()) %>% 
  mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
  ggplot(aes(x = SAMPLETYPE, y = NUM_ASV, shape = SAMPLETYPE)) +
  geom_boxplot(aes(group = SAMPLETYPE), alpha = 0.8, width = 0.4) +
  geom_jitter(size=2, width = 0.3, aes(fill = SUPERGROUP_ORDER)) +
  facet_wrap(.~ SUPERGROUP_ORDER, scales = "free_y") +
  scale_shape_manual(values = c(21, 23, 24)) +
    scale_fill_manual(values = all_taxa_color_protalve) +
  theme_bw() +
  theme(axis.text = element_text(color="black", size=12),
        legend.title = element_blank(),
        axis.title = element_text(color="black", size=14),
        legend.text = element_text(color = "black", size = 14),
        strip.background = element_blank(),
        axis.text.x = element_text(color="black", size=12, angle = 90, hjust = 1, vjust = 0.5)) +
  guides(fill = guide_legend(override.aes = list(shape = 21) ),
            shape = guide_legend(override.aes = list(fill = NA) ) ) +
  labs(x = "", y = "Total number of ASVs")
## Warning: Expected 4 pieces. Additional pieces discarded in 23244 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

# dev.off()

3 ASV distribution by sample/site

3.1 UpsetR by ASV level

# set.seed(1234)
alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/fig2b-upsetR-all_revised.svg", h=8, w=12)
insitu_asv_wClass %>% 
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>% 
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    Supergroup == "Protalveolata" ~ "Other Alveolata",
    TRUE ~ Supergroup
  )) %>% 
  # Taxa to supergroup
  mutate(SupergroupPhylum = SUPERGROUP) %>%
  # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT, SupergroupPhylum) %>% 
    summarise(AVG = mean(value)) %>% 
  ungroup() %>% 
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>% 
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>% 
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>% select(-Sample_tmp) %>% 
  unite(SAMPLE, SITE, SAMPLETYPE, sep = " ", remove = FALSE) %>% 
  group_by(FeatureID, SupergroupPhylum, SAMPLE) %>% 
    summarise(SUM = sum(AVG)) %>%
  ungroup() %>% 
  mutate(SUPERGROUP_ORDER = factor(SupergroupPhylum, levels = all_taxa_order)) %>%
  distinct(FeatureID, SUPERGROUP_ORDER, SUM, SAMPLE, .keep_all = TRUE) %>% 
  group_by(FeatureID, SUPERGROUP_ORDER) %>% 
  summarise(SAMPLE = list(SAMPLE)) %>% 
  ggplot(aes(x = SAMPLE)) +
  geom_hline(yintercept = 200, linetype = "dashed", alpha = 0.6) +
    geom_bar(color = NA, width = 0.7, aes(fill = SUPERGROUP_ORDER)) +
  scale_x_upset(order_by = "freq", n_intersections = 25) +
  scale_y_continuous(expand = c(0,0)) +
  labs(x = "", y = "Shared ASVs") +
  theme_linedraw() +
  theme(axis.text.y = element_text(color="black", size=14, face = "bold"),
        axis.text.x = element_text(color="black", size=14, face = "bold"),
        axis.title = element_text(color="black", size=14, face = "bold"),
        legend.text = element_text(color = "black", size = 12),
        plot.margin = margin(1, 1, 1, 5, "cm"),
        legend.title = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        legend.position = "none") +
  scale_fill_manual(values = all_taxa_color) +
  theme_combmatrix(
    combmatrix.panel.striped_background.color.two = "#878686",
    combmatrix.panel.point.size = 3.5
  )
## Warning: Expected 4 pieces. Additional pieces discarded in 23244 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Removed 1425 rows containing non-finite values (stat_count).

# dev.off()
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/fig-revised-200_upset.svg", h=8, w=12)
insitu_asv_wClass %>% 
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>% 
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    Supergroup == "Protalveolata" ~ "Other Alveolata",
    TRUE ~ Supergroup
  )) %>% 
  # Taxa to supergroup
  mutate(SupergroupPhylum = SUPERGROUP) %>%
  # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT, SupergroupPhylum) %>% 
    summarise(AVG = mean(value)) %>% 
  ungroup() %>% 
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>% 
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>% 
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>% select(-Sample_tmp) %>% 
  unite(SAMPLE, SITE, SAMPLETYPE, sep = " ", remove = FALSE) %>% 
  group_by(FeatureID, SupergroupPhylum, SAMPLE) %>% 
    summarise(SUM = sum(AVG)) %>%
  # filter(SUM > 200) %>% 
  ungroup() %>% 
  distinct(FeatureID, SupergroupPhylum, SUM, SAMPLE, .keep_all = TRUE) %>% 
  group_by(FeatureID, SupergroupPhylum) %>% 
  summarise(SAMPLE = list(SAMPLE)) %>% 
  mutate(samples = sapply(SAMPLE, paste, collapse = " ")) %>%
  group_by(samples) %>%
  filter(n() < 250) %>%
  ungroup() %>% 
  mutate(SUPERGROUP_ORDER = factor(SupergroupPhylum, levels = all_taxa_order)) %>%
  #how to order samples in the same way as above?
  # mutate(SAMPLE_ORDER = factor(SAMPLE, levels = c("Axial Vent", "GordaRidge Vent", "VonDamm Vent", "GordaRidge Background", "Piccard Vent", "VonDamm Plume", "GordaRidge Plume", "VonDamm Background", "Axial Plume", "Piccard Background", "Axial Background", "Piccard Plume"))) %>% 
  ggplot(aes(x = SAMPLE)) +
  geom_hline(yintercept = 200, linetype = "dashed", alpha = 0.6) +
    geom_bar(color = NA, width = 0.7, aes(fill = SUPERGROUP_ORDER)) +
    scale_x_upset(order_by = "freq", n_intersections = 25) +
    # axis_combmatrix(sep = "-") +
  scale_y_continuous(expand = c(0,0)) +
  labs(x = "", y = "Shared ASVs") +
  theme_linedraw() +
  theme(axis.text.y = element_text(color="black", size=14, face = "bold"),
        axis.text.x = element_text(color="black", size=14, face = "bold"),
        axis.title = element_text(color="black", size=14, face = "bold"),
        legend.text = element_text(color = "black", size = 12),
        plot.margin = margin(1, 1, 1, 5, "cm"),
        legend.title = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        legend.position = "none") + #when revising this figure, always check color legend
  scale_fill_manual(values = all_taxa_color) +
  theme_combmatrix(
    combmatrix.panel.striped_background.color.two = "#878686",
    combmatrix.panel.point.size = 3.5
  )
## Warning: Expected 4 pieces. Additional pieces discarded in 23244 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Removed 1254 rows containing non-finite values (stat_count).

# dev.off()

Observations regarding above plot: - Axial and Gorda Ridge vent sites have more shared ASVs than any other pairwise comparison. After this, there were also many ASVs shared throughout MCR (vent, plume, + background). May be a reflection of sample size, as MCR had more vent sites - a small subset of ASVs were found at all vent sites or all samples. - ASVs within the vents had much higher unique # of ASVs (not shared with another habitat type) than any other sample type/location (furtherest left bars).

3.1.1 NE Pacific shared

# head(insitu_asv_wClass)
# unique(insitu_asv_wClass$SITE_CLASS)

pacific_shared <- insitu_asv_wClass %>% 
  filter(SITE_CLASS == "Axial & Gorda Ridge" & CLASS == "Vent only")
# View(pacific_shared %>% filter(Phylum == "Ciliophora"))
# View(pacific_shared %>% filter(Phylum == "Cercozoa"))
# View(pacific_shared %>% filter(Phylum == "Stramenopiles"))

axial_yr <- insitu_asv_wClass %>% 
  filter(CLASS == "Vent only") %>% 
  filter(grepl("Axial", SITE_CLASS)) %>% 
  filter(YEAR < 2017) %>% 
  select(YEAR, Taxon, FeatureID, value) %>% 
  pivot_wider(names_from = YEAR, values_from = value, values_fill = NA, values_fn = sum) %>% 
  drop_na()

3.1.2 MCR shared

mcr_shared <- insitu_asv_wClass %>% 
  filter(SITE_CLASS == "Piccard & Von Damm" & CLASS == "Vent only")
length(unique(mcr_shared$FeatureID))
## [1] 235

3.1.3 Vents all shared

allvents_resident <- insitu_asv_wClass %>% 
  filter(SITE_CLASS == "All sites" & CLASS == "Vent only")
length(unique(allvents_resident$FeatureID))
## [1] 7

3.2 UpsetR at Genus level

Repeat upsetR plot, but summarize at genus level, rather than “species” or “strain”

alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
# svg("upsetR-bysite-sampletype-nov2.svg", h=9, w=15)
genus_upset <- insitu_asv_wClass %>% 
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>% 
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  # Taxa to supergroup
  mutate(SupergroupPhylum = SUPERGROUP) %>%
  unite(GENUS, Domain:Genus, sep = ";") %>% 
  # Average across replicates
    group_by(GENUS, SAMPLENAME, VENT, SupergroupPhylum) %>% 
    summarise(AVG = mean(value)) %>% 
  ungroup() %>% 
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>% 
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>% 
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>% select(-Sample_tmp) %>% 
  unite(SAMPLE, SITE, SAMPLETYPE, sep = " ", remove = FALSE) %>% 
  group_by(GENUS, SupergroupPhylum, SAMPLE) %>% 
    summarise(SUM = sum(AVG)) %>%
  # filter(SUM > 200) %>% 
  ungroup() %>% 
  distinct(GENUS, SupergroupPhylum, SUM, SAMPLE, .keep_all = TRUE) %>% 
  group_by(GENUS, SupergroupPhylum) %>% 
  summarise(SAMPLE = list(SAMPLE)) %>% 
  ggplot(aes(x = SAMPLE)) +
    geom_bar(color = "black", width = 0.7, aes(fill = SupergroupPhylum)) +
    scale_x_upset(n_intersections = 25) +
  scale_y_continuous(expand = c(0,0)) +
  labs(x = "", y = "Shared at Genus level") +
  theme_linedraw() +
  theme(axis.text.y = element_text(color="black", size=14, face = "bold"),
        axis.text.x = element_text(color="black", size=14, face = "bold"),
        axis.title = element_text(color="black", size=14, face = "bold"),
        legend.text = element_text(color = "black", size = 12, face = "bold"),
        legend.title = element_blank(),
        panel.grid.minor = element_blank(),
        plot.margin = margin(1, 1, 1, 5, "cm")) +
  scale_fill_manual(values = all_taxa_color)
## Warning: Expected 4 pieces. Additional pieces discarded in 8229 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
#
genus_upset
## Warning: Removed 316 rows containing non-finite values (stat_count).

Isolate list of genus level that are shared at all sites.

alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
# svg("upsetR-bysite-sampletype-nov2.svg", h=9, w=15)
shared_genus <- insitu_asv_wClass %>% 
  filter(SITE %in% all) %>%
  filter(Domain == "Eukaryota") %>% 
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  # Taxa to supergroup
  mutate(SupergroupPhylum = SUPERGROUP) %>%
  unite(GENUS, Domain:Genus, sep = ";") %>% 
  # Average across replicates
    group_by(GENUS, SAMPLENAME, VENT, SupergroupPhylum) %>% 
    summarise(AVG = mean(value)) %>% 
  ungroup() %>% 
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>% 
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>% 
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>% select(-Sample_tmp) %>% 
  unite(SAMPLE, SITE, SAMPLETYPE, sep = " ", remove = FALSE) %>% 
  select(GENUS, SAMPLE, AVG) %>% 
  pivot_wider(names_from = SAMPLE, values_from = "AVG", values_fn = sum) %>%
  drop_na()
## Warning: Expected 4 pieces. Additional pieces discarded in 8229 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# head(shared_genus)

3.3 Resident vs. cosmopolitan stats

3.3.1 Resident

From the vent-only ASVs, what percentage of them appear only at 1 site? what percent also appear at other vent sites?

resident_only <- filter(insitu_asv_wClass, CLASS == "Vent only")

totalasvs <- length(unique(resident_only$FeatureID)); totalasvs
## [1] 8107
totalseq <- sum(resident_only$value); totalseq
## [1] 1267675
# resident includes
100*(totalasvs/length(unique(insitu_asv_wClass$FeatureID))) #%ASVs
## [1] 65.51111
100*(totalseq/sum(insitu_asv_wClass$value)) #%sequences
## [1] 33.45907
  # unique(resident_only$SITE_CLASS)
tmp_unique <- filter(resident_only, grepl(" only", SITE_CLASS))
tmp_shared <- filter(resident_only, !(grepl(" only", SITE_CLASS)))
tmp_allshared <- filter(resident_only, SITE_CLASS == "All sites")

# Of the ASVs found ONLY within diffuse venting fluid, just over 90% were unique to only the individual vent site - makes up 53% of the vent only sequences
a <- length(unique(tmp_unique$FeatureID)); a
## [1] 7325
100*(a/totalasvs)
## [1] 90.35402
100*(a/length(unique(insitu_asv_wClass$FeatureID)))
## [1] 59.19192
a_sum <- sum(tmp_unique$value); a_sum
## [1] 682645
100*(a_sum/totalseq)
## [1] 53.85016
# Of the ASVs from vent only, 9.6% were also found at other vent sites, totals to 46% of the sequences.
b <- length(unique(tmp_shared$FeatureID)); b
## [1] 782
100*(b/totalasvs)
## [1] 9.645985
b_sum <- sum(tmp_shared$value); b_sum 
## [1] 585030
100*(b_sum/totalseq)
## [1] 46.14984
# Total number of ASVs designated to be found at all sites
c <- length(unique(tmp_allshared$FeatureID)); c
## [1] 7
100*(c/totalasvs)
## [1] 0.08634513
c_sum <- sum(tmp_allshared$value); c_sum
## [1] 3022
100*(c_sum/totalseq)
## [1] 0.2383892
vent_only_allsites <- tmp_allshared
# View(vent_only_allsites)
table(vent_only_allsites$Genus)
## 
##            Chrysochromulina     Chrysophyceae_Clade-C_X 
##                          21                           8 
## Prymnesiophyceae_Clade_B4_X    Stephanoecidae_Group_D_X 
##                           7                           8
unique(vent_only_allsites$Taxon)
## [1] "Eukaryota;Hacrobia;Haptophyta;Prymnesiophyceae;Prymnesiales;Chrysochromulinaceae;Chrysochromulina;Chrysochromulina_sp.;"                             
## [2] "Eukaryota;Stramenopiles;Ochrophyta;Chrysophyceae;Chrysophyceae_X;Chrysophyceae_Clade-C;Chrysophyceae_Clade-C_X;Chrysophyceae_Clade-C_X_sp.;"         
## [3] "Eukaryota;Opisthokonta;Choanoflagellida;Choanoflagellatea;Acanthoecida;Stephanoecidae_Group_D;Stephanoecidae_Group_D_X;Stephanoecidae_Group_D_X_sp.;"
## [4] "Eukaryota;Hacrobia;Haptophyta;Prymnesiophyceae;Prymnesiales;Prymnesiophyceae_Clade_B4;Prymnesiophyceae_Clade_B4_X;Prymnesiophyceae_Clade_B4_X_sp.;"  
## [5] "Unassigned"
# unique(vent_only_allsites$FeatureID)

3.3.2 Cosmopolitan

Isolate ASVs that were found in all habitat types! (Vent, plume, & background)

# unique(insitu_asv_wClass$CLASS)
cosmo <- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")
# cosmo <- c("Vent, plume, & background")
cosmo_only <- filter(insitu_asv_wClass, CLASS %in% cosmo)

totalasvs <- length(unique(cosmo_only$FeatureID)); totalasvs
## [1] 2133
totalseq <- sum(cosmo_only$value); totalseq
## [1] 2424982
100*(totalasvs/length(unique(insitu_asv_wClass$FeatureID)))
## [1] 17.23636
100*(totalseq/sum(insitu_asv_wClass$value))
## [1] 64.00507
# unique(cosmo_only$SITE_CLASS)
tmp_unique <- filter(cosmo_only, grepl(" only", SITE_CLASS))
tmp_shared <- filter(cosmo_only, !(grepl(" only", SITE_CLASS)))
tmp_allshared <- filter(cosmo_only, SITE_CLASS == "All sites")

# Of the xoamopolitan ASVs, the ASVs found unique to a vent field
a <- length(unique(tmp_unique$FeatureID)); a
## [1] 761
100*(a/totalasvs)
## [1] 35.67745
100*(a/length(unique(insitu_asv_wClass$FeatureID)))
## [1] 6.149495
a_sum <- sum(tmp_unique$value); a_sum
## [1] 414955
100*(a_sum/totalseq)
## [1] 17.11167
# Of the ASVs found in all habitat types, 77% of them were also found at another vent site, consisting of 92% of the sequences
b <- length(unique(tmp_shared$FeatureID)); b
## [1] 1372
100*(b/totalasvs)
## [1] 64.32255
b_sum <- sum(tmp_shared$value); b_sum 
## [1] 2010027
100*(b_sum/totalseq)
## [1] 82.88833
# Of the ASVS found in all habitat types, 16% of them were found at all vent sites, which made up 40% of the sequences.
c <- length(unique(tmp_allshared$FeatureID)); c
## [1] 187
100*(c/totalasvs)
## [1] 8.766995
c_sum <- sum(tmp_allshared$value); c_sum
## [1] 831712
100*(c_sum/totalseq)
## [1] 34.29766

3.3.3 Treemap of resident vs. cosmopolitan

alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
bkgd <- c("Deep seawater", "BSW", "Shallow seawater", "Near vent BW")
plume <- c("Candelabra Plume", "Mt Edwards Plume", "Plume", "Anemone Plume")

cosmo <- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")
res <- c("Vent only")
# head(insitu_asv_wClass)
# unique(insitu_asv_wClass$CLASS)
to_supergroup_CLASS <- insitu_asv_wClass %>% 
  filter(SITE %in% all) %>% 
  mutate(CATEGORY = case_when(
    CLASS %in% cosmo ~ "Cosmopolitan",
    CLASS %in% res ~ "Resident",
    TRUE ~ "Other"
  )) %>% 
  filter(Domain == "Eukaryota") %>%
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  mutate(SAMPLETYPEORDER = case_when(
    VENT %in% bkgd ~ "Background",
    VENT %in% plume ~ "Plume",
    TRUE ~ "Vent"
  )) %>% 
  group_by(FeatureID, Taxon, SUPERGROUP,
           CATEGORY) %>% 
    summarise(SEQ_AVG_REP = mean(value)) %>% 
  ungroup() %>% 
  group_by(CATEGORY, SUPERGROUP) %>% 
    summarise(ASV_COUNT = n(),
      SEQ_SUM = sum(SEQ_AVG_REP))

# head(to_supergroup_CLASS)
to_supergroup_CLASS %>% 
  filter(CATEGORY != "Other") %>% 
  pivot_longer(cols = ASV_COUNT:SEQ_SUM) %>% 
  mutate(NAME = case_when(
    name == "SEQ_SUM" ~ "Proportion of sequences",
    TRUE ~ "Proportion of ASVs"
  )) %>% 
  mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
  ggplot(aes(area = value, fill = SUPERGROUP_ORDER, subgroup = SUPERGROUP_ORDER)) +
    geom_treemap(color = "white") +
    geom_treemap_subgroup_border(colour = "white", size = 2) +
    facet_grid(CATEGORY ~ NAME) +
  theme_linedraw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        strip.background = element_blank(), strip.text = element_text(color = "black"),
        legend.position = "right",
        legend.title = element_blank(),
        panel.border = element_blank()) +
  scale_y_continuous(expand = c(0,0)) +
  scale_fill_manual(values = all_taxa_color) +
  labs(x = "", y = "")

3.3.4 Bubble plot comparison

supergroup_distri_bubble <- insitu_asv_wClass %>% 
  filter(SITE %in% all) %>% 
  mutate(CATEGORY = case_when(
    CLASS %in% cosmo ~ "Cosmopolitan",
    CLASS %in% res ~ "Resident",
    TRUE ~ "Other"
  )) %>% 
  filter(Domain == "Eukaryota") %>%
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  mutate(SAMPLETYPEORDER = case_when(
    VENT %in% bkgd ~ "Background",
    VENT %in% plume ~ "Plume",
    TRUE ~ "Vent"
  )) %>% 
  group_by(SUPERGROUP, CATEGORY) %>%
    summarise(SEQ_TOTAL = sum(value),
              ASV_TOTAL = n()) %>% 
  filter(SEQ_TOTAL > 200) %>%
  ungroup() %>% 
  pivot_wider(names_from = CATEGORY, values_from = c(SEQ_TOTAL, ASV_TOTAL), values_fill = 0) %>%
  mutate(SUPERGROUP_SEQ_TOTAL = SEQ_TOTAL_Cosmopolitan + SEQ_TOTAL_Other + SEQ_TOTAL_Resident,
         cosmo_seq_perc = 100*(SEQ_TOTAL_Cosmopolitan/SUPERGROUP_SEQ_TOTAL),
         res_seq_perc = 100*(SEQ_TOTAL_Resident/SUPERGROUP_SEQ_TOTAL),
         other_seq_perc = 100*(SEQ_TOTAL_Other/SUPERGROUP_SEQ_TOTAL),
         SUPERGROUP_ASV_TOTAL = ASV_TOTAL_Cosmopolitan + ASV_TOTAL_Other + ASV_TOTAL_Resident,
         cosmo_asv_perc = 100*(ASV_TOTAL_Cosmopolitan/SUPERGROUP_ASV_TOTAL),
         res_asv_perc = 100*(ASV_TOTAL_Resident/SUPERGROUP_ASV_TOTAL),
         other_asv_perc = 100*(ASV_TOTAL_Other/SUPERGROUP_ASV_TOTAL))
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/fig5a-bubble-wlegend.svg", h = 4, w = 4)
supergroup_distri_bubble %>% 
  mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
  ggplot(aes(y = res_asv_perc, x = cosmo_asv_perc, size = SUPERGROUP_SEQ_TOTAL, fill = SUPERGROUP_ORDER)) +
  # geom_abline(slope = 1, intercept = 0, alpha = 0.5) +
  geom_point(shape = 21, stat = "identity", color = "#525252", stroke = 1) +
  scale_size_continuous(range = c(5,25)) +
  scale_y_continuous(limits = c(0,100)) +
  scale_x_continuous(limits = c(0,100)) +
  theme_linedraw() +
  scale_fill_manual(values = all_taxa_color) +
  labs(x = "Proportion of cosmopolitan ASVs", y = "Proportion of resident ASVs") +
  theme(legend.title = element_blank(),
        # legend.position = "none",
        legend.position = "right", # color key is the same
        axis.text = element_text(color = "black", face = "bold", size = 12),
        axis.title = element_text(color = "black", face = "bold", size = 12),
        panel.background = element_blank())

dev.off()
## null device 
##           1
# View(select(supergroup_distri_bubble, SUPERGROUP, SUPERGROUP_SEQ_TOTAL)) # check legend using these values
supergroup_distri_bubble %>% 
  mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>% 
  ggplot(aes(x = ASV_TOTAL_Cosmopolitan, y = ASV_TOTAL_Resident, size = SUPERGROUP_SEQ_TOTAL, fill = SUPERGROUP_ORDER)) +
  # geom_abline(slope = 1, intercept = 0, alpha = 0.5) +
  geom_point(shape = 21, stat = "identity", color = "black") +
  scale_size_continuous(range = c(4,20)) +
  scale_y_continuous(limits = c(0,4500)) +
  scale_x_continuous(limits = c(0,4500)) +
  theme_linedraw() +
  scale_fill_manual(values = all_taxa_color) +
  labs(x = "Total number of cosmopolitan ASVs", y = "Total number of resident ASVs") +
  theme(legend.title = element_blank())

3.3.5 Bar plot res vs. cosmopolitan

seq_res_cosmo_bar <- insitu_asv_wClass %>% 
  # filter(SITE %in% all) %>% 
  mutate(CATEGORY = case_when(
    CLASS %in% cosmo ~ "Cosmopolitan",
    CLASS %in% res ~ "Resident",
    TRUE ~ "Other"
  )) %>% 
  filter(Domain == "Eukaryota") %>%
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  mutate(SAMPLETYPEORDER = case_when(
    VENT %in% bkgd ~ "Background",
    VENT %in% plume ~ "Plume",
    TRUE ~ "Vent"
  )) %>% 
  group_by(SUPERGROUP, CATEGORY, SITE) %>%
    summarise(SEQ_TOTAL = sum(value),
              ASV_TOTAL = n()) %>% 
  filter(CATEGORY != "Other") %>% 
  mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
  ggplot(aes(x = SITE, y = SEQ_TOTAL, fill = SUPERGROUP_ORDER)) +
    geom_bar(stat = "identity", position = "stack", width = 0.4) +
    facet_grid(. ~ CATEGORY) +
  theme_linedraw() +
  scale_fill_manual(values = all_taxa_color_protalve) +
  labs(x = "", y = "Total sequences") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        strip.background = element_blank(),
        strip.text = element_text(color = "black"))
insitu_asv_wClass %>% 
  mutate(CATEGORY = case_when(
    CLASS %in% cosmo ~ "Cosmopolitan",
    CLASS %in% res ~ "Resident",
    TRUE ~ "Other"
  )) %>% 
  filter(Domain == "Eukaryota") %>%
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  mutate(SAMPLETYPEORDER = case_when(
    VENT %in% bkgd ~ "Background",
    VENT %in% plume ~ "Plume",
    TRUE ~ "Vent"
  )) %>% 
  group_by(SUPERGROUP, CATEGORY, SITE) %>%
    summarise(SEQ_TOTAL = sum(value),
              ASV_TOTAL = n()) %>% 
  filter(CATEGORY != "Other") %>% 
  mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
  ggplot(aes(x = SITE, y = SEQ_TOTAL, fill = SUPERGROUP_ORDER)) +
    geom_bar(stat = "identity", position = "fill", width = 0.4) +
    facet_grid(. ~ CATEGORY) +
  theme_linedraw() +
  scale_fill_manual(values = all_taxa_color_protalve) +
  labs(x = "", y = "Sequence relative abundance") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        strip.background = element_blank(),
        strip.text = element_text(color = "black"),
        legend.title = element_blank())

asv_res_cosmo_bar <- insitu_asv_wClass %>% 
  # filter(SITE %in% all) %>% 
  mutate(CATEGORY = case_when(
    CLASS %in% cosmo ~ "Cosmopolitan",
    CLASS %in% res ~ "Resident",
    TRUE ~ "Other"
  )) %>% 
  filter(Domain == "Eukaryota") %>%
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  mutate(SAMPLETYPEORDER = case_when(
    VENT %in% bkgd ~ "Background",
    VENT %in% plume ~ "Plume",
    TRUE ~ "Vent"
  )) %>% 
  group_by(SUPERGROUP, CATEGORY, SITE) %>%
    summarise(SEQ_TOTAL = sum(value),
              ASV_TOTAL = n()) %>% 
  filter(CATEGORY != "Other") %>% 
  mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%  
  ggplot(aes(x = SITE, y = ASV_TOTAL, fill = SUPERGROUP_ORDER)) +
    geom_bar(stat = "identity", position = "stack", width = 0.4) +
    facet_grid(. ~ CATEGORY) +
  theme_linedraw() +
  scale_fill_manual(values = all_taxa_color_protalve) +
  labs(x = "", y = "Total ASVs") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        strip.background = element_blank(),
        strip.text = element_text(color = "black"))

Figure S5

# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/figs5-res_cosmo_barplot.svg", w = 9, h = 5)
(seq_res_cosmo_bar + theme(legend.position = "none")) + (asv_res_cosmo_bar + theme(legend.position = "none")) + patchwork::plot_layout(nrow = 1) + plot_annotation(tag_levels = "a")

# dev.off()
vent_only_resident <- c("Excavata", "Apusozoa", "Amoebozoa")
insitu_asv_wClass %>% 
  # filter(SITE %in% all) %>% 
  mutate(CATEGORY = case_when(
    CLASS %in% cosmo ~ "Cosmopolitan",
    CLASS %in% res ~ "Resident",
    TRUE ~ "Other"
  )) %>% 
  filter(Domain == "Eukaryota") %>%
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  mutate(SAMPLETYPEORDER = case_when(
    VENT %in% bkgd ~ "Background",
    VENT %in% plume ~ "Plume",
    TRUE ~ "Vent"
  )) %>% 
  filter(SUPERGROUP %in% vent_only_resident) %>% 
  group_by(SUPERGROUP, CATEGORY, SITE) %>%
    summarise(SEQ_TOTAL = sum(value),
              ASV_TOTAL = n()) %>% 
  filter(CATEGORY != "Other") %>% 
  mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>%
  ggplot(aes(x = SITE, y = SEQ_TOTAL, fill = SUPERGROUP_ORDER)) +
    geom_bar(stat = "identity", position = "stack", width = 0.4) +
    facet_grid(. ~ CATEGORY) +
  theme_linedraw() +
  scale_fill_manual(values = all_taxa_color) +
  labs(x = "", y = "Total sequences") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        strip.background = element_blank(),
        strip.text = element_text(color = "black"))

vent_only_resident <- c("Excavata", "Apusozoa", "Amoebozoa")
resident_subset <- insitu_asv_wClass %>% 
  # filter(SITE %in% all) %>% 
  mutate(CATEGORY = case_when(
    CLASS %in% cosmo ~ "Cosmopolitan",
    CLASS %in% res ~ "Resident",
    TRUE ~ "Other"
  )) %>% 
  filter(Domain == "Eukaryota") %>%
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  mutate(SAMPLETYPEORDER = case_when(
    VENT %in% bkgd ~ "Background",
    VENT %in% plume ~ "Plume",
    TRUE ~ "Vent"
  )) %>% 
  filter(SUPERGROUP %in% vent_only_resident) %>% 
  filter(CLASS == "Vent only")

dim(resident_subset)
## [1] 346  35
table(resident_subset$Class)
## 
## Apusomonadidae_Group-1            Breviatea_X        Carpediemonadea 
##                     55                      7                     30 
##             Discoba_XX   Discosea-Flabellinia            Fornicata_X 
##                     29                      1                      1 
##               Lobosa_X  Mycetozoa-Myxogastrea       NAMAKO-1-lineage 
##                     16                      1                      1 
##          Planomonadida            Preaxostyla              Tubulinea 
##                     84                     17                     78 
##               Variosea       YS16Ec34-lineage 
##                     13                      1

3.4 ASV prevalence by sequence and taxa

# Not sure if I need this
tax_key <- insitu_asv_wClass %>% 
  select(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species, CLASS, SITE_CLASS) %>% 
  distinct()
# head(insitu_asv_wClass)
alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")

tmp <- insitu_asv_wClass %>% 
  filter(Domain == "Eukaryota") %>% 
  filter(!is.na(Supergroup)) %>% 
  # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT) %>% 
    summarise(AVG = mean(value)) %>% 
  ungroup() %>% 
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>% 
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>% 
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>% select(-Sample_tmp) %>% 
  unite(SAMPLE, REGION, VENTNAME, sep = " ", remove = FALSE) %>% 
  group_by(FeatureID, SAMPLE) %>% 
  summarise(SUM = sum(AVG)) %>% 
  pivot_wider(names_from = "SAMPLE", values_from = SUM, values_fill = 0) %>% 
  column_to_rownames(var = "FeatureID") %>%
  mutate(PREVALENCE = rowSums(. > 0),
         SEQ_TOTAL = rowSums(.)) %>% 
  rownames_to_column(var = "FeatureID") %>% 
  left_join(tax_key) %>% 
   filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order))
## Warning: Expected 4 pieces. Additional pieces discarded in 25354 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# head(tmp)
tmp %>% 
  filter(SEQ_TOTAL > 0) %>% 
  ggplot(aes(x = PREVALENCE, y = SEQ_TOTAL, fill = SUPERGROUP_ORDER)) +
  geom_jitter(stat = "identity", shape = 21) +
  scale_y_log10() +
  facet_wrap(SUPERGROUP_ORDER ~ .) +
  scale_fill_manual(values = all_taxa_color) +
  theme_linedraw() +
  labs(x = "Number of samples ASV appears in", y = "Total sequences (log)")

3.5 ASV CLR value by supergroup

Get CLR transformed data for all ASVs

head(insitu_asv_wClass)
## # A tibble: 6 × 32
##   FeatureID    SAMPLE value Taxon Domain Super…¹ Phylum Class Order Family Genus
##   <chr>        <chr>  <dbl> <chr> <chr>  <chr>   <chr>  <chr> <chr> <chr>  <chr>
## 1 000562094e0… Gorda…     8 Euka… Eukar… Strame… Sagen… <NA>  <NA>  <NA>   <NA> 
## 2 000562094e0… Gorda…    13 Euka… Eukar… Strame… Sagen… <NA>  <NA>  <NA>   <NA> 
## 3 00096455166… Gorda…    91 Euka… Eukar… Rhizar… Radio… Acan… <NA>  <NA>   <NA> 
## 4 000ee37747b… Axial…   282 Euka… Eukar… Alveol… Cilio… Nass… Nass… Disco… NASS…
## 5 000ee37747b… Axial…    32 Euka… Eukar… Alveol… Cilio… Nass… Nass… Disco… NASS…
## 6 00165708dab… Gorda…     1 Euka… Eukar… Strame… Ochro… Pela… Pela… Pelag… Pela…
## # … with 21 more variables: Species <chr>, Consensus <dbl>, SAMPLENAME <chr>,
## #   VENT <chr>, SITE <chr>, SAMPLEID <chr>, DEPTH <chr>, SAMPLETYPE <chr>,
## #   YEAR <chr>, TEMP <chr>, pH <chr>, PercSeawater <chr>, Mg <chr>, H2S <chr>,
## #   CH4 <chr>, ProkConc <chr>, Sample_or_Control <chr>, DATASET <chr>,
## #   DECONTAM <chr>, CLASS <chr>, SITE_CLASS <chr>, and abbreviated variable
## #   name ¹​Supergroup
alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
ciliate <- c("Ciliophora")

avg_insitu_asv <- insitu_asv_wClass %>% 
  # isolate euks only, remove opisthokonta
  filter(Domain == "Eukaryota") %>% 
  filter(Supergroup != "Opisthokonta") %>%
  # Modify supergroup level information
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  mutate(PHYLUM = case_when(
    Phylum == "Unknown" ~ paste(SUPERGROUP, "Other"),
    grepl("_X", Phylum) ~ paste(SUPERGROUP, "Other"),
    is.na(Phylum) ~ paste(SUPERGROUP, "Other"),
    TRUE ~ Phylum
  )) %>% 
  mutate(CLASS = case_when(
    Class == "Unknown" ~ PHYLUM,
    grepl("_X", Class) ~ PHYLUM,
    is.na(Class) ~ Phylum,
    grepl("MAST-", Class) ~ "MAST",
    TRUE ~ Class
  )) %>% 
  # filter(value > 200) %>% 
  # Average across replicates
  group_by(FeatureID, SITE, VENT, SAMPLETYPE, YEAR, Domain, SUPERGROUP, PHYLUM, CLASS, Order, Family, Genus, Species) %>% 
  summarise(AVG = mean(value)) %>% 
  ungroup() %>%  
  # isolate sample name and vent
  unite(SAMPLENAME_2, SITE, VENT, SAMPLETYPE, YEAR, sep = "_") %>% 
  pivot_wider(names_from = SAMPLENAME_2, values_from = AVG, values_fill = 0)
  ###
# names(avg_insitu_asv)

Perform CLR transformation

# Isolate key for ASV ID and taxonomic information
tax_key <- select(avg_insitu_asv, FeatureID, Domain:Species)

# CLR, and re-format data frame
clr_trans_insitu_asv <- data.frame(compositions::clr(avg_insitu_asv %>% 
                               select(-(Domain:Species)) %>% 
                               column_to_rownames(var = "FeatureID"))) %>% 
  rownames_to_column(var = "FeatureID") %>% 
  pivot_longer(cols = starts_with(all), values_to = "CLR", names_to = "SAMPLENAME_2") %>%
  left_join(tax_key) %>% 
  separate(SAMPLENAME_2, c("SITE", "VENT", "SAMPLETYPE", "YEAR"), sep = "_") %>% 
  mutate(VENT = str_replace_all(VENT, "\\.", " ")) %>% 
      mutate(REGION = case_when(
        SITE == "GordaRidge" ~ "Gorda Ridge",
        SITE == "VonDamm" ~ "Von Damm",
        SITE == "Piccard" ~ "Piccard",
        SITE == "Axial" ~ "Axial")) %>% 
      mutate(VENTNAME = case_when(
        REGION == "Axial" ~ paste(VENT, YEAR, sep = " "),
        TRUE ~ VENT)) %>%
  unite(SAMPLE, REGION, VENTNAME, SAMPLETYPE, sep = " ", remove = FALSE) %>% 
  filter(CLR != 0) %>% 
  unite(sampletype_tax, SAMPLETYPE, SUPERGROUP, sep = " ", remove = FALSE) %>% 
  add_column(PRESENCE = 1)
# check output
# head(clr_trans_insitu_asv)
# hist(clr_trans_insitu_asv$CLR)
sites_order <- c("Axial", "GordaRidge", "Piccard", "VonDamm")
clr_trans_insitu_asv$SITE_ORDER <- factor(clr_trans_insitu_asv$SITE, levels = rev(sites_order))
##
##

clr_trans_insitu_asv %>% 
  mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>% 
  group_by(SAMPLETYPE, SUPERGROUP_ORDER, SITE_ORDER) %>% 
    summarize(COUNT_SUM = sum(PRESENCE))  %>% 
ggplot(aes(x = SAMPLETYPE, y = SITE_ORDER, fill = SUPERGROUP_ORDER)) +
  geom_point(shape = 21, aes(size = COUNT_SUM), stat = "identity") +
  facet_wrap(SUPERGROUP_ORDER ~ .) +
  scale_size_continuous(range = c(1,10)) +
  theme(legend.position = "top", 
        panel.grid.major = element_line(),
        panel.grid.minor = element_line(),
        panel.border = element_blank(), 
        panel.background = element_blank(),
        axis.line = element_line(),
        axis.text = element_text(color = "black", size = 8),
        strip.background = element_blank(), 
        strip.text = element_blank(),
        legend.title = element_blank(),
        axis.ticks = element_blank(),
        strip.placement = "outside") + 
  labs(x = "", y = "") +
  scale_fill_manual(values = all_taxa_color)

4 Taxonomic diversity

4.1 Tree map

Filter data to reduce noise and show sample type to vent ecosystem variability.

alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
bkgd <- c("Deep seawater", "BSW", "Shallow seawater", "Near vent BW")
plume <- c("Candelabra Plume", "Mt Edwards Plume", "Plume", "Anemone Plume")

to_supergroup <- insitu_asv_wClass %>% 
  filter(SITE %in% all) %>% 
  filter(Domain == "Eukaryota") %>%
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  mutate(SAMPLETYPEORDER = case_when(
    VENT %in% bkgd ~ "Background",
    VENT %in% plume ~ "Plume",
    TRUE ~ "Vent"
  )) %>% 
  group_by(FeatureID, Taxon, SUPERGROUP,
           VENT, SITE, SAMPLETYPE, YEAR, DATASET, SAMPLETYPEORDER) %>% 
    summarise(SEQ_AVG_REP = mean(value)) %>% 
  ungroup() %>% 
  group_by(SITE, SUPERGROUP, SAMPLETYPEORDER) %>% 
    summarise(ASV_COUNT = n(),
      SEQ_SUM = sum(SEQ_AVG_REP))
  
# Order sample type
to_supergroup$SAMPLETYPEORDER <- factor(to_supergroup$SAMPLETYPEORDER, levels = c("Background", "Plume", "Vent"))

# Remove ASVs with fewer than 200 sequences
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/fig2a-supergroup-treemap.svg", h = 7, w = 8)
to_supergroup %>% 
  mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order)) %>% 
  filter(SEQ_SUM > 200) %>% 
  ggplot(aes(area = SEQ_SUM, fill = SUPERGROUP_ORDER, subgroup = SUPERGROUP)) +
    geom_treemap(color = "white") +
    geom_treemap_subgroup_border(colour = "white", size = 2) +
    facet_grid(SITE ~ SAMPLETYPEORDER) +
  theme_linedraw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, face = "bold", size = 12),
        strip.background = element_blank(), strip.text = element_text(color = "black", face = "bold", vjust = 0.5, hjust = 0, size = 12),
        legend.position = "right",
        legend.title = element_blank(),
        legend.text = element_text(color = "black", size = 12),
        panel.border = element_blank(),
        axis.title = element_text(face = "bold", size = 12)) +
  scale_y_continuous(expand = c(0,0)) +
  scale_fill_manual(values = all_taxa_color) +
  labs(x = "", y = "Sequence proportion by Supergroup")

# dev.off()

4.1.1 Stats on protist supergroups, ASV & sequences

# head(to_supergroup)
totalsum_supergroup <- sum(to_supergroup$SEQ_SUM)

table_perc_supergroup <- to_supergroup %>% 
  group_by(SITE) %>% 
  mutate(SUM_SITE = sum(SEQ_SUM)) %>%
  ungroup() %>% 
  group_by(SITE, SAMPLETYPEORDER) %>% 
  mutate(SUM_SITE_TYPE = sum(SEQ_SUM)) %>% 
  ungroup() %>% 
mutate(Perc_seq_total = (100*(SEQ_SUM/totalsum_supergroup)),
       Perc_seq_site = (100*(SEQ_SUM/SUM_SITE)),
       Perc_seq_site_type = (100*(SEQ_SUM/SUM_SITE_TYPE))) %>% 
  select(-starts_with("SUM_")) %>% 
  gt(
    groupname_col = c("SITE", "SAMPLETYPEORDER")
    # rowname_col = "SAMPLENAME"
  ) %>% 
  fmt_number(columns = starts_with("Perc_"), decimals = 2) %>% 
  fmt_number(columns = SEQ_SUM, decimals = 0) %>% 
  fmt_number(columns = ASV_COUNT, decimals = 0)

# gtsave(table_perc_supergroup, filename = "output-tables/perc_seq_asv_bysupergroup.html")

5 Distance-decay

5.1 Calculate distances across all samples

library(geosphere)

# Attach better coordinates (fixed in 'metadata-compilation-table.R')
coords_input <- read.delim("data-input/tableS1-geochem-params.txt") %>% 
  select(VENT, SITE, SAMPLETYPE, LONG, LAT) %>% 
  unite(SAMPLENAME, VENT, SITE, SAMPLETYPE, sep = "_") %>% 
  distinct(SAMPLENAME, .keep_all = TRUE) %>%
  column_to_rownames(var = "SAMPLENAME")

vents_all <- row.names(coords_input)
distances_m <- as.data.frame(distm(coords_input, fun = distHaversine))
colnames(distances_m) <- vents_all
rownames(distances_m) <- vents_all

# Distinction with "_all" here is that some samples are not included in below analysis. 

Calculate distances and create long format dataframe

dist_m <- distances_m %>% 
  rownames_to_column(var = "start") %>% 
  pivot_longer(cols = vents_all, names_to = "end") %>%
  filter(!(start == end))

5.2 Format input ASV data

# Widen an numeric the ASVs for the distance calculations
# vents # use this list of samples

df_wide_num <- insitu_asv_wClass %>% 
  group_by(FeatureID, SAMPLENAME, VENT, SITE, SAMPLETYPE) %>% 
  summarise(AVG = mean(value)) %>% 
    ungroup() %>%
  select(-SAMPLENAME) %>% 
  unite(SAMPLENAME, VENT, SITE, SAMPLETYPE, sep = "_") %>% 
  group_by(FeatureID, SAMPLENAME) %>% 
  summarise(SUM = sum(AVG)) %>% 
  pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>% 
  column_to_rownames(var = "SAMPLENAME")

vents <- row.names(df_wide_num)

5.3 Calculate distance matrices

Inclusion of three metrics below is because I want to see if the answer varies across the

dist_bray <- as.data.frame(as.matrix(vegdist(df_wide_num, method = "bray"))) %>% 
  rownames_to_column(var = "start") %>% 
  pivot_longer(cols = vents, names_to = "end") %>% 
  select(start, end, Bray_Curtis_metric = value)

dist_jacc <- as.data.frame(as.matrix(vegdist(df_wide_num, method = "jaccard"))) %>% 
  rownames_to_column(var = "start") %>% 
  pivot_longer(cols = vents, names_to = "end") %>% 
  select(start, end, Jaccard_metric = value)

dist_euc_clr <- as.data.frame(as.matrix(vegdist(compositions::clr(df_wide_num), method = "euclidean"))) %>% 
  rownames_to_column(var = "start") %>% 
  pivot_longer(cols = vents, names_to = "end") %>% 
  select(start, end, Euclidean_CLR_metric = value)

5.4 Compile distances of pairwise comparisons

dist_compiled_output <- dist_m %>% 
  select(start, end, meters = value) %>% 
  left_join(dist_jacc) %>% 
  left_join(dist_bray) %>% 
  left_join(dist_euc_clr) %>% 
  pivot_longer(cols = ends_with("_metric"), names_to = "comm_dist") %>% 
  filter(!(start == end)) %>%
  mutate(km = (meters/1000),
         dist0 = gsub("_", " ", comm_dist),
         dist = gsub(" metric", "", dist0))

5.5 Plot distance-decay

Compare several distance metrics with distance.

# ggplot(dist_compiled_output, aes(x = km, y = value)) +
#   geom_jitter() +
#   scale_x_log10(labels = scales::comma) +
#   facet_grid(dist ~ ., scales = "free") +
#   theme_linedraw() +
#   geom_smooth(method="lm") +
#   stat_regline_equation(aes(label = ..rr.label..), color = "black", label.y.npc = 0.09, label.x.npc = 0.7) +
#   theme(strip.background = element_blank(),
#         strip.text = element_text(color = "black")) +
#   labs(y = "Distance metric")
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/fig4a-distdecay.svg", h = 4, w = 5)
dist_compiled_output %>% 
  filter(dist == "Jaccard") %>% 
  ggplot(aes(x = km, y = value)) +
  geom_jitter() +
  scale_x_log10(labels = scales::comma) +
  facet_grid(dist ~ ., scales = "free") +
  # geom_smooth(method="lm") +
  theme_linedraw() +
  # stat_regline_equation(aes(label = ..rr.label..), face = "bold", color = "black", label.y.npc = 0.09, label.x.npc = 0.7) +
  scale_y_continuous(limits = c(0.7,1)) +
  theme(strip.background = element_blank(),
        strip.text = element_text(color = "black", face = "bold", size = 12),
        axis.text = element_text(color = "black", face = "bold", size = 12),
        axis.title = element_text(color = "black", face = "bold", size = 12)) +
  labs(y = "Jaccard Dissimilarity")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 198 rows containing missing values (geom_point).

# dev.off()
# labs(y = "Distance metric", title = "Distance-Decay: all ASVs")

5.6 Plot variance of Jaccard index by distance in km

# head(dist_compiled_output)
# # ?var()
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/figS4-var-dist.svg", h = 4, w = 5)
var_km <- dist_compiled_output %>% 
  filter(dist == "Jaccard") %>% 
  # separate(start, into = c("NAME_start", "SITE_start"), sep = "_", remove = FALSE, extra = "merge") %>%
  # separate(end, into = c("NAME_end", "SITE_end"), sep = "_", remove = FALSE, extra = "merge") %>% 
  separate(start, into = c("NAME_start", "SITE_start", "TYPE_start"), sep = "_", remove = FALSE, extra = "merge") %>%
  separate(end, into = c("NAME_end", "SITE_end", "TYPE_end"), sep = "_", remove = FALSE, extra = "merge") %>%
  drop_na() %>% 
  # group_by(start, end) %>% 
  group_by(SITE_start, SITE_end) %>%
  mutate(VARIANCE = var(value)) %>% 
ggplot(aes(x = km, y = VARIANCE)) +
  scale_x_log10(labels = scales::comma) +
  geom_jitter(shape = 21, size = 2, color = "black", fill = "black") +
  geom_smooth(method="lm") +
  theme_linedraw() +
  stat_regline_equation(aes(label = ..rr.label..), color = "black", label.y.npc = 0.0007, label.x.npc = 0.08) +
  # scale_y_continuous(limits = c(0.7,1)) +
  theme(strip.background = element_blank(),
        strip.text = element_text(color = "black"),
        axis.text = element_text(color = "black"),
        axis.title = element_text(color = "black")) +
  labs(y = "Variance of Jaccard Dissimilarity", x = "")
# dev.off()

5.7 Repeat distance-decay with vent-only sites

Sample to vent sites only

# head(insitu_asv_wClass)
df_wide_vent <- insitu_asv_wClass %>% 
  filter(SAMPLETYPE == "Vent") %>% 
  group_by(FeatureID, SAMPLENAME, VENT, SITE, SAMPLETYPE) %>% 
  summarise(AVG = mean(value)) %>% 
    ungroup() %>%
  select(-SAMPLENAME) %>% 
  unite(SAMPLENAME, VENT, SITE, SAMPLETYPE, sep = "_") %>% 
  group_by(FeatureID, SAMPLENAME) %>% 
  summarise(SUM = sum(AVG)) %>% 
  pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>% 
  column_to_rownames(var = "SAMPLENAME")

vents <- rownames(df_wide_vent)
dist_bray <- as.data.frame(as.matrix(vegdist(df_wide_vent, method = "bray"))) %>% 
  rownames_to_column(var = "start") %>% 
  pivot_longer(cols = vents, names_to = "end") %>% 
  select(start, end, Bray_Curtis_metric = value)

dist_jacc <- as.data.frame(as.matrix(vegdist(df_wide_vent, method = "jaccard"))) %>% 
  rownames_to_column(var = "start") %>% 
  pivot_longer(cols = vents, names_to = "end") %>% 
  select(start, end, Jaccard_metric = value)

dist_euc_clr <- as.data.frame(as.matrix(vegdist(compositions::clr(df_wide_vent), method = "euclidean"))) %>% 
  rownames_to_column(var = "start") %>% 
  pivot_longer(cols = vents, names_to = "end") %>% 
  select(start, end, Euclidean_CLR_metric = value)

dist_compiled_output_vents <- dist_m %>% 
  select(start, end, meters = value) %>% 
  left_join(dist_jacc) %>% 
  left_join(dist_bray) %>% 
  left_join(dist_euc_clr) %>% 
  pivot_longer(cols = ends_with("_metric"), names_to = "comm_dist") %>% 
  filter(!(start == end)) %>%
  mutate(km = (meters/1000),
         dist0 = gsub("_", " ", comm_dist),
         dist = gsub(" metric", "", dist0))
dist_compiled_output_vents %>% 
  # filter(grepl("Vent", start) & grepl("Vent", end)) %>%
  ggplot(aes(x = km, y = value)) +
  geom_jitter() +
  scale_x_log10(labels = scales::comma) +
  facet_grid(dist ~ ., scales = "free") +
  geom_smooth(method="lm") +
  theme_linedraw() +
  stat_regline_equation(aes(label = ..rr.label..), color = "black", label.y.npc = 0.09, label.x.npc = 0.7) +
  theme(strip.background = element_blank(),
        strip.text = element_text(color = "black")) +
  labs(y = "Distance metric")
## Warning: Transformation introduced infinite values in continuous x-axis
## Transformation introduced infinite values in continuous x-axis
## Transformation introduced infinite values in continuous x-axis
## Warning: Removed 2310 rows containing non-finite values (stat_smooth).
## Warning: Removed 2310 rows containing non-finite values (stat_regline_equation).
## Warning: Removed 2310 rows containing missing values (geom_point).

vent_dist_decay <- dist_compiled_output_vents %>% 
  filter(dist == "Jaccard") %>% 
  ggplot(aes(x = km, y = value)) +
  geom_jitter() +
  scale_x_log10(labels = scales::comma) +
  facet_grid(dist ~ ., scales = "free") +
  geom_smooth(method="lm") +
  theme_linedraw() +
  stat_regline_equation(aes(label = ..rr.label..), color = "black", label.y.npc = 0.09, label.x.npc = 0.7) +
  theme(strip.background = element_blank(),
        strip.text = element_text(color = "black")) +
  labs(y = "Jaccard Dissimilarity", title = "",  x = "")

5.8 Repeat distance-decay with subpopulations

Sample to resident only and cosmopolitan only

cosmo <- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")
res <- c("Vent only")

df_wide_res <- insitu_asv_wClass %>% 
  filter(CLASS %in% res) %>% 
  group_by(FeatureID, SAMPLENAME, VENT, SITE, SAMPLETYPE) %>% 
  summarise(AVG = mean(value)) %>% 
    ungroup() %>%
  select(-SAMPLENAME) %>% 
  unite(SAMPLENAME, VENT, SITE, SAMPLETYPE, sep = "_") %>% 
  group_by(FeatureID, SAMPLENAME) %>% 
  summarise(SUM = sum(AVG)) %>% 
  pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>% 
  column_to_rownames(var = "SAMPLENAME")
#
vents_res <- row.names(df_wide_res)
#
dist_bray <- as.data.frame(as.matrix(vegdist(df_wide_res, method = "bray"))) %>% 
  rownames_to_column(var = "start") %>% 
  pivot_longer(cols = vents_res, names_to = "end") %>% 
  select(start, end, Bray_Curtis_metric = value)

dist_jacc <- as.data.frame(as.matrix(vegdist(df_wide_res, method = "jaccard"))) %>% 
  rownames_to_column(var = "start") %>% 
  pivot_longer(cols = vents_res, names_to = "end") %>% 
  select(start, end, Jaccard_metric = value)

dist_euc_clr <- as.data.frame(as.matrix(vegdist(compositions::clr(df_wide_res), method = "euclidean"))) %>% 
  rownames_to_column(var = "start") %>% 
  pivot_longer(cols = vents_res, names_to = "end") %>% 
  select(start, end, Euclidean_CLR_metric = value)

dist_compiled_output_res <- dist_m %>% 
  select(start, end, meters = value) %>% 
  left_join(dist_jacc) %>% 
  left_join(dist_bray) %>% 
  left_join(dist_euc_clr) %>% 
  pivot_longer(cols = ends_with("_metric"), names_to = "comm_dist") %>% 
  filter(!(start == end)) %>%
  mutate(km = (meters/1000),
         dist0 = gsub("_", " ", comm_dist),
         dist = gsub(" metric", "", dist0))
d_d_resident <- dist_compiled_output_res %>% 
  filter(dist == "Jaccard") %>% 
  ggplot(aes(x = km, y = value)) +
  geom_jitter() +
  scale_x_log10(labels = scales::comma) +
  facet_grid(dist ~ ., scales = "free") +
  geom_smooth(method="lm") +
  theme_linedraw() +
  stat_regline_equation(aes(label = ..rr.label..), color = "black", label.y.npc = 0.09, label.x.npc = 0.7) +
  theme(strip.background = element_blank(),
        strip.text = element_text(color = "black")) +
  labs(y = "Distance metric", title = "Distance-Decay: Resident ASVs")
d_d_resident
## Warning: Transformation introduced infinite values in continuous x-axis
## Transformation introduced infinite values in continuous x-axis
## Transformation introduced infinite values in continuous x-axis
## Warning: Removed 770 rows containing non-finite values (stat_smooth).
## Warning: Removed 770 rows containing non-finite values (stat_regline_equation).
## Warning: Removed 770 rows containing missing values (geom_point).

Target cosmpolitan

cosmo <- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")
res <- c("Vent only")

df_wide_cosmo <- insitu_asv_wClass %>% 
  filter(CLASS %in% cosmo) %>% 
  group_by(FeatureID, SAMPLENAME, VENT, SITE, SAMPLETYPE) %>% 
  summarise(AVG = mean(value)) %>% 
    ungroup() %>%
  select(-SAMPLENAME) %>% 
  unite(SAMPLENAME, VENT, SITE, SAMPLETYPE, sep = "_") %>% 
  group_by(FeatureID, SAMPLENAME) %>% 
  summarise(SUM = sum(AVG)) %>% 
  pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>% 
  column_to_rownames(var = "SAMPLENAME")
#
vents_cosmo <- row.names(df_wide_cosmo)
#
dist_bray <- as.data.frame(as.matrix(vegdist(df_wide_cosmo, method = "bray"))) %>% 
  rownames_to_column(var = "start") %>% 
  pivot_longer(cols = vents_cosmo, names_to = "end") %>% 
  select(start, end, Bray_Curtis_metric = value)

dist_jacc <- as.data.frame(as.matrix(vegdist(df_wide_cosmo, method = "jaccard"))) %>% 
  rownames_to_column(var = "start") %>% 
  pivot_longer(cols = vents_cosmo, names_to = "end") %>% 
  select(start, end, Jaccard_metric = value)

dist_euc_clr <- as.data.frame(as.matrix(vegdist(compositions::clr(df_wide_cosmo), method = "euclidean"))) %>% 
  rownames_to_column(var = "start") %>% 
  pivot_longer(cols = vents_cosmo, names_to = "end") %>% 
  select(start, end, Euclidean_CLR_metric = value)

dist_compiled_output_cosmo<- dist_m %>% 
  select(start, end, meters = value) %>% 
  left_join(dist_jacc) %>% 
  left_join(dist_bray) %>% 
  left_join(dist_euc_clr) %>% 
  pivot_longer(cols = ends_with("_metric"), names_to = "comm_dist") %>% 
  filter(!(start == end)) %>%
  mutate(km = (meters/1000),
         dist0 = gsub("_", " ", comm_dist),
         dist = gsub(" metric", "", dist0))
cosmo_dist_decay <- dist_compiled_output_cosmo %>% 
  filter(dist == "Jaccard") %>% 
  ggplot(aes(x = km, y = value)) +
  geom_jitter() +
  scale_x_log10(labels = scales::comma) +
  facet_grid(dist ~ ., scales = "free") +
  geom_smooth(method="lm") +
  theme_linedraw() +
  stat_regline_equation(aes(label = ..rr.label..), color = "black", label.y.npc = 0.09, label.x.npc = 0.7) +
  theme(strip.background = element_blank(),
        strip.text = element_text(color = "black")) +
  labs(y = "Jaccard Dissimilarity", title = "", x = "")

Figure S4

# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/supp_dist_decay_figS4a-c.svg", h = 10, w = 5)
# var_km + vent_dist_decay + cosmo_dist_decay +patchwork::plot_layout(nrow = 3) + plot_annotation(tag_levels = 'a')

# Execute all commands  in clustered-ASV-analysis markdown file to get dist_decay for OTU clustering
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/supp_dist_decay_figS4_all.svg", h = 10, w = 9)
# ((var_km / vent_dist_decay / cosmo_dist_decay + plot_layout(guides = 'keep')) | dist_decay) + plot_layout(guides = 'collect') + plot_annotation(tag_levels = 'a')
# dev.off()
# dev.off()

Isolate taxa key, and manually curated taxonomic groupings.

alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
ciliate <- c("Ciliophora")

tax_key_curated <- insitu_asv_wClass %>% 
  select(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species) %>% 
  distinct() %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  mutate(PHYLUM = case_when(
    Phylum == "Unknown" ~ paste(SUPERGROUP, "Other"),
    grepl("_X", Phylum) ~ paste(SUPERGROUP, "Other"),
    is.na(Phylum) ~ paste(SUPERGROUP, "Other"),
    TRUE ~ Phylum
  )) %>% 
  mutate(CLASS = case_when(
    Class == "Unknown" ~ PHYLUM,
    grepl("_X", Class) ~ PHYLUM,
    is.na(Class) ~ Phylum,
    grepl("MAST-", Class) ~ "MAST",
    TRUE ~ Class
  )) %>% 
  mutate(CLASS2 = case_when(
    CLASS == Order ~ Order,
    is.na(Order) ~ paste(CLASS, "Other"),
    TRUE ~ Order)) %>%
  filter(!is.na(Order)) %>%
  select(FeatureID, SUPERGROUP, PHYLUM, CLASS, CLASS2) %>% 
  mutate(SUPERGROUP_ORDER = factor(SUPERGROUP, levels = all_taxa_order))
# head(tax_key_curated)
# unique(tax_key_curated$CLASS_2)

6 Plot total sequences by # of vent sites

tmp <- insitu_asv_wClass %>%
    filter(Domain == "Eukaryota") %>% 
    # filter(CLASS %in% slice) %>%
    # Average across replicates
      group_by(FeatureID, SAMPLENAME, VENT, distribution = CLASS) %>% 
      summarise(AVG = mean(value)) %>% 
      ungroup() %>% 
    left_join(tax_key_curated) %>% 
    separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp")) %>% 
    mutate(VENTNAME = case_when(
      SITE == "Axial" ~ paste(SITE, VENT, YEAR, sep = " "),
      SITE == "GordaRidge" ~ paste(SITE, VENT, sep = " "),
      SITE %in% mcr ~ paste(SITE, VENT, sep = " ")
      )) %>% select(-Sample_tmp) %>% 
  # filter(SAMPLETYPE == "Vent") %>% 
  group_by(VENTNAME, distribution, SUPERGROUP, PHYLUM, CLASS2) %>% 
    summarise(TOTAL_SEQ = sum(AVG),
              COUNT = n()) # %>% #IF you use this please modify location
## Warning: Expected 4 pieces. Additional pieces discarded in 25500 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
  # filter(!is.na(SUPERGROUP))

head(tmp)
## # A tibble: 6 × 7
## # Groups:   VENTNAME, distribution, SUPERGROUP, PHYLUM [2]
##   VENTNAME           distribution      SUPERGROUP    PHYLUM CLASS2 TOTAL…¹ COUNT
##   <chr>              <chr>             <chr>         <chr>  <chr>    <dbl> <int>
## 1 Axial Anemone 2013 Vent & background Alveolata-Ci… Cilio… Apost…      24     1
## 2 Axial Anemone 2013 Vent & background Alveolata-Ci… Cilio… Hypot…      20     1
## 3 Axial Anemone 2013 Vent & background Alveolata-Ci… Cilio… Strom…      38     2
## 4 Axial Anemone 2013 Vent & background Alveolata-Di… Dinof… Dino-…      20     1
## 5 Axial Anemone 2013 Vent & background Alveolata-Di… Dinof… Dino-…      96     5
## 6 Axial Anemone 2013 Vent & background Alveolata-Di… Dinof… Dino-…       8     1
## # … with abbreviated variable name ¹​TOTAL_SEQ
tmp %>% 
  filter(distribution == "Vent only") %>% 
  ggplot(aes(x = VENTNAME, y = CLASS2)) +
  geom_point(aes(fill = log(TOTAL_SEQ), size = COUNT), shape = 21) +
  scale_size_continuous(range = c(1,18)) +
  # scale_shape_manual(values = c(21, 23, 22, 25)) +
  facet_grid(.~SUPERGROUP, space = "free", scales = "free") +
  theme(axis.text.x = element_text(angle = 90)) +
  coord_flip()

7 Pairwise comparison of community diversity

7.1 Compare resident vs.cosmopolitan population

cosmo <- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")
res <- c("Vent only")
all_classes <- as.character(unique(insitu_asv_wClass$CLASS))

axial <- c("Axial")
mcr <- c("VonDamm", "Piccard")
gr <- c("GordaRidge")
all <- c("Axial", "VonDamm", "Piccard", "GordaRidge")

7.2 Jaccard comparison of resident vs. cosmopolitan

isolate_calc_jaccard <- function(df, slice){
  df_out_wide <- df %>%
    filter(Domain == "Eukaryota") %>% 
    filter(CLASS %in% slice) %>%
    # Average across replicates
      group_by(FeatureID, SAMPLENAME, VENT) %>% 
      summarise(AVG = mean(value)) %>% 
      ungroup() %>% 
    separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp")) %>% 
    mutate(VENTNAME = case_when(
      SITE == "Axial" ~ paste(SITE, VENT, YEAR, sep = " "),
      SITE == "GordaRidge" ~ paste(SITE, VENT, sep = " "),
      SITE %in% mcr ~ paste(SITE, VENT, sep = " ")
      )) %>% select(-Sample_tmp) %>% 
    unite(SAMPLEID, SITE, VENT, YEAR, SAMPLETYPE, sep = "-") %>%
    unite(sample_full, SAMPLEID, VENTNAME, sep = "--") %>% 
     group_by(FeatureID, sample_full) %>% 
      summarise(SUM = sum(AVG)) %>% 
    pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>% 
    column_to_rownames(var = "sample_full")
  }

Apply function to resident population, cosmopolitan population, and whole population

resident <- isolate_calc_jaccard(insitu_asv_wClass, res)
## Warning: Expected 4 pieces. Additional pieces discarded in 10510 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
cosmopolitan <- isolate_calc_jaccard(insitu_asv_wClass, cosmo)
## Warning: Expected 4 pieces. Additional pieces discarded in 12990 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
all_jacc <- isolate_calc_jaccard(insitu_asv_wClass, all_classes)
## Warning: Expected 4 pieces. Additional pieces discarded in 25500 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# head(resident)

Plot resident

dist_jacc_clr_res <- as.data.frame(as.matrix(vegdist(compositions::clr(resident), method = "euclidean"))) %>%
    rownames_to_column(var = "start") %>% 
    pivot_longer(cols = row.names(resident), names_to = "end") %>% 
    select(start, end, Jaccard_CLR_metric = value) %>% 
    # mutate(VARS = purrr::map2_chr(start, end, ~toString(sort(c(.x, .y))))) %>%
    # distinct(VARS, .keep_all = TRUE) %>%
    # separate(VARS, c("X", "Y"), sep = ", ") %>% 
    # select(-start, -end) %>% 
    # add_column(SUBSET = "RESIDENT") %>% 
    separate(start, c("Y_FULL", "Y"), sep = "--", remove = FALSE) %>% 
    separate(Y_FULL, c("Y_SITE", "Y_VENT", "Y_YEAR"), sep = "-", remove = FALSE) %>% 
    separate(end, c("X_FULL", "X"), sep = "--", remove = FALSE) %>% 
    separate(X_FULL, c("X_SITE", "X_VENT", "X_YEAR"), sep = "-", remove = FALSE) %>% 
  ggplot(aes(x = X, y = Y, fill = Jaccard_CLR_metric)) +
  geom_tile(color = "black") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        axis.title = element_blank()) +
  # scale_fill_gradientn(colors = c("#badbdb", "#dedad2", "#e4bcad", "#df979e", "#d7658b", "#c80064")) +
  # scale_fill_gradientn(colors = c("#fff7f3", "#fde0dd", "#fcc5c0", "#fa9fb5", "#f768a1", "#dd3497", "#ae017e", "#7a0177", "#49006a"))
  scale_fill_gradientn(colors = c("#ffffff","#f0f0f0","#d9d9d9","#bdbdbd","#969696","#737373","#525252","#252525","#000000")) +
  coord_fixed(ratio = 1)
## Warning: Expected 3 pieces. Additional pieces discarded in 576 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Expected 3 pieces. Additional pieces discarded in 576 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
dist_jacc_clr_res

Plot cosmopolitan

dist_jacc_clr_cos <- as.data.frame(as.matrix(vegdist(compositions::clr(cosmopolitan), method = "euclidean"))) %>%
    rownames_to_column(var = "start") %>% 
    pivot_longer(cols = row.names(cosmopolitan), names_to = "end") %>% 
    select(start, end, Jaccard_CLR_metric = value) %>% 
    separate(start, c("Y_FULL", "Y"), sep = "--", remove = FALSE) %>% 
    separate(Y_FULL, c("Y_SITE", "Y_VENT", "Y_YEAR"), sep = "-", remove = FALSE) %>% 
    separate(end, c("X_FULL", "X"), sep = "--", remove = FALSE) %>% 
    separate(X_FULL, c("X_SITE", "X_VENT", "X_YEAR"), sep = "-", remove = FALSE) %>% 
  ggplot(aes(x = X, y = Y, fill = Jaccard_CLR_metric)) +
  geom_tile(color = "black") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        axis.title = element_blank()) +
  # scale_fill_gradientn(colors = c("#badbdb", "#dedad2", "#e4bcad", "#df979e", "#d7658b", "#c80064")) +
  # scale_fill_gradientn(colors = c("#fff7f3", "#fde0dd", "#fcc5c0", "#fa9fb5", "#f768a1", "#dd3497", "#ae017e", "#7a0177", "#49006a"))
  scale_fill_gradientn(colors = c("#ffffff","#f0f0f0","#d9d9d9","#bdbdbd","#969696","#737373","#525252","#252525","#000000")) +
  coord_fixed(ratio = 1)
## Warning: Expected 3 pieces. Additional pieces discarded in 1225 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Expected 3 pieces. Additional pieces discarded in 1225 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
dist_jacc_clr_cos

Plot all ASVs

dist_jacc_clr_all <- as.data.frame(as.matrix(vegdist(compositions::clr(all_jacc), method = "euclidean"))) %>%
    rownames_to_column(var = "start") %>% 
    pivot_longer(cols = row.names(all_jacc), names_to = "end") %>% 
    select(start, end, Jaccard_CLR_metric = value) %>% 
    separate(start, c("Y_FULL", "Y"), sep = "--", remove = FALSE) %>% 
    separate(Y_FULL, c("Y_SITE", "Y_VENT", "Y_YEAR"), sep = "-", remove = FALSE) %>% 
    separate(end, c("X_FULL", "X"), sep = "--", remove = FALSE) %>% 
    separate(X_FULL, c("X_SITE", "X_VENT", "X_YEAR"), sep = "-", remove = FALSE) %>% 
  ggplot(aes(x = X, y = Y, fill = Jaccard_CLR_metric)) +
  geom_tile(color = "black") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        axis.title = element_blank()) +
  # scale_fill_gradientn(colors = c("#badbdb", "#dedad2", "#e4bcad", "#df979e", "#d7658b", "#c80064")) +
  # scale_fill_gradientn(colors = c("#fff7f3", "#fde0dd", "#fcc5c0", "#fa9fb5", "#f768a1", "#dd3497", "#ae017e", "#7a0177", "#49006a"))
  scale_fill_gradientn(colors = c("#ffffff","#f0f0f0","#d9d9d9","#bdbdbd","#969696","#737373","#525252","#252525","#000000")) +
  coord_fixed(ratio = 1)
## Warning: Expected 3 pieces. Additional pieces discarded in 1225 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Expected 3 pieces. Additional pieces discarded in 1225 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
dist_jacc_clr_all

8 Plot population sets as CLR

clr_to_plot_maintax <- function(df, slice, tax_slice){
  df_out_wide <- df %>%
    filter(Domain == "Eukaryota") %>%
    filter(CLASS %in% slice) %>%
    # Average across replicates
      group_by(FeatureID, SAMPLENAME, VENT) %>% 
      summarise(AVG = mean(value)) %>% 
      ungroup() %>% 
    separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp")) %>% 
    mutate(VENTNAME = case_when(
      SITE == "Axial" ~ paste(SITE, VENT, YEAR, sep = " "),
      SITE == "GordaRidge" ~ paste(SITE, VENT, sep = " "),
      SITE %in% mcr ~ paste(SITE, VENT, sep = " ")
      )) %>% select(-Sample_tmp) %>% 
    left_join(tax_key_curated) %>%
    # Modify Class 2 level with excess text.
    mutate(CLASS2 = str_replace_all(CLASS2, "_X", "")) %>%
    mutate(CLASS2 = str_replace_all(CLASS2, "_XX", "")) %>%
    mutate(CLASS2 = str_replace_all(CLASS2, "X$", "")) %>% 
    mutate(CLASS2 = str_replace_all(CLASS2, "_[:alnum:]$", "")) %>% 
    group_by(VENTNAME, SUPERGROUP, PHYLUM, CLASS, CLASS2) %>% 
        summarise(SUM_VALUE = sum(AVG)) %>% 
    filter(SUPERGROUP %in% tax_slice) %>% 
    unite(TAX, SUPERGROUP, PHYLUM, CLASS, CLASS2, sep = "__") %>% 
    pivot_wider(names_from = TAX, values_from = SUM_VALUE, values_fill = 0) %>% 
    column_to_rownames(var = "VENTNAME")
  # Plot
  as.data.frame(compositions::clr(df_out_wide)) %>% 
  rownames_to_column(var = "VENTNAME") %>%
    pivot_longer(cols = names(df_out_wide), values_to = "CLR", names_to = "TAX") %>% 
    separate(TAX, into = c("SUPERGROUP", "PHYLUM", "CLASS", "CLASS2"), sep = "__") %>% 
    separate(VENTNAME, into = c("SITE"), sep = " ", remove = FALSE) %>% 
    ggplot(aes(x = CLASS2, y = VENTNAME, fill = CLR)) +
             geom_tile(aes(fill = CLR), color = "black") +
      theme(legend.position = "right", 
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(), 
            panel.border = element_blank(), 
            panel.background = element_blank(),
            axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, color = "black",size = 10, face = "bold"), 
            axis.text.y = element_text(color = "black", size = 10, face = "bold", angle = 45, hjust = 1, vjust = 1),
            strip.background = element_blank(), 
            strip.text.y = element_text(hjust = 0, vjust = 0, angle = 45, size = 10, color = "black", face = "bold"),
            strip.text.x = element_text(hjust = 1, vjust = 0, angle = 90, size = 10, color = "black", face = "bold"),
            legend.title = element_blank(),
            strip.placement = "outside",
            axis.ticks.y = element_blank()) + 
      labs(x = "", y = "") +
    scale_fill_steps2(
  low = "#2166ac",
  mid = "white",
  high = "#b2182b",
  midpoint = 0,
  space = "Lab",
  na.value = "#4d4d4d",
  guide = "coloursteps",
  aesthetics = "fill") +
    # coord_flip() +
    facet_grid(SITE ~ SUPERGROUP + PHYLUM, space = "free", scales = "free", switch = "both")
}
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/fig5b-clr-tile.svg", h = 11, w = 21)
maintext_supergroup <- c("Alveolata-Ciliophora", "Amoebozoa", "Apusozoa", "Excavata", "Rhizaria-Cercozoa", "Rhizaria-Radiolaria")
clr_to_plot_maintax(insitu_asv_wClass, res, maintext_supergroup)
## Warning: Expected 4 pieces. Additional pieces discarded in 10510 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 1 pieces. Additional pieces discarded in 2064 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

# dev.off()
unique(tax_key_curated$SUPERGROUP)
##  [1] "Alveolata-Ciliophora"     "Stramenopiles-Ochrophyta"
##  [3] "Rhizaria-Cercozoa"        "Stramenopiles-Opalozoa"  
##  [5] "Opisthokonta"             "Rhizaria-Radiolaria"     
##  [7] "Alveolata-Dinoflagellata" "Stramenopiles-Sagenista" 
##  [9] "Stramenopiles"            "Bacteria_X"              
## [11] "Apusozoa"                 "Archaeplastida"          
## [13] "Hacrobia"                 "Other Alveolata"         
## [15] "Amoebozoa"                "Excavata"                
## [17] "Terrabacteria"            "Archaea_X"               
## [19] "Protalveolata"
mast <- c("Stramenopiles-Opalozoa", "Stramenopiles-Sagenista")
clr_to_plot_maintax(insitu_asv_wClass, res, mast)
## Warning: Expected 4 pieces. Additional pieces discarded in 10510 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Expected 1 pieces. Additional pieces discarded in 748 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

# maintext_supergroup <- c("Alveolata-Ciliophora", "Alveolata-Dinoflagellata", "Rhizaria-Cercozoa", "Rhizaria-Radiolaria")
# clr_to_plot_maintax(insitu_asv_wClass, res, maintext_supergroup)
# maintext_supergroup <- c("Alveolata-Ciliophora", "Other Alveolata", "Amoebozoa", "Apusozoa", "Rhizaria-Cercozoa", "Rhizaria-Radiolaria")
# clr_to_plot_maintax(insitu_asv_wClass, all_classes, maintext_supergroup)

9 Driving forces in microeukaryote community structure

9.1 Diversity estimators (DivNet)

Run on HPC at WHOI.

load("data-input/asv-tables-processed-27012022.RData, verbose = T")

DivNet package - diversity estimation hypothesis testing from Amy Willis’s group. This will also characterize the uncertainty of the richness estimate. Richness estimation is flawed because of sample depth and processing methods.

library(phyloseq); library(breakaway); library(DivNet)
library(tidyverse)

This code block run on HPC.

# Select eukaryotes only and create wide format dataframe
insitu_wide <- asv_insitu_qc %>% 
  filter(Domain == "Eukaryota") %>% 
  filter(!grepl("_Plume001_", SAMPLE)) %>% #removing "near vent background", not relevant in other data sets
  select(FeatureID, Taxon, SAMPLE, value) %>%
  pivot_wider(names_from = SAMPLE, values_from = value, values_fill = 0)

# head(insitu_wide)
insitu_samples <- as.character(colnames(insitu_wide %>% select(-Taxon, -FeatureID)))
# insitu_samples
insitu_tax_matrix <- insitu_wide %>%
  select(FeatureID, Taxon) %>% 
  separate(Taxon, c("Domain", "Supergroup", 
                  "Phylum", "Class", "Order",
                  "Family", "Genus", "Species"), sep = ";") %>% 
  column_to_rownames(var = "FeatureID") %>% 
  as.matrix
## Warning: Expected 8 pieces. Additional pieces discarded in 6222 rows [3, 4, 6,
## 7, 9, 10, 11, 12, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 29, 32, ...].
## Warning: Expected 8 pieces. Missing pieces filled with `NA` in 4264 rows [1, 2,
## 5, 8, 13, 14, 16, 19, 21, 26, 30, 31, 33, 40, 41, 45, 46, 47, 48, 50, ...].
insitu_asv_matrix <- insitu_wide %>% 
  select(-Taxon) %>% 
  column_to_rownames(var = "FeatureID") %>% 
  as.matrix

# Align row names for each matrix
rownames(insitu_tax_matrix) <- row.names(insitu_asv_matrix)

Reformat metadata, but change to numeric.

metadata <- read.csv("data-input/samplelist-metadata.csv", na.strings = "NA")
# ?read.csv()
## Extract relevant metadata information
metadata_formatted <- metadata %>% 
  mutate_all(as.character) %>%
  filter(Sample_or_Control == "Sample") %>% 
  filter(!(SAMPLETYPE == "Incubation")) %>% 
  filter(!(SAMPLETYPE == "Microcolonizer")) %>% 
  select(SAMPLE, VENT, COORDINATES, SITE, SAMPLEID, DEPTH, SAMPLETYPE, YEAR, TEMP = starts_with("TEMP"), pH, PercSeawater = starts_with("Perc"), Mg = starts_with("Mg"), H2 = starts_with("H2."), H2S = starts_with("H2S"), CH4 = starts_with("CH4"), ProkConc, Sample_or_Control)

# Ensure these columns are converted to numeric
env_params <- c("DEPTH", "TEMP", "pH", "PercSeawater", "Mg", "H2", "H2S", "CH4", "ProkConc")

metadata_insitu <- metadata_formatted %>%
  na_if(., "bd") %>% na_if(., "nd") %>% na_if(., "-") %>% na_if(., "") %>% na_if(., "na") %>%
  filter(SAMPLE %in% insitu_samples) %>% # from reformatting df above
  # select(SAMPLE, VENT, SITE, SAMPLETYPE, YEAR) %>%
  unite(SAMPLELABEL, VENT, SITE, SAMPLETYPE, YEAR, sep = "_", remove = FALSE) %>% 
  unite(TYPE_SITE, SITE, SAMPLETYPE, sep = "_", remove = FALSE) %>% 
  mutate_at(env_params,as.numeric)
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
rownames(metadata_insitu) <- metadata_insitu$SAMPLE
# str(metadata_insitu)
# View(metadata_insitu)
# head(metadata_insitu)
# row.names(metadata_insitu)

Import taxa and ASV count matrices into phyloseq objects.

# Import asv and tax matrices
ASV = otu_table(insitu_asv_matrix, taxa_are_rows = TRUE)
TAX = tax_table(insitu_tax_matrix)

phylo_obj <- phyloseq(ASV, TAX)

# Import metadata as sample data in phyloseq
samplenames <- sample_data(metadata_insitu)

# join as phyloseq object
physeq_insitu = merge_phyloseq(phylo_obj, samplenames)

## Check
physeq_insitu
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 10486 taxa and 43 samples ]
## sample_data() Sample Data:       [ 43 samples by 19 sample variables ]
## tax_table()   Taxonomy Table:    [ 10486 taxa by 8 taxonomic ranks ]
# head(insitu_tax_matrix)
# head(metadata_insitu)

save(physeq_insitu, metadata_insitu, file = "phyloseq-objs-180122.RData") #use below for statistical analysis

9.1.1 Apply Divnet

Focus on genus and species level.

###
gen_divnet <- divnet(tax_glom(physeq_insitu, taxrank = "Genus"), base = 3)
gen_divnet_label <- divnet(tax_glom(physeq_insitu, taxrank = "Genus"), X = "SAMPLELABEL", base = 3)
# Vent vs plume vs background
gen_divnet_TYPE <- divnet(tax_glom(physeq_insitu, taxrank = "Genus"), X = "SAMPLETYPE", base = 3)
# location and vent vs plume vs background
gen_divnet_TYPE_SITE <- divnet(tax_glom(physeq_insitu, taxrank = "Genus"), X = "TYPE_SITE", base = 3)
save(gen_divnet, gen_divnet_label, gen_divnet_TYPE, gen_divnet_TYPE_SITE, file = "GENUS.Rdata")

###
## Analyzed at the ASV level:
spp_divnet <- divnet(tax_glom(physeq_insitu, taxrank = "Species"), base = 3)

spp_divnet_label <- divnet(tax_glom(physeq_insitu, taxrank = "Species"), X = "SAMPLELABEL", base = 3)
# Vent vs plume vs background
spp_divnet_TYPE <- divnet(tax_glom(physeq_insitu, taxrank = "Species"), X = "SAMPLETYPE", base = 3)
# location and vent vs plume vs background
spp_divnet_TYPE_SITE <- divnet(tax_glom(physeq_insitu, taxrank = "Species"), X = "TYPE_SITE", base = 3)
save(spp_divnet, spp_divnet_label, spp_divnet_TYPE, spp_divnet_TYPE_SITE, file = "SPECIES.Rdata")

9.2 Extract DivNet output and visualize

Above run on HPC and RData files save so we can look at various levels of species richness.

Function to extract shannon and simpson data from each divnet output.

fxn_extract_divet <- function(df){
  df$shannon %>% summary %>% 
  pivot_longer(cols = starts_with("estimate"), names_to = "ESTIMATE-shannon", values_to = "Shannon") %>%
  pivot_longer(cols = starts_with("error"), names_to = "ERROR-shannon", values_to = "Shannon-error") %>%
  pivot_longer(cols = starts_with("lower"), names_to = "LOWER-shannon", values_to = "Shannon-lower") %>%
  pivot_longer(cols = starts_with("upper"), names_to = "UPPER-shannon", values_to = "Shannon-upper") %>%
  left_join(df$simpson %>% summary %>%
      pivot_longer(cols = starts_with("estimate"), names_to = "ESTIMATE-simpson", values_to = "Simpson") %>%
      pivot_longer(cols = starts_with("error"), names_to = "ERROR-simpson", values_to = "Simpson-error") %>%
      pivot_longer(cols = starts_with("lower"), names_to = "LOWER-simpson", values_to = "Simpson-lower") %>%
      pivot_longer(cols = starts_with("upper"), names_to = "UPPER-simpson", values_to = "Simpson-upper"),
    by = c("sample_names" = "sample_names")) %>%
  left_join(metadata_insitu %>% rownames_to_column(var = "sample_names")) %>%
  select(-sample_names, -ends_with("-simpson"), -ends_with("-shannon"), -starts_with("model."), -starts_with("name.")) %>%
  distinct()
}

###Function to create plots

### Set colors and symbols for dataset
sampletype_order <- c("Background", "Plume", "Vent")
site_symbol<- c(21, 23, 24, 22)
site_order <- c("Axial", "GordaRidge", "Piccard", "VonDamm")
site_color <- c("#fdbb84", "#31a354", "#ef3b2c", "#02818a")

###

plot_sampletype <- function(df){
  df$SITE_ORDER <- factor(df$SITE, levels = site_order)
  df$SAMPLETYPE_ORDER <- factor(df$SAMPLETYPE, levels = sampletype_order)
  names(site_symbol) <- site_order
  plot_grid(df %>%
  ggplot(aes(x = SAMPLETYPE_ORDER, y = Shannon, group = SAMPLETYPE_ORDER)) +
    geom_violin(color = "#525252", fill = "#525252", alpha = 0.2, width = 0.6, draw_quantiles = c(0.25, 0.5, 0.75)) +
    geom_jitter(color = "white", fill = "black", width = 0.2, size = 2, aes(shape = SITE_ORDER)) +
    scale_shape_manual(values = site_symbol) +
  theme_linedraw() +
  theme(axis.text.y = element_text(size = 14),
        axis.text.x = element_blank(),
        strip.background = element_blank(),
        strip.text = element_text(color = "black"),
        legend.position = "none",
        axis.ticks.x = element_blank()) +
  labs(x = "", y = "Shannon"),
  df %>%
    ggplot(aes(x = SAMPLETYPE_ORDER, y = Simpson, group = SAMPLETYPE_ORDER)) +
    geom_violin(color = "#525252", fill = "#525252", alpha = 0.1, width = 0.6, draw_quantiles = c(0.25, 0.5, 0.75)) +
    geom_jitter(color = "white", fill = "black", width = 0.2, size = 2.5, aes(shape = SITE_ORDER)) +
    scale_shape_manual(values = site_symbol) +
  theme_linedraw() +
  theme(axis.text.x = element_text(vjust = 1, hjust = 0.5, size = 14),
        axis.text = element_text(size = 14),
        strip.background = element_blank(),
        strip.text = element_blank(),
        legend.title = element_blank(),
        legend.position = "none") +
  labs(x = "", y = "Simpson"),
  ncol = 1, axis = c("lrt"), align = c("vh"))
}

9.2.1 Plot Divnet results

Below commands use output from DivNet at different taxonomic levels to determine if pattern of Shannon index is consistent across taxonomic levels, which it appears to be. For final work, will use the species level (ASV level).

load("data-input/SPECIES.Rdata", verbose = T)
## Loading objects:
##   spp_divnet
##   spp_divnet_label
##   spp_divnet_TYPE
##   spp_divnet_TYPE_SITE
spp_alpha_18s <- fxn_extract_divet(spp_divnet)
spp_alpha_label <- fxn_extract_divet(spp_divnet_label)
spp_alpha_TYPE <- fxn_extract_divet(spp_divnet_TYPE)
spp_alpha_TYPE_SITE <- fxn_extract_divet(spp_divnet_TYPE_SITE)

plot_sampletype(spp_alpha_18s)

spp_alpha_label$SITE_ORDER <- factor(spp_alpha_label$SITE, levels = site_order)
spp_alpha_label$SAMPLETYPE_ORDER <- factor(spp_alpha_label$SAMPLETYPE, levels = sampletype_order)
names(site_symbol) <- site_order
sampletype_symbol<- c(21, 23, 24)
# names(sampletype_symbol) <- sampletype_order
# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/fig4b-shannon.svg", h = 4, w = 5)
spp_alpha_label %>%
  ggplot(aes(x = SAMPLETYPE_ORDER, y = Shannon, group = SAMPLETYPE_ORDER)) +
    geom_violin(color = "#525252", fill = "#525252", alpha = 0.2, width = 0.6, draw_quantiles = c(0.25, 0.5, 0.75)) +
    geom_jitter(color = "white", fill = "black", width = 0.2, size = 2, aes(shape = SITE_ORDER)) +
    # scale_fill_manual(values = site_color) +
    scale_shape_manual(values = site_symbol) +
  theme_linedraw() +
  theme(axis.text.y = element_text(size = 12, color = "black", face = "bold"),
        axis.text.x = element_text(size = 12, color = "black", face = "bold"),
        axis.title = element_text(size = 12, color = "black", face = "bold"),
        strip.background = element_blank(),
        strip.text = element_text(color = "black", face = "bold"),
        legend.position = "none",
        # legend.position = "right",
        legend.title = element_blank(),
        axis.ticks.x = element_blank()) +
  labs(x = "", y = "Shannon")

# dev.off()

What vents or samples have the highest Shannon index?

# View(spp_alpha_label)

Visualize with estimator variance included.

# head(spp_alpha_18s)
# View(spp_alpha_label)
# View(spp_alpha_TYPE_SITE)
##
plot_grid(spp_alpha_label %>% 
  select(VENT, SITE, SAMPLETYPE, starts_with("Shannon"), starts_with("Simpson")) %>% 
  distinct() %>% 
  ggplot(aes(x = VENT, y = Shannon, fill = SITE, shape = SAMPLETYPE)) +
  geom_errorbar(aes(ymin = `Shannon-lower`, ymax = `Shannon-upper`), color = "#525252", width = 0.2) +
  geom_point(color = "#525252", size = 2, aes(fill = SITE, shape = SAMPLETYPE)) +
  scale_shape_manual(values = sampletype_symbol) +
  scale_fill_manual(values = site_color) +
  # coord_flip() +
  theme_linedraw() +
  theme(axis.text.x = element_text(angle = 0),
        panel.background = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_text(color = "black", angle = 0, vjust = 0.5, hjust = 0.5),
        legend.position = "none",
        panel.grid = element_blank(),
        axis.ticks.y = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line()) +
  facet_grid(SITE + SAMPLETYPE ~ ., scale = "free", space = "free") +
    labs(x = "", y = "Shannon"),
#
spp_alpha_label %>% 
  select(VENT, SITE, SAMPLETYPE, starts_with("Shannon"), starts_with("Simpson")) %>% 
  distinct() %>% 
  ggplot(aes(x = VENT, y = Simpson, fill = SITE, shape = SAMPLETYPE)) +
  geom_errorbar(aes(ymin = `Simpson-lower`, ymax = `Simpson-upper`), color = "#525252", width = 0.2) +
  geom_point(color = "#525252", size = 2, aes(fill = SITE, shape = SAMPLETYPE)) +
  scale_shape_manual(values = sampletype_symbol) +
  scale_fill_manual(values = site_color) +
  # coord_flip() +
  theme_linedraw() +
  theme(axis.text.y = element_blank(),
        panel.background = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_text(color = "black", angle = 0, vjust = 0.5, hjust = 0.5),
        legend.position = "none",
        panel.grid = element_blank(),
        axis.ticks.y = element_blank(),
        panel.border = element_blank(),
        axis.line.x = element_line()) +
  facet_grid(SITE + SAMPLETYPE ~ ., scale = "free", space = "free") +
    labs(x = "", y = "Simpson"),
ncol = 2, rel_widths = c(1,0.7))

shannon_divnet <- spp_alpha_label %>% 
  select(VENT, SITE, SAMPLETYPE, starts_with("Shannon"), starts_with("Simpson")) %>% 
  distinct() %>% 
  ggplot(aes(x = VENT, y = Shannon, fill = SITE, shape = SAMPLETYPE)) +
  geom_errorbar(aes(ymin = `Shannon-lower`, ymax = `Shannon-upper`), color = "#525252", width = 0.2) +
  geom_point(color = "#525252", size = 2, aes(fill = SITE, shape = SAMPLETYPE)) +
  scale_shape_manual(values = sampletype_symbol) +
  scale_fill_manual(values = site_color) +
  # coord_flip() +
  theme_linedraw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        panel.background = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_text(color = "black", angle = 0, vjust = 0.5, hjust = 0.5),
        legend.position = "none",
        panel.grid = element_blank(),
        axis.ticks.y = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line()) +
  facet_grid(. ~ SITE + SAMPLETYPE, scale = "free", space = "free") +
    labs(x = "", y = "Shannon")

Figure S6.

# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/figS6.svg", h=4, w = 9)

shannon_divnet

# dev.off()

10 Network analysis

Import data frames below and format as phyloseq. Subsample to select ASVs that appear in more than 1 sample and have a total of 100 or more sequences. This leaves 2577 total ASVs across 43 samples.

Run below commands on HPC

load("data-input/asv-tables-processed-27012022.RData", verbose = T)
library(phyloseq)
# library(SpiecEasi)
# Select eukaryotes only and create wide format dataframe
insitu_wide_nosingle <- asv_insitu_qc %>%
  filter(Domain == "Eukaryota") %>%
  filter(!grepl("_Plume001_", SAMPLE)) %>% #removing "near vent background", not relevant in other data sets
  select(FeatureID, Taxon, SAMPLE, value) %>%
  pivot_wider(names_from = SAMPLE, values_from = value, values_fill = 0) %>%
  mutate(PREVALENCE = rowSums(select_if(., is.numeric) > 0),
         SEQ_TOTAL = rowSums(select_if(., is.numeric))) %>%
  filter(PREVALENCE >= 1) %>% 
  filter(SEQ_TOTAL >= 100)
#
# ASVs had to appear in more than 1 sample and have at least 100 sequences
length(unique(insitu_wide_nosingle$FeatureID))
length(unique(asv_insitu_qc$FeatureID))

# head(insitu_wide)
insitu_samples <- as.character(colnames(insitu_wide_nosingle %>% select(-Taxon, -FeatureID)))


# make matrices for phyloseq
insitu_tax_matrix <- insitu_wide_nosingle %>%
  select(FeatureID, Taxon) %>%
  separate(Taxon, c("Domain", "Supergroup",
                  "Phylum", "Class", "Order",
                  "Family", "Genus", "Species"), sep = ";") %>%
  column_to_rownames(var = "FeatureID") %>%
  as.matrix

insitu_asv_matrix <- insitu_wide_nosingle %>%
  select(-Taxon) %>%
  column_to_rownames(var = "FeatureID") %>%
  as.matrix


# Align row names for each matrix
rownames(insitu_tax_matrix) <- row.names(insitu_asv_matrix)

metadata_insitu <- metadata %>%
  filter(SAMPLE %in% insitu_samples) %>% # from reformatting df above
  select(SAMPLE, VENT, SITE, SAMPLETYPE, YEAR) %>%
  unite(SAMPLELABEL, VENT, SITE, SAMPLETYPE, YEAR, sep = "_", remove = FALSE) %>%
  unite(TYPE_SITE, SITE, SAMPLETYPE, sep = "_", remove = FALSE)

rownames(metadata_insitu) <- metadata_insitu$SAMPLE

# Import asv and tax matrices
ASV = otu_table(insitu_asv_matrix, taxa_are_rows = TRUE)
TAX = tax_table(insitu_tax_matrix)

phylo_obj <- phyloseq(ASV, TAX)

# Import metadata as sample data in phyloseq
samplenames <- sample_data(metadata_insitu)

## Check
physeq_insitu

Run SPIEC-EASI with phyloseq object.

# Run spiec easi with glasso
pargs2 <- list(rep.num = 50, seed = 10010, ncores = 10)
spec_glasso_microeuk <- spiec.easi(physeq_insitu, method = 'glasso', lambda.min.ratio=1e-2, nlambda=20,pulsar.params=pargs2)

# save(spec_glasso_microeuk, file = "spiec-easi-output-03-12-21.RData")

10.1 Results from 12-8-2021

Isolate ASV-ASV pairs of interest

# load("spiec-easi-output-03-12-21.RData", verbose = T) # almost 5GB file!

getStability(spec_glasso_microeuk) # Target == 0.05
# [1] 0.03827056
sum(getRefit(spec_glasso_microeuk))/2
# [1] 45904.5

# spec_glasso_microeuk
# Pulsar-selected refit of sparseiCov 
# Path length: 20 
# Graph dim:   2577 
# Criterion:
#   stars... sparsity 0.0138

Extract weighted matrix

# se_beta <- as.matrix(symBeta(getOptBeta(spec_glasso_microeuk)))
# df_beta <- as.data.frame(se_beta)

# Extract weight information
glasso_weight  <- cov2cor(as.matrix(getOptCov(spec_glasso_microeuk)))

colnames(glasso_weight) <- rownames(glasso_weight)# <- colnames(Networ_taxa_DF_WideMat) #here i may be able to give the verticies the taxa names if i feed it the vector of names from the levels.

weighted_adj_mat <- glasso_weight*getRefit(spec_glasso_microeuk)
df_weighted <- as.data.frame(as.matrix(weighted_adj_mat))
# Assign column and row names - from original glasso output matrix data
colnames(df_weighted) <- colnames(spec_glasso_microeuk$est$data)
row.names(df_weighted) <- colnames(spec_glasso_microeuk$est$data)

Work with weighted dataframe

key <- insitu_asv_wClass %>%
  select(FeatureID, Taxon, Domain:Species, CLASS, SITE_CLASS) %>% 
  distinct()
# head(key)

df_spieceasi <- df_weighted %>% 
 rownames_to_column(var = "sideA") %>% 
  pivot_longer(cols = -sideA, names_to = "sideB") %>% 
  left_join(key, by = c(sideA = "FeatureID")) %>% 
  left_join(key, by = c(sideB = "FeatureID"), suffix = c("_sideA", "_sideB")) %>% 
  distinct()

# 6640929 total interactions
## 6 million edged?
df_spieceasi_filtered <- df_spieceasi %>% 
  filter(abs(value) > 0.01) %>% 
  mutate(Interaction = case_when(
    value < 0 ~ "negative",
    value > 0 ~ "positive"
  ))
# Leaves 91,288 interactions
## Interaction type
# negative positive 
    # 3363    87925 

save(df_spieceasi_filtered, file = "filtered-spieceasi-result-08122021.RData")  

10.2 Bring results locally

load("data-input/filtered-spieceasi-result-08122021.RData", verbose = TRUE)
## Loading objects:
##   df_spieceasi_filtered

H0: the majority of protist-protist pairs will reveal host-parasite interactions, and then predator-prey.

Questions to ask regarding the SPIEC EASI results. - What is the overall taxonomic composition of negative and positive co-occurring ASVs? - What percentage of putative interactions include likely parasitic protists?

Format, get summary stats from network analyss.

# head(df_spieceasi_filtered)

spieceasi_rm_reps <- df_spieceasi_filtered %>% 
  mutate(TMP_ASV_REP = purrr::map2_chr(sideA, sideB, ~toString(sort(c(.x, .y))))) %>%
  select(TMP_ASV_REP, value, Interaction) %>% 
    group_by(TMP_ASV_REP, value, Interaction) %>% 
    distinct() %>% 
  ungroup() %>% 
  separate(TMP_ASV_REP, c("sideA", "sideB"), sep = ", ") %>% 
  left_join((select(df_spieceasi_filtered, ends_with("sideA")) %>% distinct())) %>% 
  left_join((select(df_spieceasi_filtered, ends_with("sideB")) %>% distinct()))

head(spieceasi_rm_reps)
## # A tibble: 6 × 26
##   sideA      sideB value Inter…¹ Taxon…² Domai…³ Super…⁴ Phylu…⁵ Class…⁶ Order…⁷
##   <chr>      <chr> <dbl> <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>  
## 1 000ee3774… 0413… 0.449 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 2 000ee3774… 0769… 0.395 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 3 000ee3774… 15d8… 0.449 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 4 000ee3774… 1849… 0.432 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 5 000ee3774… 1a9b… 0.443 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 6 000ee3774… 1c51… 0.416 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## # … with 16 more variables: Family_sideA <chr>, Genus_sideA <chr>,
## #   Species_sideA <chr>, CLASS_sideA <chr>, SITE_CLASS_sideA <chr>,
## #   Taxon_sideB <chr>, Domain_sideB <chr>, Supergroup_sideB <chr>,
## #   Phylum_sideB <chr>, Class_sideB <chr>, Order_sideB <chr>,
## #   Family_sideB <chr>, Genus_sideB <chr>, Species_sideB <chr>,
## #   CLASS_sideB <chr>, SITE_CLASS_sideB <chr>, and abbreviated variable names
## #   ¹​Interaction, ²​Taxon_sideA, ³​Domain_sideA, ⁴​Supergroup_sideA, …

Look at highest percentage of ASV-ASV pairs by various categories.

# Get stats
totaloccur <- dim(spieceasi_rm_reps)[1]

spieceasi_rm_reps %>% 
  unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
  unite(SITE_CLASS_joined, SITE_CLASS_sideA, SITE_CLASS_sideB, sep = "-") %>%
  unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
  group_by(CLASS_joined, Interaction) %>%
  summarise(COUNT = n(),
            PERC = 100*(COUNT/totaloccur)) %>% 
  arrange(desc(PERC))
## # A tibble: 63 × 4
## # Groups:   CLASS_joined [43]
##    CLASS_joined                                        Interaction COUNT  PERC
##    <chr>                                               <chr>       <int> <dbl>
##  1 Vent only-Vent only                                 positive    16727 36.6 
##  2 Vent, plume, & background-Vent, plume, & background positive     7076 15.5 
##  3 Vent only-Vent & background                         positive     1518  3.32
##  4 Vent & background-Vent only                         positive     1435  3.14
##  5 Vent only-Vent, plume, & background                 positive     1319  2.89
##  6 Vent, plume, & background-Vent only                 positive     1180  2.58
##  7 Plume only-Plume only                               positive     1099  2.41
##  8 Vent & plume-Vent only                              positive     1090  2.39
##  9 Vent only-Vent & plume                              positive      932  2.04
## 10 Vent & plume-Vent, plume, & background              positive      897  1.96
## # … with 53 more rows

ASV-ASV pairs were primarily among ASVs classified as ‘Vent only’ - both ASVs were resident. 36%

Secondly, 15% of the ASV pairs were from ASVs specifically found in all sample types.

Again, mostly all positive. The highest occurence of negative interactions were between Vent only- Vent, plume, and background.

asv_asv <- c("Vent only-Vent only", "Vent, plume, & background-Vent, plume, & background")

# Get stats
totaloccur <- dim(spieceasi_rm_reps)[1]

spieceasi_rm_reps %>% 
  unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
  unite(SITE_CLASS_joined, SITE_CLASS_sideA, SITE_CLASS_sideB, sep = "-") %>%
  unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
  filter(CLASS_joined %in% asv_asv) %>% 
  group_by(CLASS_joined) %>% 
    mutate(TOTALCOUNT = n()) %>% 
  group_by(CLASS_joined, PHYLUM_joined, Interaction, TOTALCOUNT) %>%
  summarise(COUNT = n(),
            PERC = 100*(COUNT/TOTALCOUNT)) %>%
  ungroup() %>% 
  arrange(desc(PERC)) %>% 
  distinct() %>% 
  select(CLASS_joined, PHYLUM_joined, PERC, Interaction) %>% 
  pivot_wider(names_from = CLASS_joined, values_from = PERC)
## # A tibble: 721 × 4
##    PHYLUM_joined                 Interaction `Vent only-Vent only` Vent, plume…¹
##    <chr>                         <chr>                       <dbl>         <dbl>
##  1 Ciliophora-Ciliophora         positive                   15.0          2.80  
##  2 Dinoflagellata-Dinoflagellata positive                    2.95        12.6   
##  3 Dinoflagellata-Radiolaria     positive                    0.837        5.56  
##  4 Radiolaria-Dinoflagellata     positive                    0.705        5.41  
##  5 Radiolaria-Radiolaria         positive                    0.209        3.74  
##  6 Dinoflagellata-Ciliophora     positive                    3.34         3.39  
##  7 Ciliophora-Dinoflagellata     positive                    2.86         2.87  
##  8 Cercozoa-Ciliophora           positive                    2.43         0.0519
##  9 Ciliophora-Cercozoa           positive                    2.20         0.182 
## 10 Haptophyta-Haptophyta         positive                    1.97         0.441 
## # … with 711 more rows, and abbreviated variable name
## #   ¹​`Vent, plume, & background-Vent, plume, & background`

Within ‘Vent only-Vent only’ ASV pairs, almost 15% were among ciliates (ciliate-ciliate), while for the ‘Vent, plume, & backgroun’ ASV pairs, almost 13% were between dinoflagellates.

Isolate the ASV-ASV pairs that appeared most frequently. These include ciliates, dinoflagellates, and radiolaria within the resident and cosmopolitan (the latter includes ASVs that appears at least once ALL sample types).

rad_dino <- c("Dinoflagellata-Radiolaria", "Radiolaria-Dinoflagellata", "Radiolaria-Radiolaria")

ciliate_ciliate <- spieceasi_rm_reps %>% 
  unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
  unite(SITE_CLASS_joined, SITE_CLASS_sideA, SITE_CLASS_sideB, sep = "-") %>%
  unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
  filter(CLASS_joined %in% asv_asv) %>%
  filter(PHYLUM_joined == "Ciliophora-Ciliophora")

dino_dino <- spieceasi_rm_reps %>% 
  unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
  unite(SITE_CLASS_joined, SITE_CLASS_sideA, SITE_CLASS_sideB, sep = "-") %>%
  unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
  filter(CLASS_joined %in% asv_asv) %>%
  filter(PHYLUM_joined == "Dinoflagellata-Dinoflagellata")

rad_dino <- spieceasi_rm_reps %>% 
  unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
  unite(SITE_CLASS_joined, SITE_CLASS_sideA, SITE_CLASS_sideB, sep = "-") %>%
  unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
  filter(CLASS_joined %in% asv_asv) %>%
  filter(PHYLUM_joined %in% rad_dino)
total <- dim(spieceasi_rm_reps)[1]
head(dino_dino)
## # A tibble: 6 × 25
##   sideA     sideB  value Inter…¹ Taxon…² Domai…³ Super…⁴ PHYLU…⁵ Phylu…⁶ Class…⁷
##   <chr>     <chr>  <dbl> <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>  
## 1 00878368… 6e00… 0.0964 positi… Eukary… Eukary… Alveol… Dinofl… Dinofl… Syndin…
## 2 00878368… 7fc5… 0.0947 positi… Eukary… Eukary… Alveol… Dinofl… Dinofl… Syndin…
## 3 00d8341a… 0177… 0.450  positi… Eukary… Eukary… Alveol… Dinofl… Dinofl… Syndin…
## 4 00d8341a… 1ab8… 0.449  positi… Eukary… Eukary… Alveol… Dinofl… Dinofl… Syndin…
## 5 00d8341a… 4029… 0.449  positi… Eukary… Eukary… Alveol… Dinofl… Dinofl… Syndin…
## 6 00d8341a… 60d4… 0.449  positi… Eukary… Eukary… Alveol… Dinofl… Dinofl… Syndin…
## # … with 15 more variables: Order_sideA <chr>, Family_sideA <chr>,
## #   Genus_sideA <chr>, Species_sideA <chr>, CLASS_joined <chr>,
## #   SITE_CLASS_joined <chr>, Taxon_sideB <chr>, Domain_sideB <chr>,
## #   Supergroup_sideB <chr>, Phylum_sideB <chr>, Class_sideB <chr>,
## #   Order_sideB <chr>, Family_sideB <chr>, Genus_sideB <chr>,
## #   Species_sideB <chr>, and abbreviated variable names ¹​Interaction,
## #   ²​Taxon_sideA, ³​Domain_sideA, ⁴​Supergroup_sideA, ⁵​PHYLUM_joined, …
(dim(dino_dino)[1]/total) * 100
## [1] 3.346547
(dim(ciliate_ciliate)[1]/total) * 100
## [1] 5.959395
by_phylum <- (spieceasi_rm_reps %>% unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-"))

10.2.1 Ciliate-ciliate

Isolate associations with ciliates

# head(ciliate_ciliate)
# table(ciliate_ciliate$SITE_CLASS_joined)
# table(ciliate_ciliate
ciliate_ciliate %>% 
  filter(CLASS_joined == "Vent only-Vent only") %>% 
  # filter(!is.na(Phylum_sideA) | !is.na(Phylum_sideB)) %>%
  ggplot(aes(x = Taxon_sideA, y = Taxon_sideB, fill = value)) +
  geom_tile(color = "black") +
  scale_fill_gradient(low = "#bfd3e6", high = "#810f7c") +
  theme_linedraw() +
  facet_grid(Class_sideB +  Order_sideB ~ Class_sideA + Order_sideA, space = "free", scales = "free") +
  theme(axis.text = element_blank(),
      panel.grid.minor = element_blank(),
      strip.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
      strip.text.y = element_text(angle = 0, hjust = 0, vjust = 0.5),
      strip.text = element_text(color = "black", face = "bold"),
      strip.background = element_blank())

Isolate functional traits from these groups of ciliates

# order <- c("Plagiopylea", "Nassophorea", "Litostomatea")
# family <- c("Scuticociliatia", "Euplotia", "Suctoria", "Peritrichia")
# genus <- c("Strombidiidae")

Tile plot of vent-only (resident) ciliate-ciliate ASV co-occurrences.

# head(ciliate_ciliate)
# hist(ciliate_ciliate$value)

ciliate_ciliate %>% 
       filter(value >= 0.4) %>% 
       select(starts_with(c("Class_", "Order_")), PHYLUM_joined) %>% 
       select(-CLASS_joined) %>% 
       # select(Taxon_sideA, Taxon_sideB, PHYLUM_joined) %>% 
       unite(SIDEA, Class_sideA, Order_sideA, sep = ";", remove = FALSE) %>%
        unite(SIDEB, Class_sideB, Order_sideB, sep = ";", remove = FALSE) %>% 
      group_by(SIDEA, SIDEB, Class_sideA, Class_sideB) %>% 
      summarise(COUNT = n()) %>% 
  ggplot(aes(x = SIDEA, y = SIDEB, fill = COUNT)) +
    geom_tile(color = "black") +
  scale_fill_gradient(low = "#bfd3e6", high = "#810f7c") +
  theme_linedraw() +
  facet_grid(Class_sideB ~ Class_sideA, space = "free", scales = "free") +
  theme(panel.grid.minor = element_blank(),
      strip.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
      strip.text.y = element_text(angle = 0, hjust = 0, vjust = 0.5),
      strip.text = element_text(color = "black", face = "bold"),
      strip.background = element_blank(),
      axis.title = element_blank(),
      axis.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5))

10.2.2 Dinoflagellate

dino_dino %>% 
  filter(CLASS_joined == "Vent only-Vent only") %>% 
  # filter(!is.na(Phylum_sideA) | !is.na(Phylum_sideB)) %>%
  ggplot(aes(x = Taxon_sideA, y = Taxon_sideB, fill = value)) +
  geom_tile(color = "black") +
  scale_fill_gradient(low = "#bfd3e6", high = "#810f7c") +
  theme_linedraw() +
  facet_grid(Class_sideB +  Order_sideB ~ Class_sideA + Order_sideA, space = "free", scales = "free") +
  theme(axis.text = element_blank(),
      panel.grid.minor = element_blank(),
      strip.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
      strip.text.y = element_text(angle = 0, hjust = 0, vjust = 0.5),
      strip.text = element_text(color = "black", face = "bold"),
      strip.background = element_blank())

Dinos and others Get stats on Ciliate-other ASV pairs for a table

10.2.3 Retaria and dino

rad_dino %>% 
  filter(CLASS_joined == "Vent only-Vent only") %>% 
  # filter(!is.na(Phylum_sideA) | !is.na(Phylum_sideB)) %>%
  ggplot(aes(x = Taxon_sideA, y = Taxon_sideB, fill = value)) +
  geom_tile(color = "black") +
  scale_fill_gradient(low = "#bfd3e6", high = "#810f7c") +
  theme_linedraw() +
  facet_grid(Class_sideB +  Order_sideB ~ Class_sideA + Order_sideA, space = "free", scales = "free") +
  theme(axis.text = element_blank(),
      panel.grid.minor = element_blank(),
      strip.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5),
      strip.text.y = element_text(angle = 0, hjust = 0, vjust = 0.5),
      strip.text = element_text(color = "black", face = "bold"),
      strip.background = element_blank())

10.3 Isolate most frequent co-occurrences

Information for table 3.

head(spieceasi_rm_reps)
## # A tibble: 6 × 26
##   sideA      sideB value Inter…¹ Taxon…² Domai…³ Super…⁴ Phylu…⁵ Class…⁶ Order…⁷
##   <chr>      <chr> <dbl> <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>  
## 1 000ee3774… 0413… 0.449 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 2 000ee3774… 0769… 0.395 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 3 000ee3774… 15d8… 0.449 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 4 000ee3774… 1849… 0.432 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 5 000ee3774… 1a9b… 0.443 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## 6 000ee3774… 1c51… 0.416 positi… Eukary… Eukary… Alveol… Ciliop… Nassop… Nassop…
## # … with 16 more variables: Family_sideA <chr>, Genus_sideA <chr>,
## #   Species_sideA <chr>, CLASS_sideA <chr>, SITE_CLASS_sideA <chr>,
## #   Taxon_sideB <chr>, Domain_sideB <chr>, Supergroup_sideB <chr>,
## #   Phylum_sideB <chr>, Class_sideB <chr>, Order_sideB <chr>,
## #   Family_sideB <chr>, Genus_sideB <chr>, Species_sideB <chr>,
## #   CLASS_sideB <chr>, SITE_CLASS_sideB <chr>, and abbreviated variable names
## #   ¹​Interaction, ²​Taxon_sideA, ³​Domain_sideA, ⁴​Supergroup_sideA, …
freq <- spieceasi_rm_reps %>% 
  unite(CLASS_joined, CLASS_sideA, CLASS_sideB, sep = "-") %>%
  filter(!(is.na(Phylum_sideA)) | !is.na(Phylum_sideB)) %>% 
  unite(PHYLUM_joined, Phylum_sideA, Phylum_sideB, remove = FALSE, sep = "-") %>%
  group_by(CLASS_joined, PHYLUM_joined, Taxon_sideA, Taxon_sideB) %>% 
    summarise(FREQUENCY = n()) %>% 
  arrange(desc(FREQUENCY)) %>% 
  filter(FREQUENCY > 2)

# View(freq %>% filter(grepl("Cercozoa-", PHYLUM_joined)))
# View(freq %>% filter(grepl("Haptophyta-", PHYLUM_joined)))
# View(freq %>% filter(grepl("Opalozoa-", PHYLUM_joined) | grepl("Pseudofungi-", PHYLUM_joined)))
# View(freq %>% filter(grepl("Radiolaria-", PHYLUM_joined)))
# range(freq$FREQUENCY)

10.4 Test influence of abiotic/biotic factors on community composition.

10.4.1 Set up data and test for outliers

Import phyloseq-saved RObjects.

# library(tidyverse); library(phyloseq); library(compositions); library(vegan)
load("phyloseq-objs-180122.RData", verbose = TRUE)
## Loading objects:
##   physeq_insitu
##   metadata_insitu

Isolate ASV count table using phyloseq commands and perform a CLR transformation.

# Extract phyloseq object & CLR transform
count_table_clr <- compositions::clr(otu_table(physeq_insitu))
# sample_data(physeq_insitu)
?clr()
count_table_notrans <- otu_table(physeq_insitu)

Check all samples considered and estimate distance matrix (Euclidean)

# transpose and make dist matrix
sample_data(physeq_insitu)
##                                                                                                          SAMPLE
## GordaRidge_Vent039_SUPRS1_2019                                                   GordaRidge_Vent039_SUPRS1_2019
## GordaRidge_Vent040_SUPRS2_2019                                                   GordaRidge_Vent040_SUPRS2_2019
## GordaRidge_BSW081_sterivex_2019                                                 GordaRidge_BSW081_sterivex_2019
## Axial_Boca_FS905_2013                                                                     Axial_Boca_FS905_2013
## Axial_Skadi_FS910_2014                                                                   Axial_Skadi_FS910_2014
## GordaRidge_BSW056_sterivex_2019_REPb                                       GordaRidge_BSW056_sterivex_2019_REPb
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021                     70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021
## Axial_N3Area_FS898_2013                                                                 Axial_N3Area_FS898_2013
## GordaRidge_Vent088_SUPRS3_2019                                                   GordaRidge_Vent088_SUPRS3_2019
## GordaRidge_Plume036_sterivex_2019_REPb                                   GordaRidge_Plume036_sterivex_2019_REPb
## GordaRidge_Plume096_sterivex_2019                                             GordaRidge_Plume096_sterivex_2019
## GordaRidge_Vent105_SUPRS9_2019                                                   GordaRidge_Vent105_SUPRS9_2019
## Axial_ElGuapo_FS896_2013                                                               Axial_ElGuapo_FS896_2013
## Axial_Marker33_FS908_2014                                                             Axial_Marker33_FS908_2014
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021                     80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021                             65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021         71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021           66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021       78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021
## GordaRidge_Vent041_SUPRS3_2019                                                   GordaRidge_Vent041_SUPRS3_2019
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021         72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021                       76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021
## Axial_Anemone_FS891_2013                                                               Axial_Anemone_FS891_2013
## Axial_Marker113_FS915_2015                                                           Axial_Marker113_FS915_2015
## Axial_Marker33_FS904_2013                                                             Axial_Marker33_FS904_2013
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021                                 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021
## GordaRidge_Vent086_SUPRS1_2019                                                   GordaRidge_Vent086_SUPRS1_2019
## Axial_AnemonePlume_AnemonePlume_2015                                       Axial_AnemonePlume_AnemonePlume_2015
## Axial_Marker113_FS903_2013                                                           Axial_Marker113_FS903_2013
## Axial_Marker113_FS906_2014                                                           Axial_Marker113_FS906_2014
## GordaRidge_Vent009_SUPRS1_2019                                                   GordaRidge_Vent009_SUPRS1_2019
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021                                 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021
## GordaRidge_Vent011_SUPRS3_2019                                                   GordaRidge_Vent011_SUPRS3_2019
## Axial_BSW1500m_BSW1500m_2015                                                       Axial_BSW1500m_BSW1500m_2015
## GordaRidge_Vent106_SUPRS10_2019                                                 GordaRidge_Vent106_SUPRS10_2019
## GordaRidge_Vent107_SUPRS11_2019                                                 GordaRidge_Vent107_SUPRS11_2019
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021                             74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021
## GordaRidge_Vent010_SUPRS2_2019                                                   GordaRidge_Vent010_SUPRS2_2019
## GordaRidge_Vent087_SUPRS2_2019                                                   GordaRidge_Vent087_SUPRS2_2019
## Axial_Skadi_FS902_2013                                                                   Axial_Skadi_FS902_2013
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021       67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021     69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021
##                                                                                         SAMPLELABEL
## GordaRidge_Vent039_SUPRS1_2019                                     Venti Latte_GordaRidge_Vent_2019
## GordaRidge_Vent040_SUPRS2_2019                                     Venti Latte_GordaRidge_Vent_2019
## GordaRidge_BSW081_sterivex_2019                         Shallow seawater_GordaRidge_Background_2019
## Axial_Boca_FS905_2013                                                          Boca_Axial_Vent_2013
## Axial_Skadi_FS910_2014                                                     Escargot_Axial_Vent_2014
## GordaRidge_BSW056_sterivex_2019_REPb                       Deep seawater_GordaRidge_Background_2019
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021                                Rav2_VonDamm_Vent_2020
## Axial_N3Area_FS898_2013                                                      N3Area_Axial_Vent_2013
## GordaRidge_Vent088_SUPRS3_2019                                      Candelabra_GordaRidge_Vent_2019
## GordaRidge_Plume036_sterivex_2019_REPb                       Candelabra Plume_GordaRidge_Plume_2019
## GordaRidge_Plume096_sterivex_2019                            Mt Edwards Plume_GordaRidge_Plume_2019
## GordaRidge_Vent105_SUPRS9_2019                                   Sir Ventsalot_GordaRidge_Vent_2019
## Axial_ElGuapo_FS896_2013                                                   El Guapo_Axial_Vent_2013
## Axial_Marker33_FS908_2014                                                  Marker33_Axial_Vent_2014
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021                                Rav2_VonDamm_Vent_2020
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021                                  Plume_VonDamm_Plume_2020
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021                    OldManTree_VonDamm_Vent_2020
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021                      ArrowLoop_VonDamm_Vent_2020
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021            Shrimpocalypse_Piccard_Vent_2020
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021                  LotsOShrimp_Piccard_Vent_2020
## GordaRidge_Vent041_SUPRS3_2019                                     Venti Latte_GordaRidge_Vent_2019
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021                    ShrimpHole_VonDamm_Vent_2020
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021                                  X18_VonDamm_Vent_2020
## Axial_Anemone_FS891_2013                                                    Anemone_Axial_Vent_2013
## Axial_Marker113_FS915_2015                                                Marker113_Axial_Vent_2015
## Axial_Marker33_FS904_2013                                                  Marker33_Axial_Vent_2013
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021                                 BSW_Piccard_Background_2020
## GordaRidge_Vent086_SUPRS1_2019                                      Candelabra_GordaRidge_Vent_2019
## Axial_AnemonePlume_AnemonePlume_2015                                 Anemone Plume_Axial_Plume_2015
## Axial_Marker113_FS903_2013                                                Marker113_Axial_Vent_2013
## Axial_Marker113_FS906_2014                                                Marker113_Axial_Vent_2014
## GordaRidge_Vent009_SUPRS1_2019                                      Mt Edwards_GordaRidge_Vent_2019
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021                                 BSW_VonDamm_Background_2020
## GordaRidge_Vent011_SUPRS3_2019                                      Mt Edwards_GordaRidge_Vent_2019
## Axial_BSW1500m_BSW1500m_2015                                    Deep seawater_Axial_Background_2015
## GordaRidge_Vent106_SUPRS10_2019                                  Sir Ventsalot_GordaRidge_Vent_2019
## GordaRidge_Vent107_SUPRS11_2019                                  Sir Ventsalot_GordaRidge_Vent_2019
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021                                  Plume_Piccard_Plume_2020
## GordaRidge_Vent010_SUPRS2_2019                                      Mt Edwards_GordaRidge_Vent_2019
## GordaRidge_Vent087_SUPRS2_2019                                      Candelabra_GordaRidge_Vent_2019
## Axial_Skadi_FS902_2013                                                        Skadi_Axial_Vent_2013
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021                  WhiteCastle_VonDamm_Vent_2020
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021                MustardStand_VonDamm_Vent_2020
##                                                                     VENT
## GordaRidge_Vent039_SUPRS1_2019                               Venti Latte
## GordaRidge_Vent040_SUPRS2_2019                               Venti Latte
## GordaRidge_BSW081_sterivex_2019                         Shallow seawater
## Axial_Boca_FS905_2013                                               Boca
## Axial_Skadi_FS910_2014                                          Escargot
## GordaRidge_BSW056_sterivex_2019_REPb                       Deep seawater
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021                       Rav2
## Axial_N3Area_FS898_2013                                           N3Area
## GordaRidge_Vent088_SUPRS3_2019                                Candelabra
## GordaRidge_Plume036_sterivex_2019_REPb                  Candelabra Plume
## GordaRidge_Plume096_sterivex_2019                       Mt Edwards Plume
## GordaRidge_Vent105_SUPRS9_2019                             Sir Ventsalot
## Axial_ElGuapo_FS896_2013                                        El Guapo
## Axial_Marker33_FS908_2014                                       Marker33
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021                       Rav2
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021                          Plume
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021           OldManTree
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021             ArrowLoop
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021   Shrimpocalypse
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021         LotsOShrimp
## GordaRidge_Vent041_SUPRS3_2019                               Venti Latte
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021           ShrimpHole
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021                         X18
## Axial_Anemone_FS891_2013                                         Anemone
## Axial_Marker113_FS915_2015                                     Marker113
## Axial_Marker33_FS904_2013                                       Marker33
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021                              BSW
## GordaRidge_Vent086_SUPRS1_2019                                Candelabra
## Axial_AnemonePlume_AnemonePlume_2015                       Anemone Plume
## Axial_Marker113_FS903_2013                                     Marker113
## Axial_Marker113_FS906_2014                                     Marker113
## GordaRidge_Vent009_SUPRS1_2019                                Mt Edwards
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021                              BSW
## GordaRidge_Vent011_SUPRS3_2019                                Mt Edwards
## Axial_BSW1500m_BSW1500m_2015                               Deep seawater
## GordaRidge_Vent106_SUPRS10_2019                            Sir Ventsalot
## GordaRidge_Vent107_SUPRS11_2019                            Sir Ventsalot
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021                          Plume
## GordaRidge_Vent010_SUPRS2_2019                                Mt Edwards
## GordaRidge_Vent087_SUPRS2_2019                                Candelabra
## Axial_Skadi_FS902_2013                                             Skadi
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021         WhiteCastle
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021       MustardStand
##                                                                         COORDINATES
## GordaRidge_Vent039_SUPRS1_2019                           42.7548145 N 126.7088945 W
## GordaRidge_Vent040_SUPRS2_2019                             42.754858 N 126.708922 W
## GordaRidge_BSW081_sterivex_2019                                 42.7546 N 126.743 W
## Axial_Boca_FS905_2013                                      45.927692 N 129.982482 W
## Axial_Skadi_FS910_2014                                        45.9264 N, 129.9791 W
## GordaRidge_BSW056_sterivex_2019_REPb                    42.76060928 N 126.7047891 W
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021                 18.375112, -81.797180
## Axial_N3Area_FS898_2013                                    45.943716 N 129.985163 W
## GordaRidge_Vent088_SUPRS3_2019                          42.75503414 N 126.7094585 W
## GordaRidge_Plume036_sterivex_2019_REPb                    42.7551105 N 126.709442 W
## GordaRidge_Plume096_sterivex_2019                       42.75465646 N 126.7091669 W
## GordaRidge_Vent105_SUPRS9_2019                            42.761202 N 126.7054775 W
## Axial_ElGuapo_FS896_2013                                   45.926575 N 129.979479 W
## Axial_Marker33_FS908_2014                                      45.9332 N 129.9822 W
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021                 18.375254, -81.797176
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021                     18.377600, -81.799317
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021           18.375069, -81.797678
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021            18.376659, -81.797986
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021       18.546674, -81.717806
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021          18.546789, -81.718356
## GordaRidge_Vent041_SUPRS3_2019                             42.754858 N 126.708922 W
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021           18.374893, -81.797441
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021                  18.374810, -81.797411
## Axial_Anemone_FS891_2013                                       45.9332 N 130.0137 W
## Axial_Marker113_FS915_2015                                     45.9227 N 129.9882 W
## Axial_Marker33_FS904_2013                                      45.9332 N 129.9822 W
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021                       18.547980, -81.718180
## GordaRidge_Vent086_SUPRS1_2019                           42.75506794 N 126.709613 W
## Axial_AnemonePlume_AnemonePlume_2015                      45.9335667 N 130.013667 W
## Axial_Marker113_FS903_2013                                     45.9227 N 129.9882 W
## Axial_Marker113_FS906_2014                                     45.9227 N 129.9882 W
## GordaRidge_Vent009_SUPRS1_2019                          42.75464576 N 126.7090451 W
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021                       18.374183, -81.781533
## GordaRidge_Vent011_SUPRS3_2019                            42.754692 N 126.7090115 W
## Axial_BSW1500m_BSW1500m_2015                                 46.27389 N 129.79548 W
## GordaRidge_Vent106_SUPRS10_2019                         42.76131802 N 126.7054541 W
## GordaRidge_Vent107_SUPRS11_2019                         42.76131802 N 126.7054541 W
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021                     18.546767, -81.718200
## GordaRidge_Vent010_SUPRS2_2019                            42.754692 N 126.7090115 W
## GordaRidge_Vent087_SUPRS2_2019                          42.75503414 N 126.7094585 W
## Axial_Skadi_FS902_2013                                     45.923383 N 129.982853 W
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021          18.377005, -81.798088
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021         18.375130, -81.797488
##                                                                     TYPE_SITE
## GordaRidge_Vent039_SUPRS1_2019                                GordaRidge_Vent
## GordaRidge_Vent040_SUPRS2_2019                                GordaRidge_Vent
## GordaRidge_BSW081_sterivex_2019                         GordaRidge_Background
## Axial_Boca_FS905_2013                                              Axial_Vent
## Axial_Skadi_FS910_2014                                             Axial_Vent
## GordaRidge_BSW056_sterivex_2019_REPb                    GordaRidge_Background
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021                    VonDamm_Vent
## Axial_N3Area_FS898_2013                                            Axial_Vent
## GordaRidge_Vent088_SUPRS3_2019                                GordaRidge_Vent
## GordaRidge_Plume036_sterivex_2019_REPb                       GordaRidge_Plume
## GordaRidge_Plume096_sterivex_2019                            GordaRidge_Plume
## GordaRidge_Vent105_SUPRS9_2019                                GordaRidge_Vent
## Axial_ElGuapo_FS896_2013                                           Axial_Vent
## Axial_Marker33_FS908_2014                                          Axial_Vent
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021                    VonDamm_Vent
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021                       VonDamm_Plume
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021              VonDamm_Vent
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021               VonDamm_Vent
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021          Piccard_Vent
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021             Piccard_Vent
## GordaRidge_Vent041_SUPRS3_2019                                GordaRidge_Vent
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021              VonDamm_Vent
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021                     VonDamm_Vent
## Axial_Anemone_FS891_2013                                           Axial_Vent
## Axial_Marker113_FS915_2015                                         Axial_Vent
## Axial_Marker33_FS904_2013                                          Axial_Vent
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021                    Piccard_Background
## GordaRidge_Vent086_SUPRS1_2019                                GordaRidge_Vent
## Axial_AnemonePlume_AnemonePlume_2015                              Axial_Plume
## Axial_Marker113_FS903_2013                                         Axial_Vent
## Axial_Marker113_FS906_2014                                         Axial_Vent
## GordaRidge_Vent009_SUPRS1_2019                                GordaRidge_Vent
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021                    VonDamm_Background
## GordaRidge_Vent011_SUPRS3_2019                                GordaRidge_Vent
## Axial_BSW1500m_BSW1500m_2015                                 Axial_Background
## GordaRidge_Vent106_SUPRS10_2019                               GordaRidge_Vent
## GordaRidge_Vent107_SUPRS11_2019                               GordaRidge_Vent
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021                       Piccard_Plume
## GordaRidge_Vent010_SUPRS2_2019                                GordaRidge_Vent
## GordaRidge_Vent087_SUPRS2_2019                                GordaRidge_Vent
## Axial_Skadi_FS902_2013                                             Axial_Vent
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021             VonDamm_Vent
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021            VonDamm_Vent
##                                                               SITE     SAMPLEID
## GordaRidge_Vent039_SUPRS1_2019                          GordaRidge      Vent039
## GordaRidge_Vent040_SUPRS2_2019                          GordaRidge      Vent040
## GordaRidge_BSW081_sterivex_2019                         GordaRidge       BSW081
## Axial_Boca_FS905_2013                                        Axial        FS905
## Axial_Skadi_FS910_2014                                       Axial        FS910
## GordaRidge_BSW056_sterivex_2019_REPb                    GordaRidge       BSW056
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021              VonDamm         <NA>
## Axial_N3Area_FS898_2013                                      Axial        FS898
## GordaRidge_Vent088_SUPRS3_2019                          GordaRidge      Vent088
## GordaRidge_Plume036_sterivex_2019_REPb                  GordaRidge     Plume036
## GordaRidge_Plume096_sterivex_2019                       GordaRidge     Plume096
## GordaRidge_Vent105_SUPRS9_2019                          GordaRidge      Vent105
## Axial_ElGuapo_FS896_2013                                     Axial        FS896
## Axial_Marker33_FS908_2014                                    Axial        FS908
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021              VonDamm         <NA>
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021                  VonDamm         <NA>
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021        VonDamm         <NA>
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021         VonDamm         <NA>
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021    Piccard         <NA>
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021       Piccard         <NA>
## GordaRidge_Vent041_SUPRS3_2019                          GordaRidge      Vent041
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021        VonDamm         <NA>
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021               VonDamm         <NA>
## Axial_Anemone_FS891_2013                                     Axial        FS891
## Axial_Marker113_FS915_2015                                   Axial        FS915
## Axial_Marker33_FS904_2013                                    Axial        FS904
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021                    Piccard         <NA>
## GordaRidge_Vent086_SUPRS1_2019                          GordaRidge      Vent086
## Axial_AnemonePlume_AnemonePlume_2015                         Axial AnemonePlume
## Axial_Marker113_FS903_2013                                   Axial        FS903
## Axial_Marker113_FS906_2014                                   Axial        FS906
## GordaRidge_Vent009_SUPRS1_2019                          GordaRidge      Vent009
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021                    VonDamm         <NA>
## GordaRidge_Vent011_SUPRS3_2019                          GordaRidge      Vent011
## Axial_BSW1500m_BSW1500m_2015                                 Axial     BSW1500m
## GordaRidge_Vent106_SUPRS10_2019                         GordaRidge      Vent106
## GordaRidge_Vent107_SUPRS11_2019                         GordaRidge      Vent107
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021                  Piccard         <NA>
## GordaRidge_Vent010_SUPRS2_2019                          GordaRidge      Vent010
## GordaRidge_Vent087_SUPRS2_2019                          GordaRidge      Vent087
## Axial_Skadi_FS902_2013                                       Axial        FS902
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021       VonDamm         <NA>
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021      VonDamm         <NA>
##                                                          DEPTH SAMPLETYPE YEAR
## GordaRidge_Vent039_SUPRS1_2019                          2708.0       Vent 2019
## GordaRidge_Vent040_SUPRS2_2019                          2708.0       Vent 2019
## GordaRidge_BSW081_sterivex_2019                          150.0 Background 2019
## Axial_Boca_FS905_2013                                   1516.9       Vent 2013
## Axial_Skadi_FS910_2014                                  1517.0       Vent 2014
## GordaRidge_BSW056_sterivex_2019_REPb                    2010.0 Background 2019
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021           2389.6       Vent 2020
## Axial_N3Area_FS898_2013                                 1523.0       Vent 2013
## GordaRidge_Vent088_SUPRS3_2019                          2730.0       Vent 2019
## GordaRidge_Plume036_sterivex_2019_REPb                  2725.0      Plume 2019
## GordaRidge_Plume096_sterivex_2019                       2707.0      Plume 2019
## GordaRidge_Vent105_SUPRS9_2019                          2732.0       Vent 2019
## Axial_ElGuapo_FS896_2013                                1502.0       Vent 2013
## Axial_Marker33_FS908_2014                               1514.0       Vent 2014
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021           2388.9       Vent 2020
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021               1979.0      Plume 2020
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021     2375.8       Vent 2020
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021      2309.0       Vent 2020
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 4945.0       Vent 2020
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021    4967.0       Vent 2020
## GordaRidge_Vent041_SUPRS3_2019                          2708.0       Vent 2019
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021     2376.0       Vent 2020
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021            2377.0       Vent 2020
## Axial_Anemone_FS891_2013                                1542.0       Vent 2013
## Axial_Marker113_FS915_2015                              1520.0       Vent 2015
## Axial_Marker33_FS904_2013                               1516.0       Vent 2013
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021                 4776.0 Background 2020
## GordaRidge_Vent086_SUPRS1_2019                          2730.0       Vent 2019
## Axial_AnemonePlume_AnemonePlume_2015                    1500.0      Plume 2015
## Axial_Marker113_FS903_2013                              1520.3       Vent 2013
## Axial_Marker113_FS906_2014                              1518.0       Vent 2014
## GordaRidge_Vent009_SUPRS1_2019                          2707.0       Vent 2019
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021                 2400.0 Background 2020
## GordaRidge_Vent011_SUPRS3_2019                          2707.0       Vent 2019
## Axial_BSW1500m_BSW1500m_2015                            1520.0 Background 2015
## GordaRidge_Vent106_SUPRS10_2019                         2732.0       Vent 2019
## GordaRidge_Vent107_SUPRS11_2019                         2732.0       Vent 2019
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021               4944.0      Plume 2020
## GordaRidge_Vent010_SUPRS2_2019                          2707.0       Vent 2019
## GordaRidge_Vent087_SUPRS2_2019                          2730.0       Vent 2019
## Axial_Skadi_FS902_2013                                  1561.6       Vent 2013
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021    2307.0       Vent 2020
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021   2374.0       Vent 2020
##                                                            TEMP   pH
## GordaRidge_Vent039_SUPRS1_2019                           11.000 6.40
## GordaRidge_Vent040_SUPRS2_2019                           11.000 6.40
## GordaRidge_BSW081_sterivex_2019                           8.600   NA
## Axial_Boca_FS905_2013                                     6.800 6.88
## Axial_Skadi_FS910_2014                                    6.600 5.84
## GordaRidge_BSW056_sterivex_2019_REPb                      1.800 7.80
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021            94.000 5.81
## Axial_N3Area_FS898_2013                                  18.900 4.96
## GordaRidge_Vent088_SUPRS3_2019                           79.000 5.50
## GordaRidge_Plume036_sterivex_2019_REPb                    1.700   NA
## GordaRidge_Plume096_sterivex_2019                         1.800   NA
## GordaRidge_Vent105_SUPRS9_2019                           72.000   NA
## Axial_ElGuapo_FS896_2013                                 25.700 5.42
## Axial_Marker33_FS908_2014                                18.500 5.60
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021            98.200 5.81
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021                 4.208   NA
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021     121.600 5.69
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021      137.000 5.69
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021  85.000 5.11
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021     36.000 5.92
## GordaRidge_Vent041_SUPRS3_2019                           11.000 6.40
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021      21.000 7.72
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021             48.000 6.99
## Axial_Anemone_FS891_2013                                 28.200 5.50
## Axial_Marker113_FS915_2015                               25.400 6.60
## Axial_Marker33_FS904_2013                                27.300 5.50
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021                   4.460   NA
## GordaRidge_Vent086_SUPRS1_2019                           79.000 5.50
## Axial_AnemonePlume_AnemonePlume_2015                         NA   NA
## Axial_Marker113_FS903_2013                               24.800 6.17
## Axial_Marker113_FS906_2014                               24.300 5.80
## GordaRidge_Vent009_SUPRS1_2019                           40.000 6.00
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021                   4.181   NA
## GordaRidge_Vent011_SUPRS3_2019                           40.000 6.00
## Axial_BSW1500m_BSW1500m_2015                              2.000 7.80
## GordaRidge_Vent106_SUPRS10_2019                          72.000   NA
## GordaRidge_Vent107_SUPRS11_2019                          72.000   NA
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021                 4.460   NA
## GordaRidge_Vent010_SUPRS2_2019                           40.000 6.00
## GordaRidge_Vent087_SUPRS2_2019                           79.000 5.50
## Axial_Skadi_FS902_2013                                   35.600 6.23
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021    108.000 5.49
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021   108.000 5.63
##                                                         PercSeawater   Mg
## GordaRidge_Vent039_SUPRS1_2019                                  97.0 52.1
## GordaRidge_Vent040_SUPRS2_2019                                  97.0 52.1
## GordaRidge_BSW081_sterivex_2019                                100.0 53.0
## Axial_Boca_FS905_2013                                           98.2 53.0
## Axial_Skadi_FS910_2014                                          97.4 52.6
## GordaRidge_BSW056_sterivex_2019_REPb                           100.0 53.0
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021                   33.4 18.0
## Axial_N3Area_FS898_2013                                         98.3 53.0
## GordaRidge_Vent088_SUPRS3_2019                                  88.0 36.5
## GordaRidge_Plume036_sterivex_2019_REPb                         100.0   NA
## GordaRidge_Plume096_sterivex_2019                              100.0   NA
## GordaRidge_Vent105_SUPRS9_2019                                  98.0 52.0
## Axial_ElGuapo_FS896_2013                                        92.3 49.8
## Axial_Marker33_FS908_2014                                       91.3 49.2
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021                   33.4 18.0
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021                         NA   NA
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021             25.6 13.7
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021              33.8 18.1
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021         81.7 43.9
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021            94.9 51.0
## GordaRidge_Vent041_SUPRS3_2019                                  97.0 52.1
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021             96.4 51.8
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021                    52.0 28.0
## Axial_Anemone_FS891_2013                                        88.9 47.6
## Axial_Marker113_FS915_2015                                      95.8 51.4
## Axial_Marker33_FS904_2013                                       87.1 47.0
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021                        100.0 53.7
## GordaRidge_Vent086_SUPRS1_2019                                  88.0 36.5
## Axial_AnemonePlume_AnemonePlume_2015                           100.0   NA
## Axial_Marker113_FS903_2013                                      96.0 51.5
## Axial_Marker113_FS906_2014                                      96.4 50.0
## GordaRidge_Vent009_SUPRS1_2019                                  83.0 43.6
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021                        100.0   NA
## GordaRidge_Vent011_SUPRS3_2019                                  83.0 43.6
## Axial_BSW1500m_BSW1500m_2015                                   100.0 53.6
## GordaRidge_Vent106_SUPRS10_2019                                 98.0 52.0
## GordaRidge_Vent107_SUPRS11_2019                                 98.0 52.0
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021                         NA   NA
## GordaRidge_Vent010_SUPRS2_2019                                  83.0 43.6
## GordaRidge_Vent087_SUPRS2_2019                                  88.0 36.5
## Axial_Skadi_FS902_2013                                          90.4 48.7
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021            16.6  8.9
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021           36.2 19.4
##                                                                   H2
## GordaRidge_Vent039_SUPRS1_2019                                    NA
## GordaRidge_Vent040_SUPRS2_2019                                    NA
## GordaRidge_BSW081_sterivex_2019                                   NA
## Axial_Boca_FS905_2013                                   2.763720e+00
## Axial_Skadi_FS910_2014                                  4.697134e-02
## GordaRidge_BSW056_sterivex_2019_REPb                              NA
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021           1.020000e+04
## Axial_N3Area_FS898_2013                                 5.340191e-02
## GordaRidge_Vent088_SUPRS3_2019                          2.190000e+01
## GordaRidge_Plume036_sterivex_2019_REPb                            NA
## GordaRidge_Plume096_sterivex_2019                                 NA
## GordaRidge_Vent105_SUPRS9_2019                                    NA
## Axial_ElGuapo_FS896_2013                                2.992534e-01
## Axial_Marker33_FS908_2014                               1.500000e+00
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021           1.020000e+04
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021                         NA
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021     1.160000e+04
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021      1.150000e+04
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021 0.000000e+00
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021    2.270000e+04
## GordaRidge_Vent041_SUPRS3_2019                                    NA
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021     5.527440e+00
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021                      NA
## Axial_Anemone_FS891_2013                                1.390000e+01
## Axial_Marker113_FS915_2015                              3.000000e-01
## Axial_Marker33_FS904_2013                               1.500000e+00
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021                           NA
## GordaRidge_Vent086_SUPRS1_2019                          2.190000e+01
## Axial_AnemonePlume_AnemonePlume_2015                              NA
## Axial_Marker113_FS903_2013                              1.400000e+00
## Axial_Marker113_FS906_2014                              1.000000e+00
## GordaRidge_Vent009_SUPRS1_2019                          1.270000e+02
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021                           NA
## GordaRidge_Vent011_SUPRS3_2019                          1.270000e+02
## Axial_BSW1500m_BSW1500m_2015                            2.000000e-03
## GordaRidge_Vent106_SUPRS10_2019                                   NA
## GordaRidge_Vent107_SUPRS11_2019                                   NA
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021                         NA
## GordaRidge_Vent010_SUPRS2_2019                          1.270000e+02
## GordaRidge_Vent087_SUPRS2_2019                          2.190000e+01
## Axial_Skadi_FS902_2013                                  1.023600e+00
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021    1.450000e+04
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021   9.800000e+03
##                                                                 H2S
## GordaRidge_Vent039_SUPRS1_2019                                   NA
## GordaRidge_Vent040_SUPRS2_2019                                   NA
## GordaRidge_BSW081_sterivex_2019                                  NA
## Axial_Boca_FS905_2013                                   0.007664609
## Axial_Skadi_FS910_2014                                  0.035242393
## GordaRidge_BSW056_sterivex_2019_REPb                             NA
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021           1.370000000
## Axial_N3Area_FS898_2013                                 0.512918204
## GordaRidge_Vent088_SUPRS3_2019                                   NA
## GordaRidge_Plume036_sterivex_2019_REPb                           NA
## GordaRidge_Plume096_sterivex_2019                                NA
## GordaRidge_Vent105_SUPRS9_2019                                   NA
## Axial_ElGuapo_FS896_2013                                0.218077087
## Axial_Marker33_FS908_2014                               0.267671400
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021           1.370000000
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021                        NA
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021     1.770000000
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021      1.740000000
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021          NA
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021             NA
## GordaRidge_Vent041_SUPRS3_2019                                   NA
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021              NA
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021            2.110000000
## Axial_Anemone_FS891_2013                                1.060449600
## Axial_Marker113_FS915_2015                              0.591845520
## Axial_Marker33_FS904_2013                               0.557862000
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021                          NA
## GordaRidge_Vent086_SUPRS1_2019                                   NA
## Axial_AnemonePlume_AnemonePlume_2015                             NA
## Axial_Marker113_FS903_2013                              0.746204400
## Axial_Marker113_FS906_2014                              0.569121600
## GordaRidge_Vent009_SUPRS1_2019                          1.010000000
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021                          NA
## GordaRidge_Vent011_SUPRS3_2019                          1.010000000
## Axial_BSW1500m_BSW1500m_2015                            0.000000000
## GordaRidge_Vent106_SUPRS10_2019                                  NA
## GordaRidge_Vent107_SUPRS11_2019                                  NA
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021                        NA
## GordaRidge_Vent010_SUPRS2_2019                          1.010000000
## GordaRidge_Vent087_SUPRS2_2019                                   NA
## Axial_Skadi_FS902_2013                                  0.086783554
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021    1.960000000
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021   1.790000000
##                                                                  CH4   ProkConc
## GordaRidge_Vent039_SUPRS1_2019                             0.9000000  111192.50
## GordaRidge_Vent040_SUPRS2_2019                             0.9000000  111192.50
## GordaRidge_BSW081_sterivex_2019                                   NA         NA
## Axial_Boca_FS905_2013                                      1.6377600  850000.00
## Axial_Skadi_FS910_2014                                     2.1191968         NA
## GordaRidge_BSW056_sterivex_2019_REPb                              NA   39100.00
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021           1893.6600000         NA
## Axial_N3Area_FS898_2013                                   65.5214790  257000.00
## GordaRidge_Vent088_SUPRS3_2019                            23.7000000   55076.66
## GordaRidge_Plume036_sterivex_2019_REPb                            NA   76900.00
## GordaRidge_Plume096_sterivex_2019                                 NA         NA
## GordaRidge_Vent105_SUPRS9_2019                                    NA   52998.29
## Axial_ElGuapo_FS896_2013                                   2.8743807  168000.00
## Axial_Marker33_FS908_2014                                  6.5510400  390000.00
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021           1893.6600000         NA
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021                         NA   16478.31
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021     1985.7840000         NA
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021      1893.6600000   10369.79
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021   27.5348400  238585.68
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021      11.4643200   53878.14
## GordaRidge_Vent041_SUPRS3_2019                             0.9000000  111192.50
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021      218.0268000         NA
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021            1310.2080000  111429.78
## Axial_Anemone_FS891_2013                                  15.1492800  410000.00
## Axial_Marker113_FS915_2015                                22.8262800 1500000.00
## Axial_Marker33_FS904_2013                                 19.4484000  420000.00
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021                           NA   11860.19
## GordaRidge_Vent086_SUPRS1_2019                            23.7000000   55076.66
## Axial_AnemonePlume_AnemonePlume_2015                              NA         NA
## Axial_Marker113_FS903_2013                                17.2988400  460000.00
## Axial_Marker113_FS906_2014                                38.8968000  680000.00
## GordaRidge_Vent009_SUPRS1_2019                            10.1000000   51439.52
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021                           NA   34705.92
## GordaRidge_Vent011_SUPRS3_2019                            10.1000000   51439.52
## Axial_BSW1500m_BSW1500m_2015                               0.0020472   25000.00
## GordaRidge_Vent106_SUPRS10_2019                                   NA   52998.29
## GordaRidge_Vent107_SUPRS11_2019                                   NA   52998.29
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021                         NA   51429.13
## GordaRidge_Vent010_SUPRS2_2019                            10.1000000   51439.52
## GordaRidge_Vent087_SUPRS2_2019                            23.7000000   55076.66
## Axial_Skadi_FS902_2013                                     4.4884860  565000.00
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021    2251.9200000         NA
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021   1819.9608000   56677.00
##                                                         Sample_or_Control
## GordaRidge_Vent039_SUPRS1_2019                                     Sample
## GordaRidge_Vent040_SUPRS2_2019                                     Sample
## GordaRidge_BSW081_sterivex_2019                                    Sample
## Axial_Boca_FS905_2013                                              Sample
## Axial_Skadi_FS910_2014                                             Sample
## GordaRidge_BSW056_sterivex_2019_REPb                               Sample
## 70_MCR_VonDamm_HOG_Rav2_J21238HOG14_5_Jun2021                      Sample
## Axial_N3Area_FS898_2013                                            Sample
## GordaRidge_Vent088_SUPRS3_2019                                     Sample
## GordaRidge_Plume036_sterivex_2019_REPb                             Sample
## GordaRidge_Plume096_sterivex_2019                                  Sample
## GordaRidge_Vent105_SUPRS9_2019                                     Sample
## Axial_ElGuapo_FS896_2013                                           Sample
## Axial_Marker33_FS908_2014                                          Sample
## 80_MCR_VonDamm_HOG_Rav2_J21244HOG20_0_Jun2021                      Sample
## 65_MCR_VonDamm_CTD_Plume_CTD003_0_Jun2021                          Sample
## 71_MCR_VonDamm_HOG_OldManTree_J21238HOG20_5_Jun2021                Sample
## 66_MCR_VonDamm_HOG_ArrowLoop_J21243HOG18_5_Jun2021                 Sample
## 77_MCR_Piccard_HOG_Shrimpocalypse_J21240HOG14_0_Jun2021            Sample
## 78_MCR_Piccard_HOG_LotsOShrimp_J21241HOG14_0_Jun2021               Sample
## GordaRidge_Vent041_SUPRS3_2019                                     Sample
## 72_MCR_VonDamm_HOG_ShrimpHole_J21244HOG18_0_Jun2021                Sample
## 76_MCR_VonDamm_HOG_X18_J21235HOG20_0_Jun2021                       Sample
## Axial_Anemone_FS891_2013                                           Sample
## Axial_Marker113_FS915_2015                                         Sample
## Axial_Marker33_FS904_2013                                          Sample
## 73_MCR_Piccard_CTD_BSW_CTD005_0_Jun2021                            Sample
## GordaRidge_Vent086_SUPRS1_2019                                     Sample
## Axial_AnemonePlume_AnemonePlume_2015                               Sample
## Axial_Marker113_FS903_2013                                         Sample
## Axial_Marker113_FS906_2014                                         Sample
## GordaRidge_Vent009_SUPRS1_2019                                     Sample
## 63_MCR_VonDamm_CTD_BSW_CTD002_0_Jun2021                            Sample
## GordaRidge_Vent011_SUPRS3_2019                                     Sample
## Axial_BSW1500m_BSW1500m_2015                                       Sample
## GordaRidge_Vent106_SUPRS10_2019                                    Sample
## GordaRidge_Vent107_SUPRS11_2019                                    Sample
## 74_MCR_Piccard_CTD_Plume_CTD004_5_Jun2021                          Sample
## GordaRidge_Vent010_SUPRS2_2019                                     Sample
## GordaRidge_Vent087_SUPRS2_2019                                     Sample
## Axial_Skadi_FS902_2013                                             Sample
## 67_MCR_VonDamm_HOG_WhiteCastle_J21235HOG12_5_Jun2021               Sample
## 69_MCR_VonDamm_HOG_MustardStand_J21243HOG14_5_Jun2021              Sample
# CLR-transformed data, transpose and create euclidean dist matrix
dist_euc <- vegdist(t(count_table_clr), method = "euclidean")
# ?vegdist()
# ?betadisper

# dist_jac <- vegdist(t(count_table_notrans), method = "jaccard")
# ?betadisper()
beta_SITE <- betadisper(dist_euc, sample_data(physeq_insitu)$SITE)
beta_SAMPLETYPE <- betadisper(dist_euc, sample_data(physeq_insitu)$SAMPLETYPE)
beta_TYPE_SITE <- betadisper(dist_euc, sample_data(physeq_insitu)$TYPE_SITE)

SITE

anova(beta_SITE)
## Analysis of Variance Table
## 
## Response: Distances
##           Df Sum Sq Mean Sq F value  Pr(>F)  
## Groups     3 112.43  37.475  2.7555 0.05527 .
## Residuals 39 530.41  13.600                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(beta_SITE)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                          diff        lwr      upr     p adj
## GordaRidge-Axial   -1.4140515 -5.1091060 2.281003 0.7348370
## Piccard-Axial       0.5897262 -5.0684479 6.247900 0.9922407
## VonDamm-Axial       2.8385758 -1.3238428 7.000994 0.2750595
## Piccard-GordaRidge  2.0037777 -3.5281759 7.535731 0.7660453
## VonDamm-GordaRidge  4.2526273  0.2634789 8.241776 0.0328264
## VonDamm-Piccard     2.2488496 -3.6056198 8.103319 0.7326007
plot(beta_SITE)

# View(beta_SITE$vectors) # to confirm sample that is an outlier

Sampletype

anova(beta_SAMPLETYPE)
## Analysis of Variance Table
## 
## Response: Distances
##           Df Sum Sq Mean Sq F value Pr(>F)
## Groups     2  15.19  7.5974  0.4238 0.6574
## Residuals 40 716.99 17.9247
TukeyHSD(beta_SAMPLETYPE)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                       diff       lwr      upr     p adj
## Plume-Background 1.5609149 -4.956307 8.078137 0.8299904
## Vent-Background  1.8695977 -3.075585 6.814781 0.6308843
## Vent-Plume       0.3086828 -4.636500 5.253866 0.9873595
plot(beta_SAMPLETYPE)

# View(beta_SAMPLETYPE$vectors)

Based on dispersion testing, it is clear that Boca (Axial, snowblower vent) is an outlier. Secondary to Boca, however are the plume and CTD (Background samples).

10.4.1.1 Remove outlier

Repeat dispersion test with removed samples, Boca and background.

count_table_clr_noboca <- compositions::clr(otu_table(subset_samples(physeq_insitu, VENT != "Boca")))

count_table_clr_nobocaBSW <- compositions::clr(otu_table(subset_samples(physeq_insitu, (VENT != "Boca" & SAMPLETYPE == "Vent"))))
# names(count_table_clr_nobocaBSW)

# CLR-transformed data, transpose and create euclidean dist matrix
dist_euc_noboca <- vegdist(t(count_table_clr_noboca), method = "euclidean")

dist_euc_nobocaBSW <- vegdist(t(count_table_clr_nobocaBSW), method = "euclidean")
# ?vegdist()

Re-check dispersion without Boca sample.

beta_SITE <- betadisper(dist_euc_noboca, sample_data(subset_samples(physeq_insitu, VENT != "Boca"))$SITE)
anova(beta_SITE)
## Analysis of Variance Table
## 
## Response: Distances
##           Df Sum Sq Mean Sq F value   Pr(>F)   
## Groups     3 142.05  47.351  5.0841 0.004668 **
## Residuals 38 353.92   9.314                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(beta_SITE)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                           diff        lwr      upr     p adj
## GordaRidge-Axial   -0.01488429 -3.1457852 3.116017 0.9999992
## Piccard-Axial       2.04455322 -2.6889240 6.778030 0.6550081
## VonDamm-Axial       4.33568392  0.8252432 7.846125 0.0103625
## Piccard-GordaRidge  2.05943751 -2.5237321 6.642607 0.6261842
## VonDamm-GordaRidge  4.35056821  1.0455976 7.655539 0.0057447
## VonDamm-Piccard     2.29113069 -2.5592402 7.141502 0.5878419
plot(beta_SITE)

# View(beta_SITE$vectors) # to confirm samples that are outliers

Check dispersion without Boca or background samples.

beta_SITE <- betadisper(dist_euc_nobocaBSW, sample_data(subset_samples(physeq_insitu, (VENT != "Boca" & SAMPLETYPE == "Vent")))$SITE)
anova(beta_SITE)
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq Mean Sq F value Pr(>F)
## Groups     3  51.705 17.2351  2.1788 0.1128
## Residuals 28 221.495  7.9105
TukeyHSD(beta_SITE)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                          diff        lwr      upr     p adj
## GordaRidge-Axial    0.1538572 -3.1341755 3.441890 0.9992363
## Piccard-Axial      -0.5993290 -6.5476041 5.348946 0.9925605
## VonDamm-Axial       2.9344569 -0.7081027 6.577017 0.1480828
## Piccard-GordaRidge -0.7531862 -6.6182645 5.111892 0.9848926
## VonDamm-GordaRidge  2.7805997 -0.7244550 6.285654 0.1576322
## VonDamm-Piccard     3.5337860 -2.5371468 9.604719 0.4007670
plot(beta_SITE)

# View(beta_SITE$vectors) # to confirm sample that is an outlier

10.5 Distance-based redundancy analysis

Legendre and Anderson (1999), RDA that looks for linear relationships on dissimilarity matrices. Constrained ordination with non-Euclidean distance measures. (Legendre P, Anderson MJ (1999) Distance-based redundancy analysis: testing multispecies responses in multifactorial ecological experiments. Ecol Monogr. 69:1–24.)

Below dbrda() statement inputs all samples as a distance matrix (Euclidean), with the RH side variables as numeric data following the ~.

10.5.1 Function for DBRDA analysis

With various outliers depending on the question posed, below function takes subseted data and runs dbrda() analysis.

dbrda() analysis input different subsets of phyloseq object, perform Jaccard dissimilarity.

dbrda_subset <- function(subset_df){
  # category <- enquo(category)
  # subset_df <- subset_samples(physeq_insitu, category %in% selection)
  clr_out <- compositions::clr(otu_table(subset_df))
  euc_out <- vegdist(t(clr_out), method = "euclidean") #CLR with Euclidean is aitchison
  # jac_out <- vegdist(t(otu_table(subset_df)), method = "jaccard")
  # Params
  # ?vegdist
  temp <- sample_data(subset_df)$TEMP
  ph <- sample_data(subset_df)$pH
  persea <- sample_data(subset_df)$PercSeawater
  mg <- sample_data(subset_df)$Mg
  h2 <- sample_data(subset_df)$H2
  h2s <- sample_data(subset_df)$H2S
  ch4 <- sample_data(subset_df)$CH4
  prok <- sample_data(subset_df)$ProkConc
  depth <- sample_data(subset_df)$DEPTH
  # DBRDA
   rda_out <- dbrda(euc_out ~ temp + persea + prok + h2s + mg + h2 + ch4 + depth + ph, dist = vegdist, na.action = na.omit)
  # Overall significance
  # anova(rda_out)
  # Print results by margin
  anova(rda_out, by = 'margin')
  # Plot
  # xy <- anova(rda_out, by = 'axis')
  # x <- round(xy$Variance[1], 1); y <- round(xy$Variance[2], 1)
  # plot(rda_out, xlab = x, ylab = y)
}

Usage of function, input phyloseq object with all data, it will be transformed (CLR) and dist matrix will be estimates (Euclidean). Parameters from the metadata will be formatted and dbrda() will run on all.

CLR transformed data with Euclidean distance estimate is Aitchison’s

10.6 Test sample subsets by sample type and taxa

Removed Boca outlier and isolate vent-only sites, test for significant parameters.

dbrda_subset(subset_samples(physeq_insitu, (VENT != "Boca" & SAMPLETYPE == "Vent")))
## Permutation test for dbrda under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 999
## 
## Model: dbrda(formula = euc_out ~ temp + persea + prok + h2s + mg + h2 + ch4 + depth + ph, distance = vegdist, na.action = na.omit)
##          Df Variance      F Pr(>F)
## temp      1   21.341 0.9219  0.594
## persea    1   21.864 0.9446  0.575
## prok      1   24.184 1.0448  0.443
## h2s       1   23.247 1.0043  0.476
## mg        1   21.461 0.9271  0.598
## h2        1   23.585 1.0189  0.442
## ch4       1   26.973 1.1653  0.254
## depth     1   24.958 1.0782  0.378
## ph        1   25.775 1.1135  0.338
## Residual  4   92.590

Isolate subsets of data frame by ASV or taxonomic ID.

noNAs <- filter(insitu_asv_wClass, 
                (Supergroup != "Bacteria_X" | 
                   !is.na(Supergroup) | 
                   Supergroup != "Terrabacteria" | 
                   Supergroup != "Archaea_X" | 
                   Supergroup != "Opisthokonta"))
list_of_noNAs <- as.character(noNAs$FeatureID)
# length(list_of_noNAs)
noNAs_phy <- prune_taxa(list_of_noNAs, physeq_insitu)

###
cosmo <- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")

res <- filter(insitu_asv_wClass, CLASS == "Vent only")
cos <- filter(insitu_asv_wClass, CLASS %in% cosmo)

list_of_resident <- as.character(unique(res$FeatureID))
list_of_cos <- as.character(unique(cos$FeatureID))
# length(list_of_resident); length(list_of_cos)
cosmo_only <- prune_taxa(list_of_cos, physeq_insitu)
res_vent <- prune_taxa(list_of_resident, physeq_insitu)

###
# sample_data(insitu_asv_wClass)
alvs <- filter(insitu_asv_wClass, Supergroup == "Alveolata")
list_of_alvs <- as.character(alvs$FeatureID)
alvs_phy <- prune_taxa(list_of_alvs, physeq_insitu)

##
# sample_data(insitu_asv_wClass)
dino_syn <- filter(insitu_asv_wClass, (Phylum == "Dinoflagellata" | Class == "Syndiniales"))
list_of_dino_syn <- as.character(dino_syn$FeatureID)
dino_syn_phy <- prune_taxa(list_of_dino_syn, physeq_insitu)
##
# sample_data(insitu_asv_wClass)
cili <- filter(insitu_asv_wClass, Phylum == "Ciliophora")
list_of_cili <- as.character(cili$FeatureID)
cili_phy <- prune_taxa(list_of_cili, physeq_insitu)

##
# sample_data(insitu_asv_wClass)
rhiz <- filter(insitu_asv_wClass, (Supergroup == "Rhizaria"))
list_of_rhiz <- as.character(rhiz$FeatureID)
rhiz_phy <- prune_taxa(list_of_rhiz, physeq_insitu)

##
# sample_data(insitu_asv_wClass)
stram <- filter(insitu_asv_wClass, (Supergroup == "Stramenopiles"))
list_of_stram <- as.character(stram$FeatureID)
stram_phy <- prune_taxa(list_of_stram, physeq_insitu)

10.6.1 Table 2 tests

set.seed(1245)

Test with data set, vents only. Populate Table 2.

# All samples, without Boca as an outlier

dbrda_output <- function(df, inputdata){
  allsamples <- dbrda_subset(subset_samples(df, (VENT != "Boca")))
    allsamples_df <- data.frame(variables = row.names(allsamples), sig = allsamples$`Pr(>F)`, samples = "All samples") %>% drop_na()
    
    # vent samples only, Boca removed
    ventonly <- dbrda_subset(subset_samples(df, (SAMPLETYPE == "Vent" & VENT != "Boca")))
    ventonly_df <- data.frame(variables = row.names(ventonly), sig = ventonly$`Pr(>F)`, samples = "Vent only") %>% drop_na()
    
    # MCR: Piccard and Von Damm, vents only
    mcr <- dbrda_subset(subset_samples(df, (SITE == "VonDamm" | SITE == "Piccard" | SAMPLETYPE == "Vent")))
    mcr_df <- data.frame(variables = row.names(mcr), sig = mcr$`Pr(>F)`, samples = "MCR") %>% drop_na()
    
    # GR and Axial
    NEpac <- dbrda_subset(subset_samples(df, (SITE == "GordaRidge" | SITE == "Axial" & VENT != "Boca" | SAMPLETYPE == "Vent")))
    NEpac_df <- data.frame(variables = row.names(NEpac), sig = NEpac$`Pr(>F)`, samples = "NE Pacific") %>% drop_na()
    
    # Von Damm and then Piccard only
    vd <- dbrda_subset(subset_samples(df, (SITE == "VonDamm" | SAMPLETYPE == "Vent")))
    vd_df <- data.frame(variables = row.names(vd), sig = vd$`Pr(>F)`, samples = "Von Damm") %>% drop_na()
    
    picc <- dbrda_subset(subset_samples(df, (SITE == "Piccard" | SAMPLETYPE == "Vent")))
    picc_df <- data.frame(variables = row.names(picc), sig = picc$`Pr(>F)`, samples = "Piccard") %>% drop_na()
    
    # Axial and then Gorda only
    axial <- dbrda_subset(subset_samples(df, (VENT != "Boca" & SITE == "Axial" | SAMPLETYPE == "Vent")))
    axial_df <- data.frame(variables = row.names(axial), sig = axial$`Pr(>F)`, samples = "Axial Seamount") %>% drop_na()
    
    gr <- dbrda_subset(subset_samples(df, (SITE == "GordaRidge" | SAMPLETYPE == "Vent"))); gr
    gr_df <- data.frame(variables = row.names(gr), sig = gr$`Pr(>F)`, samples = "Gorda Ridge") %>% drop_na()
    
    table2_output <- bind_rows(
      allsamples_df, ventonly_df, mcr_df, NEpac_df, vd_df, picc_df, axial_df, gr_df) %>% 
      add_column(Taxa = inputdata)
    return(table2_output)
}
table2_stats <- bind_rows(
  dbrda_output(physeq_insitu, "All taxa"),
  dbrda_output(noNAs_phy, "No NAs, no metazoa"),
  dbrda_output(res_vent, "Resident only"),
  dbrda_output(cosmo_only, "Cosmopolitan only"),
  dbrda_output(alvs_phy, "Alveolates (all)"),
  dbrda_output(dino_syn_phy, "Dinoflagelaltes & Syndiniales"),
  dbrda_output(cili_phy, "Ciliates"),
  dbrda_output(rhiz_phy, "Rhizaria"),
  dbrda_output(stram_phy, "Stramenopiles"),
)
# View(table2_stats)
# head(table2_stats)
# unique(table2_stats$Taxa)
# range(table2_output$sig)
reds <- c("#f7f7f7","#fee5d9", "#fcae91", "#fb6a4a", "#a50f15")

# Factor variables
# rm <- c("Von Damm", "Piccard", "Axial Seamount", "Gorda Ridge")

table2_stats$SAMPLES <- factor(table2_stats$samples, levels = c("All samples", "Vent only", "MCR", "NE Pacific", "Von Damm", "Piccard", "Axial Seamount", "Gorda Ridge"), labels = c("All samples", "Vent only", "Mid-Cayman Rise (vents)", "NE Pacific (vents)", "Von Damm", "Piccard", "Axial Seamount", "Gorda Ridge"))

order_taxa <- c("All taxa", "No NAs, no metazoa", "Resident only", "Cosmopolitan only", "Alveolates (all)", "Dinoflagelaltes & Syndiniales", "Ciliates", "Rhizaria", "Stramenopiles")
table2_stats$TAXA <- factor(table2_stats$Taxa, levels = rev(order_taxa))

table2_stats$VARIABLES <- factor(table2_stats$variables, 
                                 levels = c("depth", "temp", "prok", "ph", "persea", "mg", "h2", "h2s", "ch4"), 
                                 labels = c("Depth (m)", "Temperature (C)", "Cells ml^-1", "pH", "Seawater %", "Mg", "H2", "H2S", "CH4"))

# svg("dbrda-outcome-heatmap.svg", h = 4, w = 14)
table2_stats %>% 
  # filter(!(samples %in% rm)) %>% 
  ggplot(aes(x = VARIABLES, y = TAXA, fill = sig)) +
    geom_tile(stat = "identity", color = "white") +
  facet_grid(. ~ SAMPLES) +
  scale_fill_stepsn(
    limits = c(0,0.25),
    breaks = c(0.01, 0.05, 0.1),
    show.limits = TRUE,
    colors = rev(reds)) +
  theme_bw() +
  scale_x_discrete(position = "top") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0, hjust = 0, color = "black"),
        strip.background = element_blank(),
        panel.border = element_blank(),
        axis.text.y = element_text(color = "black"),
        axis.ticks = element_blank(),
        panel.grid = element_blank(),
        strip.placement = "outside") +
  labs(x = "Parameters by dataset", y = "Taxa")

# dev.off()

11 Profiles by individual vent site

axial <- c("Axial")
mcr <- c("VonDamm", "Piccard")
gr <- c("GordaRidge")
all <- c("Axial", "VonDamm", "Piccard", "GordaRidge")

11.1 Bar plot function

Create a bar plot showing the relative sequence abundance of 18S results to the Supergroup and Phylum level. Function averages across replicates and then sums to the phylum and supergroup level. Bar plot shows the relative sequence abundance.

make_bar_relabun <- function(df, selection){
  df_out <- df %>% 
  filter(SITE %in% selection) %>% 
  filter(Domain == "Eukaryota") %>%
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  group_by(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species,
           VENT, SITE, SAMPLETYPE, YEAR, DATASET) %>% 
    summarise(SEQ_AVG_REP = mean(value)) %>% 
  ungroup()
  supergroup <- df_out %>% 
  group_by(SITE, SAMPLETYPE, VENT, YEAR, Supergroup) %>% 
    summarise(SEQ_SUM = sum(SEQ_AVG_REP)) %>% 
  ggplot(aes(x = VENT, y = SEQ_SUM, fill = Supergroup)) +
    geom_bar(stat = "identity", position = "fill", color = "black", width = 0.9) +
    facet_grid(. ~ SITE +YEAR + SAMPLETYPE, scale = "free", space = "free") +
  theme_linedraw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        strip.background = element_blank(), strip.text = element_text(color = "black"),
        legend.position = "right") +
  scale_y_continuous(expand = c(0,0)) +
  # scale_fill_brewer(palette = "Set2") +
  scale_fill_manual(values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525", "#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525")) +
  labs(x = "", y = "Relative abundance")
  
  phylum <- df_out %>% 
    unite(SupergroupPhylum, Supergroup, Phylum, sep = "-") %>% 
  group_by(SITE, SAMPLETYPE, VENT, YEAR, SupergroupPhylum) %>% 
    summarise(SEQ_SUM = sum(SEQ_AVG_REP)) %>% 
  ggplot(aes(x = VENT, y = SEQ_SUM, fill = SupergroupPhylum)) +
    geom_bar(stat = "identity", position = "fill", color = "black", width = 0.9) +
    facet_grid(. ~ SITE +YEAR + SAMPLETYPE, scale = "free", space = "free") +
  theme_linedraw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        strip.background = element_blank(), strip.text = element_text(color = "black"),
        legend.position = "right") +
  scale_y_continuous(expand = c(0,0)) +
  scale_fill_manual(values = c("#f1eef6", "#d7b5d8", "#df65b0", "#ce1256", "#fc9272", "#ef3b2c", 
    "#800026", "#fff7bc", "#fec44f", "#d95f0e", "#a63603", "#74c476", "#238b45", 
    "#00441b", "#7fcdbb", "#084081", "#c6dbef", "#2b8cbe", "#016c59", "#bcbddc", 
    "#807dba", "#54278f", "#bdbdbd", "black", "white", "#969696", "#525252", "#f1eef6", "#d7b5d8", "#df65b0", "#ce1256", "#fc9272", "#ef3b2c", 
    "#800026", "#fff7bc", "#fec44f", "#d95f0e", "#a63603", "#74c476", "#238b45", 
    "#00441b", "#7fcdbb", "#084081", "#c6dbef", "#2b8cbe", "#016c59", "#bcbddc", 
    "#807dba", "#54278f", "#bdbdbd", "black", "white")) +
  labs(x = "", y = "Relative abundance")
  supergroup + phylum + patchwork::plot_layout(ncol = 1)
}

# make_bar_relabun(insitu_asv_wClass, axial)

11.2 Tile plot

Relative abundance plots are misleading, as this tag-sequence data is compositional. To combat this, we can also perform a center log-ratio transformation of the sequence counts. This tile plot (or heat map) will show the relationship from the data mean. Positive values thus demonstrate an increase in the taxa, while negative values illustrate the opposite.

Ahead of the CLR transformation, average across replicates, then sum to the Class level. THEN perform CLR transformation and plot as heat map.

make_clr_trans_tile <- function(df, selection){
  df_wide <- df %>%
    filter(SITE %in% selection) %>%
  # df_wide <- insitu_asv_wClass %>% 
    # filter(SITE %in% axial) %>% 
    filter(Domain == "Eukaryota") %>% 
  # Average across replicates
      group_by(FeatureID, SAMPLENAME, VENT, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species) %>% 
        summarise(AVG = mean(value)) %>% 
      ungroup() %>% 
    # Sum to the Order taxonomic classification
    unite(SAMPLENAME_2, SAMPLENAME, VENT, sep = "_") %>% 
      group_by(SAMPLENAME_2, Supergroup, Phylum, Class) %>% 
        summarise(CLASS_SUM = sum(AVG)) %>% 
    unite(CLASS, Supergroup, Phylum, Class, sep = " ") %>% 
    select(CLASS, SAMPLENAME_2, CLASS_SUM) %>% 
    pivot_wider(names_from = SAMPLENAME_2, values_from = CLASS_SUM, values_fill = 0) %>% 
    column_to_rownames(var = "CLASS")
  ## Take wide data frame and CLR transform, pivot to wide, and plot
  data.frame(compositions::clr(df_wide)) %>% 
      rownames_to_column(var = "CLASS") %>%
      pivot_longer(cols = starts_with(selection), values_to = "CLR", names_to = "SAMPLENAME_2") %>% 
      separate(SAMPLENAME_2, c("SAMPLENAME", "VENT"), sep = "_") %>% 
      separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>% 
      mutate(VENT = str_replace_all(VENT, "\\.", " ")) %>% 
      mutate(REGION = case_when(
        SITE == "GordaRidge" ~ "Gorda Ridge",
        SITE %in% mcr ~ "Mid-Cayman Rise",
        # SITE == "Piccard" ~ "Piccard",
        # SITE == "VonDamm" ~ "Von Damm",
        SITE == "Axial" ~ "Axial")) %>% 
      mutate(VENTNAME = case_when(
        REGION == "Gorda Ridge" ~ VENT,
        REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
        REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
      )) %>% select(-Sample_tmp) %>% 
    unite(SAMPLE, REGION, VENTNAME, sep = " ", remove = FALSE) %>% 
    separate(CLASS, c("Supergroup", "Phylum", "Class"), sep = " ", remove = FALSE) %>%
    ggplot(aes(x = SAMPLE, y = Class, fill = CLR)) +
      geom_tile(color = "#252525") + 
      theme(legend.position = "right", 
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(), 
            panel.border = element_blank(), 
            panel.background = element_blank(),
            axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, color = "black",size = 8), 
            axis.text.y = element_text(color = "black", size = 8),
            strip.background = element_blank(), 
            strip.text.y = element_text(hjust = 0, vjust = 0.5, angle = 0),
            # strip.text.x = element_blank(), 
            legend.title = element_blank()) + 
      labs(x = "", y = "") +
      scale_fill_gradient2(low = "#4575b4", mid = "white", high = "#d73027", na.value = "grey50") +
      facet_grid(Supergroup + Phylum ~ SAMPLETYPE, space = "free", scales = "free")
}

11.3 PCA

Similar to aove, the first step in this function transforms data using CLR (to ASV level though). First plot will show eigen values (scree plot to determine if 2 vs. 3 dimensions is best for data). Then function extracts data points and creates PCA plot.

make_pca <- function(df, selection){
 df_wide_asv <- df %>%
    # df_wide_asv <- insitu_asv_wClass %>%
  filter(SITE %in% selection) %>%
    # filter(SITE %in% axial) %>%
  filter(Domain == "Eukaryota") %>% 
  # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT) %>% 
    summarise(AVG = mean(value)) %>% 
    ungroup() %>% 
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>% 
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>% 
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>% select(-Sample_tmp) %>% 
  unite(SAMPLE, SAMPLETYPE, REGION, VENTNAME, sep = "_", remove = FALSE) %>% 
   group_by(FeatureID, SAMPLE) %>% 
   summarise(SUM = sum(AVG)) %>% 
  pivot_wider(names_from = FeatureID, values_from = SUM, values_fill = 0) %>% 
  column_to_rownames(var = "SAMPLE")
  # look at eigenvalues
  pca_lr <- prcomp(data.frame(compositions::clr(df_wide_asv)))
  variance_lr <- (pca_lr$sdev^2)/sum(pca_lr$sdev^2)
  ## View bar plot
  barplot(variance_lr, main = "Log-Ratio PCA Screeplot", xlab = "PC Axis", ylab = "% Variance", 
    cex.names = 1.5, cex.axis = 1.5, cex.lab = 1.5, cex.main = 1.5)
  ## Extract PCR points
  data.frame(pca_lr$x, SAMPLE = rownames(pca_lr$x)) %>% 
      separate(SAMPLE, c("SAMPLETYPE", "REGION", "VENTNAME"), sep = "_", remove = FALSE) %>% 
  ## Generate PCA plot
  ggplot(aes(x = PC1, y = PC2, shape = SAMPLETYPE, fill = VENTNAME)) +
    geom_hline(yintercept = 0) + geom_vline(xintercept = 0, color = "#525252") +
    geom_point(size=4, stroke = 1, aes(fill = VENTNAME)) +
    ylab(paste0('PC2 ',round(variance_lr[2]*100,2),'%')) +
    xlab(paste0('PC1 ',round(variance_lr[1]*100,2),'%')) +
    scale_shape_manual(values = c(21, 23, 24)) +
    scale_fill_viridis(discrete = TRUE, option = "turbo") +
    # scale_fill_manual(values = fill_color) +
    # scale_color_manual(values = color_color) +
    theme_bw() +
    theme(axis.text = element_text(color="black", size=12),
          legend.title = element_blank(),
          axis.title = element_text(color="black", size=14),
          legend.text = element_text(color = "black", size = 14),
          plot.margin = margin(2, 1, 2, 1, "cm")) +
    guides(fill = guide_legend(override.aes = list(shape = 21) ),
              shape = guide_legend(override.aes = list(fill = "black")))
  }

11.4 ASV richness

From complete dataset, average across replicates, then sum the total number of ASVs in each sample. Then plot a data point for total number of ASVs (ASV richness) by sample type - where sample type represents the vent, plume, vs. background. Box plots show the median and range.

make_asv_rich <- function(df, selection){
  df %>% 
  filter(SITE %in% selection) %>%
  filter(Domain == "Eukaryota") %>% 
  # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT, Supergroup) %>% 
    summarise(AVG = mean(value)) %>% 
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>% 
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>% 
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>% select(-Sample_tmp) %>% 
  unite(SAMPLE, REGION, VENTNAME, sep = " ", remove = FALSE) %>% 
  ungroup() %>% 
  group_by(SITE, REGION, SAMPLE, SAMPLETYPE) %>% 
    summarise(NUM_ASV = n()) %>% 
  ggplot(aes(x = SAMPLETYPE, y = NUM_ASV, shape = SAMPLETYPE)) +
  geom_boxplot(aes(group = SAMPLETYPE), alpha = 0.8, width = 0.4) +
  geom_jitter(size=2, aes(fill = SITE)) +
  scale_shape_manual(values = c(21, 23, 24)) +
  theme_bw() +
  theme(axis.text = element_text(color="black", size=12),
        legend.title = element_blank(),
        axis.title = element_text(color="black", size=14),
        legend.text = element_text(color = "black", size = 14)) +
  guides(fill = guide_legend(override.aes = list(shape = 21) ),
            shape = guide_legend(override.aes = list(fill = "black") ) ) +
  labs(x = "", y = "Total number of ASVs")
}

11.5 Presence-absence

Bar plot (colors correspond to Supergroup) represents the number of ASVs shared or unique to each sample. Combination matrix below bars shows which samples are considered for the bar plot.

make_upset_plot <- function(df, selection){ 
  df %>% 
  filter(SITE %in% selection) %>%
  filter(Domain == "Eukaryota") %>% 
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  # Taxa to supergroup
  mutate(SupergroupPhylum = SUPERGROUP) %>%
  # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT, SupergroupPhylum) %>% 
    summarise(AVG = mean(value)) %>% 
  ungroup() %>% 
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>% 
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>% 
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>% select(-Sample_tmp) %>% 
  unite(SAMPLE, SITE, SAMPLETYPE, VENT, sep = " ", remove = FALSE) %>% 
  group_by(FeatureID, SupergroupPhylum, SAMPLE) %>% 
    summarise(SUM = sum(AVG)) %>%
  # filter(SUM > 200) %>% 
  ungroup() %>%
  distinct(FeatureID, SupergroupPhylum, SUM, SAMPLE, .keep_all = TRUE) %>% 
  group_by(FeatureID, SupergroupPhylum) %>% 
  summarise(SAMPLE = list(SAMPLE)) %>% 
  ggplot(aes(x = SAMPLE)) +
    geom_bar(color = "black", width = 0.7, aes(fill = SupergroupPhylum)) +
    scale_x_upset(n_intersections = 25) +
  scale_y_continuous(expand = c(0,0)) +
  labs(x = "", y = "Shared at ASV level") +
  theme_linedraw() +
  theme(axis.text.y = element_text(color="black", size=14, face = "bold"),
        axis.text.x = element_text(color="black", size=14, face = "bold"),
        axis.title = element_text(color="black", size=14, face = "bold"),
        legend.text = element_text(color = "black", size = 12, face = "bold"),
        legend.title = element_blank(),
        panel.grid.minor = element_blank(),
        plot.margin = margin(1, 1, 1, 5, "cm")) +
  scale_fill_manual(values = all_taxa_color)
}
# all_taxa_color
#values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#deebf7", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525", "#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525")

Filter data to reduce noise and show sample type to vent ecosystem variability.

alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
bkgd <- c("Deep seawater", "BSW", "Shallow seawater")
plume <- c("Candelabra Plume", "Mt Edwards Plume", "Plume", "Near vent BW")
cosmo <- c("Vent, plume, & background", "Vent & background", "Vent & plume", "Plume & background")

##
tree_res_v_cosmo <- function(df, site){
  to_supergroup_vent_only <- df %>% 
    filter(SITE %in% site) %>% 
    mutate(DISTRIBUTION = case_when(
    CLASS %in% cosmo ~ "Cosmopolitan",
    CLASS == "Vent only" ~ "Resident"
    )) %>% 
    filter(Domain == "Eukaryota") %>%
    filter(Supergroup != "Opisthokonta") %>% 
    mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
           Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
           Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
           Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
    mutate(SUPERGROUP = case_when(
      Supergroup %in% alv ~ "Other Alveolata",
      Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
      Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
      Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
      Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
      Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
      Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
      TRUE ~ Supergroup
    )) %>% 
    mutate(SAMPLETYPEORDER = case_when(
      VENT %in% bkgd ~ "Background",
      VENT %in% plume ~ "Plume",
      TRUE ~ "Vent"
    )) %>% 
    filter((DISTRIBUTION == "Resident" | DISTRIBUTION == "Cosmopolitan")) %>% 
    group_by(FeatureID, Taxon, SUPERGROUP,
             VENT, SITE, SAMPLETYPE, YEAR, DATASET, SAMPLETYPEORDER, DISTRIBUTION) %>% 
    summarise(SEQ_AVG_REP = mean(value)) %>% 
    ungroup() %>% 
    group_by(SITE, SUPERGROUP, DISTRIBUTION) %>% 
    summarise(ASV_COUNT = n(),
              SEQ_SUM = sum(SEQ_AVG_REP))
  ##
  to_supergroup_vent_only %>% 
    ggplot(aes(area = SEQ_SUM, fill = SUPERGROUP, subgroup = SUPERGROUP)) +
    geom_treemap(color = "white") +
    geom_treemap_subgroup_border(colour = "white", size = 2) +
    # geom_bar(stat = "identity", position = "fill", color = "black", width = 0.9) +
    facet_grid(SITE ~ DISTRIBUTION) +
    theme_linedraw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
          strip.background = element_blank(), strip.text = element_text(color = "black"),
          legend.position = "right",
          legend.title = element_blank(),
          panel.border = element_blank()) +
    scale_y_continuous(expand = c(0,0)) +
    scale_fill_manual(values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525", "#737373", "#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525")) +
    labs(x = "", y = "Sequence proportion by Supergroup")
}

11.6 Top 15 taxa

Isolate the top 15 taxa

# head(insitu_asv_wClass)

all_class <- as.character(unique(insitu_asv_wClass$CLASS))

all_class_site <- as.character(unique(insitu_asv_wClass$SITE_CLASS))

top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax){
  level <- enquo(level)
  class <- enquo(class)
  plot_tax <- enquo(plot_tax)
  all_class <- as.character(unique(insitu_asv_wClass$CLASS))
  all_class_site <- as.character(unique(insitu_asv_wClass$SITE_CLASS))
out_table <- df %>% 
  # filter(SITE %in% site) %>%
  filter(!!level == taxa) %>%
  # filter(Domain %in% "Eukaryota") %>% 
  # filter(CLASS %in% all_class) %>% 
  filter(!!class %in% category) %>%
  # Average across replicates
  group_by(FeatureID, Taxon, Domain, Supergroup, Phylum, Class, Order, Family, Genus, Species,
           VENT, SITE, SAMPLETYPE, YEAR, DATASET, !!class) %>% 
    summarise(SEQ_AVG_REP = mean(value)) %>% 
  ungroup() %>% 
  # Select top 15 ASVs
  group_by(VENT, SITE, SAMPLETYPE, YEAR, DATASET) %>% 
  top_n(NUM, wt = SEQ_AVG_REP) %>% 
  unite(SAMPLENAME, SITE, SAMPLETYPE, VENT, YEAR, sep = " ", remove = FALSE)
  # 
  plot <- ggplot(out_table, aes(x = SAMPLENAME, fill = !!plot_tax)) +
    geom_bar(stat = "count", color = "black", width = 0.7) +
    coord_flip() +
    facet_grid(SAMPLETYPE + SITE ~ ., space = "free", scales = "free") +
    theme_classic() +
    scale_fill_viridis(discrete = TRUE, option = "turbo")
  ##
  plotly::ggplotly(plot)
}
  
# Function usage:
# top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax)
# top15(insitu_asv_wClass, 10, all, Phylum, "Ciliophora", CLASS, all_class, Class)
# all_class

12 Axial Seamount analysis

  • What should I do with the year to year comparison? Individual analyses here for Axial, MCR, and GR will go in supplementary.

12.1 Bar plot relative abundance: Axial

make_bar_relabun(insitu_asv_wClass, axial)

Axial Seamount samples from archived material - span 2013, 2014, and 2015. First, the background and plume (from 2015 only, and from plume associated with the Anemone vent) are different from the vent samples - overwhelmingly stramneopile and rhizaria. For the background and plume, the stramenopiles appear to be associated with ochrophyta or opalozoa. For the plume, the rhizaria population was associated with cercozoa, while the background seawater was identified as belonging to radiolaria.

The major difference between the background/plume and vent sites was the higher relative sequence abundance of ciliates and opisthokonta. For the opisthokonta, these are primarily metazoa - and I will need to investigate this further. Exceptions for this include the ‘Dependable’ vent from 2013, which had a completely different composition, and ‘Marker 113’ in 2015, which the opisthokonta sequences were assigned choanoflagellate and fungi.

Further questions to consider

Any geochemical changes to Marker 113 from 2013/2014 to 2015? Could attribute difference of opisthokonta colonization.

12.2 Tile plot CLR: Axial

# make_clr_trans_tile(insitu_asv_wClass, axial)

Tile plot goes to the Class taxonomic level. Here at Axial, mostly the ciliate class had higher CLR values (more enriched relative to the data mean). Second to ciliates were cercozoa. Also noticing how Marker 113 2013 and 2015 are more similar to each other than 2014?

12.3 PCA: Axial

make_pca(insitu_asv_wClass, axial)
## Warning: Expected 4 pieces. Additional pieces discarded in 7717 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

While we only have 1 plume and bsw each for Axial, they are grouping together - away from vents. So that is an expected signature and likely consistent with the other sites. These colors are a little confusing, it does look like Boca is an outlier.

12.4 ASV richness: Axial

# make_asv_rich(insitu_asv_wClass, axial)

We only have 1 sample for background and plume from Axial Seamount. But this shows that the vent sites have varied ASV richness,

12.5 Presence-absence: Axial

make_upset_plot(insitu_asv_wClass, axial)
## Warning: Expected 4 pieces. Additional pieces discarded in 6856 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Removed 723 rows containing non-finite values (stat_count).

axial_upset <- make_upset_plot(insitu_asv_wClass, axial)
## Warning: Expected 4 pieces. Additional pieces discarded in 6856 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

12.5.1 Presence-absence at Axial - year and site

alv <- c("Alveolata-Ellobiopsidae", "Alveolata-Perkinsea", "Alveolata-Unknown", "Alveolata-Chrompodellids", "Alveolata-Apicomplexa")
# svg("upsetR-bysite-sampletype-nov2.svg", h=9, w=15)
axial_loc_yr <- insitu_asv_wClass %>% 
  filter(SITE %in% axial) %>%
  filter(Domain == "Eukaryota") %>% 
  filter(Supergroup != "Opisthokonta") %>% 
  mutate(Supergroup = ifelse(is.na(Supergroup), "Unknown Eukaryota", Supergroup),
         Phylum = ifelse(is.na(Phylum), "Unknown", Phylum),
         Phylum = ifelse(Phylum == "Alveolata_X", "Ellobiopsidae", Phylum),
         Supergroup = ifelse(Supergroup == "Alveolata", paste(Supergroup, Phylum, sep = "-"), Supergroup)) %>% 
  mutate(SUPERGROUP = case_when(
    Supergroup %in% alv ~ "Other Alveolata",
    Supergroup == "Eukaryota_X" ~ "Unknown Eukaryota",
    Phylum == "Cercozoa" ~ "Rhizaria-Cercozoa",
    Phylum == "Radiolaria" ~ "Rhizaria-Radiolaria",
    Phylum == "Ochrophyta" ~ "Stramenopiles-Ochrophyta",
    Phylum == "Opalozoa" ~ "Stramenopiles-Opalozoa",
    Phylum == "Sagenista" ~ "Stramenopiles-Sagenista",
    TRUE ~ Supergroup
  )) %>% 
  # Taxa to supergroup
  mutate(SupergroupPhylum = SUPERGROUP) %>%
  # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT, SupergroupPhylum) %>% 
    summarise(AVG = mean(value)) %>% 
  ungroup() %>% 
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>% 
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>% 
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>% select(-Sample_tmp) %>% 
  filter(REGION == "Axial") %>% 
  unite(SAMPLE, SITE, SAMPLETYPE, sep = " ", remove = FALSE) %>% 
  unite(YR_LOC, SAMPLETYPE, YEAR, sep = " ", remove = FALSE) %>% 
  group_by(FeatureID, SupergroupPhylum, YR_LOC) %>% 
    summarise(SUM = sum(AVG)) %>%
  # filter(SUM > 200) %>% 
  ungroup()
## Warning: Expected 4 pieces. Additional pieces discarded in 6856 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
##  
axial_loc_yr %>% 
  distinct(FeatureID, SupergroupPhylum, SUM, YR_LOC, .keep_all = TRUE) %>% 
  group_by(FeatureID, SupergroupPhylum) %>% 
  summarise(SAMPLE = list(YR_LOC)) %>% 
  ggplot(aes(x = SAMPLE)) +
    geom_bar(color = "black", width = 0.5, aes(fill = SupergroupPhylum)) +
    scale_x_upset(n_intersections = 25) +
  scale_y_continuous(expand = c(0,0)) +
  labs(x = "", y = "Shared ASVs") +
  theme_linedraw() +
  theme(axis.text.y = element_text(color="black", size=14),
        axis.text.x = element_text(color="black", size=14),
        axis.title = element_text(color="black", size=14),
        legend.text = element_text(color = "black", size = 12),
        plot.margin = margin(1, 1, 1, 5, "cm")) +
  scale_fill_manual(values = c("#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#deebf7", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525", "#fa9fb5", "#c51b8a", "#67000d", "#ef3b2c", "#ffffcc", "#feb24c", "#c7e9b4", "#1d91c0", "#253494", "#9e9ac8", "#238b45", "#54278f", "#bdbdbd", "#252525"))
## Warning: Removed 13 rows containing non-finite values (stat_count).

# dev.off()

Extract total number of ASVs from Axial data, and those that were found within the vent fluid only.

length(unique(axial_loc_yr$FeatureID))
## [1] 3586
tmp <- axial_loc_yr %>% 
  filter(grepl("Vent", YR_LOC)) %>% 
  pivot_wider(names_from = YR_LOC, values_from = SUM, values_fill = NA) %>% 
  drop_na()
head(tmp)  
## # A tibble: 6 × 5
##   FeatureID                        SupergroupPhylum      Vent …¹ Vent …² Vent …³
##   <chr>                            <chr>                   <dbl>   <dbl>   <dbl>
## 1 00b72d1a5fefb03bc39edbfba4566d03 Alveolata-Ciliophora    11403    1817     721
## 2 01a71ee728b1597b04d339505a58ed04 Stramenopiles             368      51     224
## 3 04f5a1d4ab104eeb74573c3b79de10e0 Stramenopiles             273     120     200
## 4 05b6d079805b2bb389fee9b9a7e0feba Stramenopiles-Sageni…     133      50      10
## 5 068cf0d76352f3978c0e4f1dd9457ba0 Stramenopiles             988      40     172
## 6 06cb67e2036452e926aaf2edeef3acc6 Alveolata-Dinoflagel…     544     240     140
## # … with abbreviated variable names ¹​`Vent 2013`, ²​`Vent 2014`, ³​`Vent 2015`
length(unique(tmp$FeatureID))
## [1] 177
tmp %>% 
  pivot_longer(starts_with("Vent ")) %>% 
  group_by(name, SupergroupPhylum) %>% 
  summarise(VALUE = sum(value)) %>% 
ggplot(aes(x = name, y = VALUE, fill = SupergroupPhylum)) +
  geom_bar(stat = "identity")

12.6 Tree map distribution: Axial

Plot only cosmopolitan vs. resident ASV relative abundances by Supergroup.

tree_res_v_cosmo(insitu_asv_wClass, axial)

12.7 Top 15 taxa: Axial

# Function usage:
# top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax)

top15(insitu_asv_wClass, 15, axial, Domain, "Eukaryota", CLASS, all_class, Phylum)
unique(insitu_asv_wClass$Phylum)
##  [1] "Sagenista"           "Radiolaria"          "Ciliophora"         
##  [4] "Ochrophyta"          "Cercozoa"            "Opalozoa"           
##  [7] "Metazoa"             NA                    "Fungi"              
## [10] "Dinoflagellata"      "Stramenopiles_X"     "Patescibacteria"    
## [13] "Apusomonadidae"      "Pseudofungi"         "Haptophyta"         
## [16] "Chlorophyta"         "Perkinsea"           "Telonemia"          
## [19] "Choanoflagellida"    "Lobosa"              "Conosa"             
## [22] "Centroheliozoa"      "Cryptophyta"         "Streptophyta"       
## [25] "Picozoa"             "Opisthokonta_X"      "Metamonada"         
## [28] "Hilomonadea"         "Mesomycetozoa"       "Chrompodellids"     
## [31] "Prasinodermophyta"   "Apicomplexa"         "Katablepharidophyta"
## [34] "Breviatea"           "Epsilonbacteraeota"  "Proteobacteria"     
## [37] "Aquificae"           "Cyanobacteria"       "Euryarchaeota"      
## [40] "Alveolata_X"         "Bacteroidetes"       "Rhodophyta"         
## [43] "Protalveolata_X"     "Discoba"
vent_only <- c("Vent only")
# Isolate top 15 vent-only ASVs from axial - at the phylum level
top15(insitu_asv_wClass, 15, axial, Domain, "Eukaryota", CLASS, vent_only, Phylum)
# Top 10 ciliate only taxa, as most were ciliates
top15(insitu_asv_wClass, 10, axial, Phylum, "Ciliophora", CLASS, vent_only, Class)
# Top 10 ciliate only taxa, as most were ciliates
top15(insitu_asv_wClass, 10, axial, Phylum, "Opalozoa", CLASS, vent_only, Class)

13 Mid-Cayman Rise

13.1 Bar plot relative abundance: MCR

# make_bar_relabun(insitu_asv_wClass, mcr)

13.2 Tile plot CLR: MCR

# make_clr_trans_tile(insitu_asv_wClass, mcr)

13.3 PCA: MCR

make_pca(insitu_asv_wClass, mcr)
## Warning: Expected 4 pieces. Additional pieces discarded in 8327 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

13.4 ASV richness: MCR

# make_asv_rich(insitu_asv_wClass, mcr)

13.5 Presence-absence: MCR

make_upset_plot(insitu_asv_wClass, mcr)
## Warning: Expected 4 pieces. Additional pieces discarded in 7678 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Removed 1000 rows containing non-finite values (stat_count).

mcr_upset <- make_upset_plot(insitu_asv_wClass, mcr)
## Warning: Expected 4 pieces. Additional pieces discarded in 7678 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

Repeat presence-absence plot, but with a lower resolution.

insitu_asv_wClass %>% 
  filter(SITE %in% mcr) %>%
  filter(Domain == "Eukaryota") %>% 
  # Average across replicates
    group_by(FeatureID, SAMPLENAME, VENT, Supergroup) %>% 
    summarise(AVG = mean(value)) %>% 
  separate(SAMPLENAME, c("SITE", "SAMPLETYPE", "YEAR", "Sample_tmp"), remove = TRUE) %>% 
  mutate(REGION = case_when(
    SITE == "GordaRidge" ~ "Gorda Ridge",
    SITE %in% mcr ~ "Mid-Cayman Rise",
    SITE == "Axial" ~ "Axial")) %>% 
  mutate(VENTNAME = case_when(
    REGION == "Gorda Ridge" ~ VENT,
    REGION == "Mid-Cayman Rise" ~ paste(SITE, VENT, sep = " "),
    REGION == "Axial" ~ paste(VENT, YEAR, sep = " ")
  )) %>% select(-Sample_tmp) %>% 
  unite(SAMPLE, SITE, SAMPLETYPE, sep = " ", remove = FALSE) %>% 
  group_by(FeatureID, Supergroup, SAMPLE) %>% 
    summarise(SUM = sum(AVG)) %>%
  ungroup() %>% 
  distinct(FeatureID, Supergroup, SUM, SAMPLE, .keep_all = TRUE) %>% 
  group_by(FeatureID, Supergroup) %>% 
  summarise(SAMPLE = list(SAMPLE)) %>% 
  ggplot(aes(x = SAMPLE)) +
    geom_bar(color = "black", width = 0.5, aes(fill = Supergroup)) +
    scale_x_upset(n_intersections = 15) +
  scale_y_continuous(expand = c(0,0)) +
  labs(x = "", y = "Shared ASVs") +
  theme_linedraw() +
  theme(axis.text = element_text(color="black", size=10),
        axis.title = element_text(color="black", size=10),
        legend.text = element_text(color = "black", size = 10),
        plot.margin = margin(1, 1, 1, 5, "cm")) +
    scale_fill_manual(values = c("#f1eef6", "#d7b5d8", "#df65b0", "#ce1256", "#fc9272", "#ef3b2c", 
    "#800026", "#fff7bc", "#fec44f", "#d95f0e", "#a63603", "#74c476", "#238b45", 
    "#00441b", "#7fcdbb", "#084081", "#c6dbef", "#2b8cbe", "#016c59", "#bcbddc", 
    "#807dba", "#54278f", "#bdbdbd", "black"))
## Warning: Expected 4 pieces. Additional pieces discarded in 8327 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Removed 331 rows containing non-finite values (stat_count).

13.6 Top 15 taxa: MCR

# Function usage:
# top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax)
top15(insitu_asv_wClass, 15, mcr, Domain, "Eukaryota", CLASS, all_class, Phylum)
# unique(insitu_asv_wClass$CLASS)
vent_only <- c("Vent only")
# Isolate top 15 vent-only ASVs from MCR - at the phylum level
top15(insitu_asv_wClass, 15, mcr, Domain, "Eukaryota", CLASS, vent_only, Phylum)
# Top 10 ciliate only taxa, as most were ciliates
top15(insitu_asv_wClass, 10, mcr, Phylum, "Ciliophora", CLASS, vent_only, Class)

14 Gorda Ridge

## Bar plot relative abundance: GR

make_bar_relabun(insitu_asv_wClass, gr)

14.1 Tile plot CLR: GR

# make_clr_trans_tile(insitu_asv_wClass, gr)

14.2 PCA: GR

make_pca(insitu_asv_wClass, gr)
## Warning: Expected 4 pieces. Additional pieces discarded in 9456 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

14.3 ASV richness: GR

# make_asv_rich(insitu_asv_wClass, gr)

14.4 Presence-absence: GR

make_upset_plot(insitu_asv_wClass, gr)
## Warning: Expected 4 pieces. Additional pieces discarded in 8710 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Warning: Removed 731 rows containing non-finite values (stat_count).

gr_upset <- make_upset_plot(insitu_asv_wClass, gr)
## Warning: Expected 4 pieces. Additional pieces discarded in 8710 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

14.5 Compile upsetR Fig S3.

# svg("../../../Manuscripts_presentations_reviews/deepsea-microeuk-ME/vectorgraphic-figures/figs3-upsetRpanels.svg", h = 17, w = 20)
(genus_upset + theme(legend.position = "none")) + (axial_upset + theme(legend.position = "none")) + (gr_upset + theme(legend.position = "none")) + (mcr_upset + theme(legend.position = "none")) + patchwork::plot_layout(nrow = 2, ncol = 2) + plot_annotation(tag_levels = "a")
## Warning: Removed 316 rows containing non-finite values (stat_count).
## Warning: Removed 723 rows containing non-finite values (stat_count).
## Warning: Removed 731 rows containing non-finite values (stat_count).
## Warning: Removed 1000 rows containing non-finite values (stat_count).

# dev.off()

14.6 Top 15 taxa: GR

# Function usage:
# top15 <- function(df, NUM, site, level, taxa, class, category, plot_tax)
top15(insitu_asv_wClass, 15, gr, Domain, "Eukaryota", CLASS, all_class, Phylum)
# unique(insitu_asv_wClass$CLASS)
vent_only <- c("Vent only")
# Isolate top 15 vent-only ASVs from GR - at the phylum level
top15(insitu_asv_wClass, 15, gr, Domain, "Eukaryota", CLASS, vent_only, Phylum)
# Top 10 ciliate only taxa, as most were ciliates
top15(insitu_asv_wClass, 10, gr, Phylum, "Ciliophora", CLASS, vent_only, Class)

15 Session Info

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS/LAPACK: /Users/sarahhu/anaconda3/envs/r_4.1/lib/libopenblasp-r0.3.15.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] DivNet_0.3.7       breakaway_4.7.3    geosphere_1.5-14   ggdendro_0.1.22   
##  [5] vegan_2.5-7        lattice_0.20-45    permute_0.9-5      viridis_0.6.2     
##  [9] viridisLite_0.4.1  plotly_4.10.0      treemapify_2.5.5   gt_0.3.1          
## [13] ggupset_0.3.0      ggpubr_0.4.0       patchwork_1.1.1    compositions_2.0-4
## [17] decontam_1.12.0    phyloseq_1.36.0    cowplot_1.1.1      forcats_0.5.1     
## [21] stringr_1.4.0      dplyr_1.0.9        purrr_0.3.4        readr_2.1.2       
## [25] tidyr_1.2.0        tibble_3.1.7       ggplot2_3.3.6      tidyverse_1.3.1   
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1           backports_1.4.1        plyr_1.8.7            
##   [4] igraph_1.3.1           lazyeval_0.2.2         sp_1.5-0              
##   [7] splines_4.1.0          crosstalk_1.2.0        GenomeInfoDb_1.28.4   
##  [10] digest_0.6.29          foreach_1.5.1          htmltools_0.5.2       
##  [13] fansi_1.0.3            magrittr_2.0.3         checkmate_2.0.0       
##  [16] doParallel_1.0.16      cluster_2.1.3          tzdb_0.2.0            
##  [19] Biostrings_2.60.2      ggfittext_0.9.1        modelr_0.1.8          
##  [22] bayesm_3.1-4           colorspace_2.0-3       rvest_1.0.2           
##  [25] haven_2.4.3            xfun_0.29              crayon_1.5.1          
##  [28] RCurl_1.98-1.3         jsonlite_1.8.0         lme4_1.1-27.1         
##  [31] survival_3.3-1         iterators_1.0.13       ape_5.6-2             
##  [34] glue_1.6.2             gtable_0.3.0           zlibbioc_1.38.0       
##  [37] XVector_0.32.0         car_3.0-12             Rhdf5lib_1.14.2       
##  [40] BiocGenerics_0.38.0    DEoptimR_1.0-11        abind_1.4-5           
##  [43] scales_1.2.1           DBI_1.1.2              rstatix_0.7.0         
##  [46] Rcpp_1.0.8             stats4_4.1.0           htmlwidgets_1.5.4     
##  [49] httr_1.4.3             ellipsis_0.3.2         pkgconfig_2.0.3       
##  [52] farver_2.1.0           sass_0.4.0             dbplyr_2.2.1          
##  [55] utf8_1.2.2             tidyselect_1.1.2       labeling_0.4.2        
##  [58] rlang_1.0.4            reshape2_1.4.4         polynom_1.4-0         
##  [61] munsell_0.5.0          cellranger_1.1.0       tools_4.1.0           
##  [64] cachem_1.0.6           cli_3.3.0              generics_0.1.3        
##  [67] ade4_1.7-18            broom_1.0.0            evaluate_0.15         
##  [70] biomformat_1.20.0      fastmap_1.1.0          yaml_2.3.5            
##  [73] knitr_1.39             fs_1.5.2               robustbase_0.93-9     
##  [76] nlme_3.1-158           mvnfast_0.2.7          xml2_1.3.3            
##  [79] compiler_4.1.0         rstudioapi_0.13        ggsignif_0.6.3        
##  [82] reprex_2.0.1           bslib_0.4.0            stringi_1.7.5         
##  [85] highr_0.9              Matrix_1.4-1           nloptr_1.2.2.3        
##  [88] tensorA_0.36.2         multtest_2.48.0        vctrs_0.4.1           
##  [91] pillar_1.8.1           lifecycle_1.0.1        rhdf5filters_1.4.0    
##  [94] jquerylib_0.1.4        data.table_1.14.0      bitops_1.0-7          
##  [97] R6_2.5.1               gridExtra_2.3          IRanges_2.26.0        
## [100] codetools_0.2-18       boot_1.3-28            MASS_7.3-57           
## [103] assertthat_0.2.1       rhdf5_2.36.0           withr_2.5.0           
## [106] S4Vectors_0.30.2       GenomeInfoDbData_1.2.6 mgcv_1.8-36           
## [109] parallel_4.1.0         hms_1.1.1              grid_4.1.0            
## [112] minqa_1.2.4            rmarkdown_2.13         carData_3.0-5         
## [115] Biobase_2.52.0         lubridate_1.8.0