All analyses were performed in R version 3.6.2. This is the code that accompanies the publication XXX. Here you will find all the code to repeat the statistical analyses performed in R and the figures created for the manuscript. The accompanying library preparation protocol and bioinformatic walkthrough can be found by clicking on the links.
Code to produce these figures was generated with help from Ryan Eckert, if you think this walkthrough is helpful check out similar ones for Symbiodiniaceae ITS2 analyses here.
If you have any issues or questions about the code please feel free to send an email to lexie.sturm@gmail.com.
Make sure to set the working directory: setwd("~/path/to/directory/with/data")
setwd("../data")
For the following analyses we will require the use of multiple different R packages, we can use the package pacman to quickly load them.
if (!require("pacman")) install.packages("pacman")
pacman::p_load("adegenet", "dendextend", "flextable", "ggdendro", "hierfstat", "Imap", "kableExtra", "paletteer", "patchwork", "officer", "poppr", "RColorBrewer", "reshape2", "StAMPP", "tidyverse", "vcfR", "vegan", "WGCNA")
This figure was created in ArcMap and Adobe Illustrator
Sampling site | Sampling date | Depth range (m) | ng | Latitude | Longitude |
Banco de San Antonio | 21-May-17 | 40-75 | 2 | 22.01502 | -84.99913 |
Guanahacabibes | 23-May-17 | 3-5 | 12 | 21.79668 | -84.51667 |
Isla de la Juventud | 25-May-17 | 3-5 | 3 | 21.63063 | -83.21420 |
Cayo Anclitas | 1-Jun-17 | 3-5 | 14 | 20.80792 | -78.95550 |
Chivirico | 3-Jun-17 | 3-5 | 11 | 19.92550 | -76.40367 |
Cabo Lucrecia | 6-Jun-17 | 3-5 | 16 | 21.07500 | -75.63833 |
Cayo Sabinal | 7-Jun-17 | 1-4 | 12 | 21.67833 | -77.16667 |
Cayo Jutias | 9-Jun-17 | 1-2 | 8 | 22.98263 | -79.80660 |
The following chunk of code uses the IBS matrix generated from ANGSD to produce an Identity-By-State Dendrogram which we can use to ensure that technical replicates are clustering together and to identify natural clones from the SNP dataset.
cloneBams = read.table("sample_list")[,1] # list of bam files
cloneMa = as.matrix(read.table("trimmed_mc_w_clones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(cloneMa) = list(cloneBams,cloneBams)
clonesHc = hclust(as.dist(cloneMa),"ave")
cloneMeta = read.table("clonePops.txt", sep="\t") # list of bams files and their populations
clonePops = cloneMeta$V2
cloneDend = cloneMa %>% as.dist() %>% hclust(.,"ave") %>% as.dendrogram()
cloneDData = cloneDend %>% dendro_data()
plotPops = c("Banco de San Antonio", "Cabo Lucrecia", "Cayo Anclitas", "Cayo Jutías",
"Cayo Sabinal", "Chivirico", "Guanahacabibes", "Isla de la Juventud")
westSites = c("Banco de San Antonio", "Guanahacabibes", "Isla de la Juventud", "Cayo Jutias")
# Making the branches hang shorter so we can easily see clonal groups
cloneDData$segments$yend2 = cloneDData$segments$yend
for(i in 1:nrow(cloneDData$segments)) {
if (cloneDData$segments$yend2[i] == 0) {
cloneDData$segments$yend2[i] = (cloneDData$segments$y[i] - 0.03)}}
cloneDendPoints = cloneDData$labels
cloneDendPoints$pop = clonePops[order.dendrogram(cloneDend)]
rownames(cloneDendPoints) = cloneDendPoints$label
# Making points at the leaves to place symbols for populations
point = as.vector(NA)
for(i in 1:nrow(cloneDData$segments)) {
if (cloneDData$segments$yend[i] == 0) {
point[i] = cloneDData$segments$y[i] - 0.03
} else {
point[i] = NA}}
cloneDendPoints$y = point[!is.na(point)]
cloneDendPoints = cloneDendPoints %>%
mutate(location = ifelse(.$pop %in% westSites, "Western", "Eastern"))
head(cloneDendPoints)
## x y label pop location
## 1 1 0.1007140 44-1 Chivirico Eastern
## 2 2 0.0973660 44-2 Chivirico Eastern
## 3 3 0.0973660 44-3 Chivirico Eastern
## 4 4 0.2535695 45 Chivirico Eastern
## 5 5 0.1114820 41 Chivirico Eastern
## 6 6 0.1114820 43 Chivirico Eastern
techReps = c("44-1", "44-2", "44-3", "61-1", "61-2", "61-3", "18-1", "18-2", "18-3")
cloneDendA = ggplot() +
geom_segment(data = segment(cloneDData), aes(x = x, y = y, xend = xend, yend = yend2), size = 0.5) +
geom_point(data = cloneDendPoints, aes(x = x, y = y, fill = pop, shape = location), size = 4, stroke = 0.25) +
geom_hline(yintercept = 0.15, color = "red", lty = 5, size = 0.75) + # creating a dashed line to indicate a clonal distance threshold
geom_text(data = subset(cloneDendPoints, subset = label %in% techReps), aes(x = x, y = (y - .015), label = label), angle = 90) + # spacing technical replicates further from leaf
geom_text(data = subset(cloneDendPoints, subset = !label %in% techReps), aes(x = x, y = (y - .010), label = label), angle = 90) +
geom_text(data = subset(cloneDendPoints, subset = label %in% c("03", "04", "41", "43")), aes(x = (x + .5), y = (y - .018), label = "*"), angle = 90, size = 6) + # labeling natural clones with asterisks
scale_shape_manual(values = c(21, 23)) +
scale_fill_brewer(palette = "Dark2", name = "Population", labels = plotPops) +
guides(fill = guide_legend(override.aes = list(shape = c(23, 21, 21, 23, 21, 21, 23, 23), size = 4), order = 1, ncol = 2), shape = F) +
coord_cartesian(xlim = c(3, 84)) +
labs(y = "Genetic distance (1 - IBS)") +
theme_classic()
cloneDend = cloneDendA + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 12, color = "black", angle = 90),
axis.text.y = element_text(size = 10, color = "black"),
axis.line.y = element_line(),
axis.ticks.y = element_line(),
panel.grid = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
plot.background = element_blank(),
legend.key = element_blank(),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10),
legend.position = c(.5, .12))
cloneDend
From this dendrogram you can see that there are two pairs of samples (indicated by their asterisks) that exhibit similar distances as the technical replicates. These are natural clones which agree with the clones identified in the microsatellite dataset based on multi-locus genotypes.
Save the dendrogram:
ggsave("../figures/Figure2.eps", plot = cloneDend, height = 4.75, width = 13, units = "in", dpi = 300)
ggsave("../figures/Figure2.tiff", plot = cloneDend, height = 4.75, width = 13, units = "in", dpi = 300)
Observed and expected heterozygosity values were calculated for microsatellites in GenAlEx and for SNPs using the populations function in the popular RADseq analysis program, STACKS. These values and their standard error’s are incorporated in the “Heterozygosity_Values.csv” file.
cubaHetero = read.csv("Heterozygosity_Values.csv", header = TRUE)
hetPlotA = ggplot(cubaHetero, aes(x = Analysis.Type, y = Heterozygosity, fill = Population,
alpha = Type)) +
geom_bar(position = position_dodge(), stat = "identity", color = 'black') +
scale_y_continuous(expand = c(0, 0)) +
geom_errorbar(aes(ymin = Heterozygosity-Error, ymax = Heterozygosity+Error),
width = 0.6, position = position_dodge(0.9)) +
scale_fill_brewer(palette = "Dark2", labels = plotPops) +
scale_alpha_manual(values = c(1, 0.75), name = "Heterozygosity") +
labs(x ="Analysis Type", y = "Heterozygosity") +
guides(fill = guide_legend(order = 1)) +
theme_classic()
hetPlot = hetPlotA + theme(panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(color = "black"),
axis.text = element_text(size = 14, color = "black"),
axis.title.y = element_text(size = 14),
legend.title = element_text(size = 14),
legend.text = element_text(size = 14),
axis.title.x = element_blank())
hetPlot
The plot was edited in Illustrator to give the bars crosshatching texture instead of transparency.
ggsave("../figures/Figure3.tiff", plot = hetPlot, width = 25, height = 15, units = "cm", dpi = 300)
ggsave("../figures/Figure3.eps", plot = hetPlot, width = 25, height = 15, units = "cm", dpi = 300)
The program poppr can read in microsatallite multi-locus genotypes exported from GenAlEx as .csv files and can calculate a variety of distance metrics from this data. For microsatellite data with some missing loci, they recommend using Prevosti’s distance.
genAlEx = read.genalex("cuba_trimmed_microsats_genalex.csv")
cubaMicrosatDist = prevosti.dist(genAlEx)
msNoClones = read.table("microsat-list")[,1] # list of bam file
msMa = cubaMicrosatDist
msI2P = read.table("samples2pops",sep = "\t") # 2-column tab-delimited table of individual assignments to populations; must be in the same order as samples in the bam list or vcf file.
row.names(msI2P) = msI2P[,1]
msDend = msMa %>% scale %>% dist %>%
hclust %>% as.dendrogram
westSites = c("Banco de San Antonio", "Guanahacabibes", "Isla de la Juventud", "Cayo Jutias")
msDData = dendro_data(msDend)
msDendPoints = msDData$labels %>%
mutate(site = msI2P[,2][order.dendrogram(msDend)]) %>%
mutate(location = ifelse(.$site %in% westSites, "Western", "Eastern"))
msDendA = ggplot() +
geom_segment(data = segment(msDData), aes(x = x, y = y, xend = xend, yend = yend)) +
geom_point(data = msDendPoints, aes(x = x, y = y, fill = site, shape = location), size = 5) +
scale_shape_manual(values = c(21, 23)) +
scale_fill_brewer(palette = "Dark2", name = "Population", labels = plotPops) +
guides(fill = guide_legend(override.aes = list(shape = c(23, 21, 21, 23, 21, 21, 23, 23), size = 4), order = 1), shape = F) +
ggtitle(" Microsatellite") +
theme_dendro()
msDend = msDendA + theme(
legend.key = element_blank(),
plot.title = element_text(size = 18, color = "black"),
legend.title = element_text(size = 18,color = "black"),
legend.text = element_text(size = 16,color = "black"))
msDend
This dendrogram is similar to Figure 1 but the natural clones and technical replicates are removed allowing us to better identify other patterns.
bamsNoClones = read.table("bams_noclones")[,1] # list of bam file
snpMa = as.matrix(read.table("trimmed_mc.ibsMat"))
snpI2P = read.table("inds2pops_noclones.txt",sep = "\t") # 2-column tab-delimited table of individual assignments to populations; must be in the same order as samples in the bam list or vcf file.
row.names(snpI2P) = snpI2P[,1]
snpDend = snpMa %>% scale %>% dist %>%
hclust %>% as.dendrogram
snpDData = dendro_data(snpDend)
snpDendPoints = snpDData$labels %>%
mutate(site = snpI2P[,2][order.dendrogram(snpDend)]) %>%
mutate(location = ifelse(.$site %in% westSites, "Western", "Eastern"))
snpDendA = ggplot() +
geom_segment(data = segment(snpDData), aes(x = x, y = y, xend = xend, yend = yend)) +
geom_point(data = snpDendPoints, aes(x = x, y = y, fill = site, shape = location), size = 5) +
scale_shape_manual(values = c(21, 23)) +
scale_fill_brewer(palette = "Dark2", name = "Population", labels = plotPops) +
guides(fill = guide_legend(override.aes = list(shape = c(23, 21, 21, 23, 21, 21, 23, 23), size = 4), order = 1), shape = F) +
ggtitle(" SNP") +
theme_dendro()
snpDend = snpDendA + theme(
legend.key = element_blank(),
plot.title = element_text(size = 18, color = "black"),
legend.title = element_text(size = 18,color = "black"),
legend.text = element_text(size = 16,color = "black"))
snpDend
patchwork is a really easy program to combine ggplot figures, collapse legends, and add letter annotations to the plots.
Please note that for all of these figures, figures generated from the microsatellite dataset will be labeled a and those generated from the SNP dataset will be labeled b.
combinedDendro = (msDend / snpDend) +
plot_layout(guides = "collect") + # collapses the legends into one
plot_annotation(tag_level = "a") & # labels the plots a and b
theme(plot.tag = element_text(size = 20, face = "bold"))
combinedDendro
ggsave("../figures/Figure4.tiff", plot = combinedDendro, width = 45, height = 16, units = "cm", dpi = 300)
ggsave("../figures/Figure4.eps", plot = combinedDendro, width = 45, height = 16, units = "cm", dpi = 300)
cubaVcf = read.vcfR("trimmed_mc_renamed.vcf.gz") #This vcf file has all of your SNP genotype likelihoods and is generated by ANGSD
## Scanning file to determine attributes.
## File attributes:
## meta lines: 12
## header_line: 13
## variant count: 9720
## column count: 87
##
Meta line 12 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 9720
## Character matrix gt cols: 87
## skip: 0
## nrows: 9720
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant: 9720
## All variants processed
cubaGenlight = vcfR2genlight(cubaVcf, n.cores = 2) # Converts the vcf file into a file format that poppr uses the "genlight" format
locNames(cubaGenlight) = paste(cubaVcf@fix[,1],cubaVcf@fix[,2],sep="_")
westSites = c("BancodeSanAntonio", "Guanahacabibes", "IsladelaJuventud", "CayoJutias")
popData = read.table("vcf_popmap") # Reads in population data for each sample
popData$depth = factor("Shallow", levels = c("Shallow", "Mesophotic")) # Stratifies by depth zone
popData$depth[c(1:2)] = "Mesophotic"
popData$location = factor(ifelse(popData$V2 %in% westSites, "Western", "Eastern"))
levels(popData$depth)
## [1] "Shallow" "Mesophotic"
colnames(popData) = c("sample","population", "depth", "location")
strata(cubaGenlight) = data.frame(popData)
setPop(cubaGenlight) = ~population
head(cubaGenlight)
## /// GENLIGHT OBJECT /////////
##
## // 1 genotypes, 9,720 binary SNPs, size: 991.3 Kb
## 0 (0 %) missing data
##
## // Basic content
## @gen: list of 1 SNPbin
##
## // Optional content
## @ind.names: 1 individual labels
## @loc.names: 9720 locus labels
## @chromosome: factor storing chromosomes of the SNPs
## @position: integer storing positions of the SNPs
## @pop: population of each individual (group size range: 1-1)
## @strata: a data frame with 4 columns ( sample, population, depth, location )
## @other: a list containing: elements without names
snpPca1 = glPca(cubaGenlight, nf = 77, n.cores = 1)
snpPcaValues = as.data.frame(snpPca1$scores)
snpPca = data.frame(row.names = rownames(snpPcaValues), sample = rownames(snpPcaValues),
site = popData$population, depth = popData$depth, location = popData$location,
PC1 = snpPcaValues[,1], PC2 = snpPcaValues[,2])
head(snpPca)
## sample site depth location PC1 PC2
## 01 01 BancodeSanAntonio Mesophotic Western -14.567230 -21.893995
## 02 02 BancodeSanAntonio Mesophotic Western -15.510039 -20.838103
## 04 04 Guanahacabibes Shallow Western 2.966255 -2.134265
## 06 06 Guanahacabibes Shallow Western 3.606531 -1.301699
## 09 09 Guanahacabibes Shallow Western -9.017592 5.144301
## 10 10 Guanahacabibes Shallow Western -15.855929 4.296339
# percentage of explained variance by axes
snpPcVar = round(snpPca1$eig/sum(snpPca1$eig)*100, 1)
# merge centroid locations into ggplot dataframe
snpPca = merge(snpPca, aggregate(cbind(mean.x=PC1,mean.y=PC2)~site,snpPca, mean), by="site")
# SNP PCA biplot
plotPops = c("Banco de San Antonio", "Cabo Lucrecia", "Cayo Anclitas", "Cayo Jutías",
"Cayo Sabinal", "Chivirico", "Guanahacabibes", "Isla de la Juventud")
cubaSnpPcaPlotA = ggplot(snpPca, aes(x = PC1, y = PC2, color = site, fill = site, shape = location)) +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
stat_ellipse(data = subset(snpPca, subset = !site %in% c("BancodeSanAntonio", "IsladelaJuventud")), type = "t", geom = "polygon", alpha = 0.1) + #ellipse
geom_segment(data = subset(snpPca, subset = site %in% c("BancodeSanAntonio", "IsladelaJuventud")), aes(xend = mean.x, yend = mean.y, color = site)) + #spider
geom_point(shape = 4, stroke = 0.75, size = 3) + #individual's points indicated by circles
geom_point(aes(x = mean.x, y = mean.y, shape = location), size = 4, color = "black") + #population centroids indicated by triangles
scale_shape_manual(values = c(21, 23), name = "Location") +
scale_fill_manual(values = brewer.pal(name = "Dark2", n = 8), name = " Population", labels = plotPops) +
scale_color_manual(values = brewer.pal(name = "Dark2", n = 8), name = " Population", labels = plotPops) +
xlab(paste ("PC 1 (", snpPcVar[1],"%)", sep = "")) + #Prints percent variation explained by first axis
ylab(paste ("PC 2 (", snpPcVar[2],"%)", sep = "")) + #Prints percent variation explained by second axis
guides(fill = guide_legend(override.aes = list(shape = c(23, 21, 21, 23, 21, 21, 23, 23), size = 4), order = 1), shape = F) +
ggtitle("SNP") +
theme_bw()
cubaSnpPcaPlot = cubaSnpPcaPlotA +
theme(axis.title.x = element_text(color = "black", size = 10),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line.x = element_blank(),
axis.title.y = element_text(color = "black", size = 10),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = "none",
panel.border = element_rect(color = "black", size = 1.2),
panel.background = element_rect(fill = "white"),
plot.background = element_rect(fill = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
cubaSnpPcaPlot
Principal coordinates analysis (PCoA) based on Nei’s genetic distance calculated among the populations.
#First calculate Nei's genetic distance using the program STAMPP
cubaSnpDpop = stamppNeisD(cubaGenlight, pop = TRUE) # Nei's 1972 distance between pops
#This mimics the by population PCoA of GenAlEx
# This is the actual PCoA step; you can use any distance matrix here (euclidean = PCA),
# We are using Nei's genetic distance
cubaMds = cmdscale(cubaSnpDpop, eig = TRUE, x.ret = TRUE)
# Determine percent variation captured on each axis
# Calculate the eigenvalues so later we can figure out % variation shown on each Principal Coordinate
cubaSnpPcoaVar = round(cubaMds$eig/sum(cubaMds$eig)*100, 1)
cubaSnpPcoaVar
## [1] 86.4 9.1 2.0 0.9 0.8 0.5 0.1 0.0
# Format data to plot
cubaSnpPcoaValues = cubaMds$points
cubaSnpPcoaValues = as.data.frame(cubaSnpPcoaValues, sample = rownames(cubaSnpPcoaValues))
cubaSnpPcoa = data.frame(site = rownames(cubaSnpPcoaValues)) %>%
mutate(depth = factor(c("Mesophotic","Shallow","Shallow","Shallow","Shallow","Shallow","Shallow","Shallow"), levels=c("Shallow","Mesophotic"))) %>%
mutate(location = ifelse(.$site %in% westSites, "Western", "Eastern")) %>%
mutate(PCo1 = cubaSnpPcoaValues[,1], PCo2 = cubaSnpPcoaValues[,2])
# SNP PCoA biplot
cubaSnpPcoaPlotA = ggplot(cubaSnpPcoa, aes(x = PCo1, y = PCo2, fill = site, shape = location)) +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
geom_point(size = 4) +
scale_y_reverse() +
scale_shape_manual(values = c(21, 23), name = "Location") +
scale_fill_manual(values = brewer.pal(name = "Dark2", n = 8), name = "Population", labels = plotPops) +
xlab(paste ("PCo 1 (", cubaSnpPcoaVar[1],"%)", sep = "")) +
ylab(paste ("PCo 2 (", cubaSnpPcoaVar[2],"%)", sep = "")) +
guides(fill = guide_legend(override.aes = list(shape = c(23, 21, 21, 23, 21, 21, 23, 23), size = 4), order = 1), shape = F) +
ggtitle("SNP") +
theme_bw()
cubaSnpPcoaPlot = cubaSnpPcoaPlotA +
theme(axis.title.x = element_text(color = "black", size = 10),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line.x = element_blank(),
axis.title.y = element_text(color = "black", size = 10),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = "left",
legend.title = element_text(color = "black", size = 10),
legend.text = element_text(color = "black", size = 10),
legend.key = element_blank(),
panel.border = element_rect(color = "black", size = 1.2),
panel.background = element_rect(fill = "white"),
plot.background = element_rect(fill = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
cubaSnpPcoaPlot
cubaMsat = read.genalex("cuba_trimmed_microsats_genalex.csv", genclone = FALSE)
xMsat = tab(cubaMsat, freq = TRUE, NA.method = "mean")
mSatPca1 = dudi.pca(xMsat, center = TRUE, scale = FALSE, scannf = FALSE, nf = 77)
mSatPcaValues = as.data.frame(mSatPca1$li)
mSatPca = data.frame(row.names = rownames(mSatPcaValues), sample = rownames(mSatPcaValues),
site = as.factor(popData$population), depth = popData$depth,
location = popData$location, PC1 = mSatPcaValues[,1],
PC2 = mSatPcaValues[,2])
# percentage of explained variance by axes
mSatPcVar = round(mSatPca1$eig/sum(mSatPca1$eig)*100, 1)
mSatPcVar
## [1] 9.6 6.6 6.0 5.4 5.3 4.5 4.2 3.9 3.4 3.2 3.0 2.9 2.8 2.6 2.4 2.3 2.1 2.0 1.9 1.8 1.6
## [22] 1.4 1.4 1.3 1.2 1.1 1.1 1.0 1.0 0.9 0.9 0.8 0.7 0.7 0.6 0.6 0.6 0.5 0.5 0.5 0.4 0.4
## [43] 0.4 0.4 0.3 0.3 0.3 0.3 0.2 0.2 0.2 0.2 0.2 0.2 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1
## [64] 0.1 0.1 0.1 0.1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
# merge centroid locations into ggplot dataframe
mSatPca = merge(mSatPca, aggregate(cbind(mean.x = PC1, mean.y = PC2) ~ site, mSatPca, mean), by = "site")
# construct PCA biplot
cubaMsatPcaPlotA = ggplot(data = mSatPca, aes(x = PC1, y = PC2, color = site, fill = site, shape = location)) +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
stat_ellipse(data = subset(mSatPca, subset = !site %in% c("BancodeSanAntonio", "IsladelaJuventud")), type = "t", geom = "polygon", alpha = 0.1) + #ellipse
geom_segment(data = subset(mSatPca, subset = site %in% c("BancodeSanAntonio", "IsladelaJuventud")), aes(xend = mean.x, yend = mean.y, color = site)) + #spider
#geom_segment(aes(xend = mean.x, yend = mean.y, color = site)) + #spider
geom_point(data = mSatPca, aes(x = PC1, y = PC2, color = site), shape = 4, stroke = 0.75, size = 3) + #individuals
geom_point(data = mSatPca, aes(x = mean.x, y = mean.y, shape = location, fill = site), size = 4, color = "black") + #centroids
scale_shape_manual(values = c(21, 23), name = "Location") +
scale_fill_manual(values = brewer.pal(name = "Dark2", n = 8), name = " Population", labels = plotPops) +
scale_color_manual(values = brewer.pal(name = "Dark2", n = 8), name = " Population", labels = plotPops) +
xlab(paste ("PC 1 (", mSatPcVar[1],"%)", sep = "")) +
ylab(paste ("PC 2 (", mSatPcVar[2],"%)", sep = "")) +
guides(fill = guide_legend(override.aes = list(shape = c(23, 21, 21, 23, 21, 21, 23, 23), size = 4), order = 1), shape = F) +
ggtitle("Microsatellite") +
theme_bw()
cubaMsatPcaPlot = cubaMsatPcaPlotA +
theme(axis.title.x = element_text(color = "black", size = 10),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line.x = element_blank(),
axis.title.y = element_text(color = "black", size = 10),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = "none",
panel.border = element_rect(color = "black", size = 1.2),
panel.background = element_rect(fill = "white"),
plot.background = element_rect(fill = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
cubaMsatPcaPlot
cubaMsatPcoa = read.csv("cubaMsatPcoa.csv", header = TRUE, check.names = FALSE)
cubaMsatPcoa$location = c("Western", "Western", "Western", "Eastern", "Eastern", "Eastern","Eastern", "Western")
cubaMsatPcoaPlotA = ggplot(cubaMsatPcoa, aes(x = PCo1, y = PCo2, fill = site, shape = location)) +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
geom_point(size = 4) +
scale_shape_manual(values = c(21, 23), name = "Location") +
scale_fill_manual(values = brewer.pal(name = "Dark2", n = 8), name = "Population", labels = plotPops) +
xlab("PCo 1 (58.0%)") +
ylab("PCo 2 (14.1%)") +
guides(fill = guide_legend(override.aes = list(shape = c(23, 21, 21, 23, 21, 21, 23, 23), size = 4), order = 1), shape = F) +
ggtitle("Microsatellite") +
theme_bw()
cubaMsatPcoaPlot = cubaMsatPcoaPlotA +
theme(axis.title.x = element_text(color = "black", size = 10),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line.x = element_blank(),
axis.title.y = element_text(color = "black", size = 10),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = "none",
panel.border = element_rect(color = "black", size = 1.2),
panel.background = element_rect(fill = "white"),
plot.background = element_rect(fill = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
cubaMsatPcoaPlot
cubaOrdPlots = (cubaMsatPcoaPlot|cubaSnpPcoaPlot)/(cubaMsatPcaPlot|cubaSnpPcaPlot) +
plot_layout(guides = 'collect') +
plot_annotation(tag_level = "a") &
theme(plot.tag = element_text(size = 14, face = "bold"))
cubaOrdPlots
#ggsave("../figures/Figure5.eps", plot = cubaOrdPlots, height = 20, width = 25, unit = "cm", dpi = 300)
ggsave("../figures/Figure5.png", plot = cubaOrdPlots, height = 20, width = 25, unit = "cm", dpi = 300)
ggsave("../figures/Figure5.tiff", plot = cubaOrdPlots, height = 20, width = 25, unit = "cm", dpi = 300)
Taking a break from making figures, here is how AMOVA and significance was calculated from the SNP dataset using poppr. The AMOVA for microsatellites was run in GenAlEx.
cubaVcf = read.vcfR("trimmed_mc_renamed.vcf.gz") #Read in vcf
cubaGenlight = vcfR2genlight(cubaVcf, n.cores = 2) #Convert to genlight
locNames(cubaGenlight) = paste(cubaVcf@fix[,1],cubaVcf@fix[,2],sep="_")
popData = read.table("vcf_popmap") #Strata by population and depth zone
popData$depth = factor("Shallow", levels = c("Shallow", "Mesophotic"))
popData$depth[c(1:2)] = "Mesophotic"
levels(popData$depth)
colnames(popData) = c("sample","population", "depth")
levels(popData$population) = plotPops
strata(cubaGenlight) = data.frame(popData)
setPop(cubaGenlight) = ~population
cubaGenlight
amova <- poppr.amova(cubaGenlight, ~population) #Runs AMOVA
amova
## $call
## ade4::amova(samples = xtab, distances = xdist, structures = xstruct)
##
## $results
## Df Sum Sq Mean Sq
## Between population 7 12345.90 1763.700
## Between samples Within population 70 91858.41 1312.263
## Within samples 78 92293.00 1183.244
## Total 155 196497.31 1267.725
##
## $componentsofcovariance
## Sigma %
## Variations Between population 23.94914 1.883234
## Variations Between samples Within population 64.50970 5.072704
## Variations Within samples 1183.24359 93.044062
## Total variations 1271.70242 100.000000
##
## $statphi
## Phi
## Phi-samples-total 0.06955938
## Phi-samples-population 0.05170068
## Phi-population-total 0.01883234
Partitions amount of variance at each level: among populations, among individuals within a population, and within an individual.
set.seed(1999)
amovasignif <- randtest(amova, nrepet = 999) #Calculates significance levels of the AMOVA with 999 permutations
amovasignif
## class: krandtest lightkrandtest
## Monte-Carlo tests
## Call: randtest.amova(xtest = amova, nrepet = 999)
##
## Number of tests: 3
##
## Adjustment method for multiple comparisons: none
## Permutation number: 999
## Test Obs Std.Obs Alter Pvalue
## 1 Variations within samples 1183.24359 -3.458135 less 0.001
## 2 Variations between samples 64.50970 2.386649 greater 0.011
## 3 Variations between population 23.94914 9.009502 greater 0.001
Pairwise Fst values and associated p-values were calculated in GenAlEx and read in as matrices in csv files.
# reads in fst matrix
msFstMa = read.csv("msFstValues.csv", head = TRUE)
msFstMa
## X Banco.de.San.Antonio Guanahacabibes Isla.de.la.Juventud
## 1 Banco de San Antonio NA NA NA
## 2 Guanahacabibes 0.132 NA NA
## 3 Isla de la Juventud 0.212 0.000 NA
## 4 Cayo Anclitas 0.165 0.008 0.011
## 5 Chivirico 0.145 0.004 0.000
## 6 Cabo Lucrecia 0.126 0.012 0.005
## 7 Cayo Sabinal 0.130 0.005 0.006
## 8 Cayo Jutías 0.157 0.000 0.000
## Cayo.Anclitas Chivirico Cabo.Lucrecia Cayo.Sabinal Cayo.Jutías
## 1 NA NA NA NA NA
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 0.000 NA NA NA NA
## 6 0.001 0.003 NA NA NA
## 7 0.000 0.012 0 NA NA
## 8 0.000 0.000 0 0 NA
pops = plotPops[c(1, 7, 8, 3, 6, 2, 5, 4)]
# renames row headers
names(msFstMa) = c("Pop", pops)
# maintains column order as given
msFstMa$Pop = factor(msFstMa$Pop, levels = unique(msFstMa$Pop))
levels(msFstMa$Pop) = pops
# reads in q value matrix
msQMa = read.csv("msFstPvalues.csv") #q-values are FDR-corrected p-values
names(msQMa) = c("Pop", pops)
msQMa$Pop = factor(msQMa$Pop, levels = unique(pops))
# melts fst and q value matrices into table used by ggplot2
# missing values (NA) not used
msFst= melt(msFstMa, id.vars = "Pop", value.name = "Fst", variable.name = "Pop2", na.rm = TRUE)
msFst = msFst[msFst$Pop != msFst$Pop2,]
msFst$Fst = round(msFst$Fst, 3)
head(msFst)
## Pop Pop2 Fst
## 2 Guanahacabibes Banco de San Antonio 0.132
## 3 Isla de la Juventud Banco de San Antonio 0.212
## 4 Cayo Anclitas Banco de San Antonio 0.165
## 5 Chivirico Banco de San Antonio 0.145
## 6 Cabo Lucrecia Banco de San Antonio 0.126
## 7 Cayo Sabinal Banco de San Antonio 0.130
msQ = melt(msQMa, id.vars = "Pop", value.name = "Pval", variable.name = "Pop2", na.rm = TRUE)
msQ = msQ[msQ$Pop != msQ$Pop2,]
msQ$Qval = p.adjust(msQ$Pval, method = "BH")
head(msQ)
## Pop Pop2 Pval Qval
## 2 Guanahacabibes Banco de San Antonio 0.002 0.009333333
## 3 Isla de la Juventud Banco de San Antonio 0.012 0.048000000
## 4 Cayo Anclitas Banco de San Antonio 0.000 0.000000000
## 5 Chivirico Banco de San Antonio 0.001 0.005600000
## 6 Cabo Lucrecia Banco de San Antonio 0.001 0.005600000
## 7 Cayo Sabinal Banco de San Antonio 0.001 0.005600000
# ggplot2 heatmap using geom_tile
# uses gradient of red color to denote increasing pop differentiation through Fst
# geom_text puts the Fst value in each grid
heatmap=ggplot(data = msFst, aes(Pop, Pop2, fill = Fst))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "white", high ="red", midpoint=0, limit = c(0,0.22), # change the upper threshold based on your highest Fst
space = "Lab", name = expression(paste(italic("F")[ST])))+
geom_text(data = subset(msFst, Fst = 0), aes(Pop, Pop2, label = Fst),
color = ifelse(msQ$Qval <= 0.05, "black", ifelse(msQ$Qval >= 1,"white","darkgrey")),
size = ifelse(msQ$Qval < 0.05, 6, 5)) +
guides(fill = guide_colorbar(barwidth = 1, barheight = 12, title.position = "top", title.hjust = 0.5)) +
scale_y_discrete(position = "right") +
scale_x_discrete(labels = str_wrap(msFst$Pop, width = 6)) +
ggtitle(" Microsatellite") +
theme_minimal()
msHeatmap = heatmap + theme(
axis.text.x = element_text(vjust = 1, size = 16, hjust = 0.5, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.justification = c(1, 0),
legend.position = "right",
legend.direction = "vertical",
legend.title = element_text(size = 16),
legend.text = element_text(size = 14),
plot.title = element_text(size = 18)
)
msHeatmap
Calculate pairwise Fst among populations
set.seed(694)
cuba.fst<-stamppFst(cubaGenlight, nboots = 99, percent = 95, nclusters = 4) #99 permutations
cuba.fst$Fsts
## Banco de San Antonio Guanahacabibes Isla de la Juventud
## Banco de San Antonio NA NA NA
## Guanahacabibes 0.1441489 NA NA
## Isla de la Juventud 0.1314266 -0.0069075876 NA
## Cayo Anclitas 0.1826620 0.0114684535 0.015352486
## Chivirico 0.1577730 0.0067429522 0.008967761
## Cabo Lucrecia 0.1833536 0.0105662841 0.021734724
## Cayo Sabinal 0.1688537 0.0059172122 0.009677155
## Cayo Jutías 0.1742252 0.0006910925 0.015400586
## Cayo Anclitas Chivirico Cabo Lucrecia Cayo Sabinal Cayo Jutías
## Banco de San Antonio NA NA NA NA NA
## Guanahacabibes NA NA NA NA NA
## Isla de la Juventud NA NA NA NA NA
## Cayo Anclitas NA NA NA NA NA
## Chivirico 0.003639419 NA NA NA NA
## Cabo Lucrecia 0.005106983 0.003411121 NA NA NA
## Cayo Sabinal 0.004802265 0.002862578 0.001313226 NA NA
## Cayo Jutías 0.025320517 0.017249115 0.012533199 0.01085514 NA
cuba.fst$Pvalues
## Banco de San Antonio Guanahacabibes Isla de la Juventud
## Banco de San Antonio NA NA NA
## Guanahacabibes 0 NA NA
## Isla de la Juventud 0 1.0000000 NA
## Cayo Anclitas 0 0.0000000 0
## Chivirico 0 0.0000000 0
## Cabo Lucrecia 0 0.0000000 0
## Cayo Sabinal 0 0.0000000 0
## Cayo Jutías 0 0.1919192 0
## Cayo Anclitas Chivirico Cabo Lucrecia Cayo Sabinal Cayo Jutías
## Banco de San Antonio NA NA NA NA NA
## Guanahacabibes NA NA NA NA NA
## Isla de la Juventud NA NA NA NA NA
## Cayo Anclitas NA NA NA NA NA
## Chivirico 0 NA NA NA NA
## Cabo Lucrecia 0 0 NA NA NA
## Cayo Sabinal 0 0 0.03030303 NA NA
## Cayo Jutías 0 0 0.00000000 0 NA
FDR correct the p-values to q-values and round the Fst values to 3 digits.
# reads in fst matrix
snpFstMa = as.data.frame(cuba.fst$Fsts)
names(snpFstMa) = pops
row.names(snpFstMa) = pops
snpFstMa$Pop = factor(row.names(snpFstMa), levels = unique(pops))
levels(snpFstMa$Pop)
## [1] "Banco de San Antonio" "Guanahacabibes" "Isla de la Juventud"
## [4] "Cayo Anclitas" "Chivirico" "Cabo Lucrecia"
## [7] "Cayo Sabinal" "Cayo Jutías"
snpQMa = data.frame(cuba.fst$Pvalues)
names(snpQMa)= pops
row.names(snpQMa) = pops
snpQMa$Pop = factor(row.names(snpQMa), levels = unique(pops))
snpFst = melt(snpFstMa, id.vars = "Pop", value.name = "Fst", variable.name = "Pop2", na.rm = TRUE)
snpFst = snpFst[snpFst$Pop != snpFst$Pop2,]
snpFst$Fst = round(snpFst$Fst, 3)
snpFst = snpFst %>% mutate(Fst = replace(Fst, Fst < 0, 0))
head(snpFst)
## Pop Pop2 Fst
## 1 Guanahacabibes Banco de San Antonio 0.144
## 2 Isla de la Juventud Banco de San Antonio 0.131
## 3 Cayo Anclitas Banco de San Antonio 0.183
## 4 Chivirico Banco de San Antonio 0.158
## 5 Cabo Lucrecia Banco de San Antonio 0.183
## 6 Cayo Sabinal Banco de San Antonio 0.169
snpQ = melt(snpQMa, id.vars = "Pop", value.name = "Pval", variable.name = "Pop2", na.rm = TRUE)
snpQ = snpQ[snpQ$Pop != snpQ$Pop2,]
snpQ$Qval = p.adjust(snpQ$Pval, method = "BH")
head(snpQ)
## Pop Pop2 Pval Qval
## 2 Guanahacabibes Banco de San Antonio 0 0
## 3 Isla de la Juventud Banco de San Antonio 0 0
## 4 Cayo Anclitas Banco de San Antonio 0 0
## 5 Chivirico Banco de San Antonio 0 0
## 6 Cabo Lucrecia Banco de San Antonio 0 0
## 7 Cayo Sabinal Banco de San Antonio 0 0
snpHeatmapA = ggplot(data = snpFst, aes(Pop, Pop2, fill = Fst))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "white", high = "red", midpoint = 0, limit = c(0, 0.22),
space = "Lab", name = expression(paste(italic("F")[ST])))+
geom_text(data = snpFst, aes(Pop, Pop2, label = Fst), color = ifelse(snpQ$Qval <= 0.05,"black", "darkgrey"), size = ifelse(snpQ$Qval < 0.05, 6, 5)) +
guides(fill=guide_colorbar(barwidth = 1, barheight = 12, title.position = "top", title.hjust = 0.5))+
scale_y_discrete(position = "right")+
scale_x_discrete(labels = str_wrap(snpFst$Pop, width = 6)) +
ggtitle(" SNP") +
theme_minimal()
snpHeatmap = snpHeatmapA + theme(
axis.text.x = element_text(vjust = 1, size = 16, hjust = 0.5, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.justification = c(1, 0),
legend.position = "right",
legend.direction = "vertical",
legend.title = element_text(size = 16),
legend.text = element_text(size = 14),
plot.title = element_text(size = 18)
)
snpHeatmap
combinedHeatmap = (msHeatmap / snpHeatmap) +
plot_layout(guides = 'collect') +
plot_annotation(tag_level = "a") &
theme(plot.tag = element_text(size = 20, face = "bold"))
combinedHeatmap
ggsave("../figures/Figure6.tiff", plot = combinedHeatmap, width = 34, height = 25, units = "cm", dpi = 300)
ggsave("../figures/Figure6.eps", plot = combinedHeatmap, width = 34, height = 25, units = "cm", dpi = 300)
msStr = read.csv("sortedK2-microsat-structure.csv")
msStr$Sample = factor(msStr$Sample,
levels = msStr$Sample[order(msStr$Cluster2)])
msStr$Order = c(1:nrow(msStr))
msStrDat = melt(msStr, id.vars = c("Sample", "Population", "Order"),
variable.name = "Ancestry", value.name = "Fraction")
colPalStr = c("blue", "turquoise")
names(colPalStr) = levels(msStrDat$Ancestry)
msStructureA = ggplot(msStrDat, aes(x = Order, y = Fraction, fill = Ancestry)) +
geom_bar(stat = "identity", position = "fill", width = 1, colour = "grey25") +
facet_grid(~fct_inorder(Population), scales = "free", switch = "x", space = "free") +
xlab("Population") +
scale_x_discrete(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
scale_fill_manual(values = colPalStr) +
labs(y = "Ancestry") +
coord_cartesian(ylim = c(-.01, 1.01), clip = "off") +
ggtitle("Microsatellite") +
theme_bw()
msStructure = msStructureA + theme(
plot.title = element_text(size = 15),
panel.grid = element_blank(),
panel.background = element_rect(fill=NA, colour="black"),
panel.spacing.x = grid:::unit(0, "lines"),
panel.border = element_rect(fill = NA, color = "black", size = 2, linetype = "solid"),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 12, color = "black"),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 16, color = "black"),
axis.ticks.x = element_blank(),
axis.ticks.y = element_line(color = "black"),
strip.background = element_blank(),
strip.text = element_blank(),
legend.key = element_blank(),
legend.title = element_blank(),
legend.position = "none"
)
msStructure
snpStr = read.csv("sorted_K2_admixture.csv")
snpStr$Sample = factor(snpStr$Sample,
levels = snpStr$Sample[order(snpStr$Cluster2)])
strPops = c("Banco de \nSan Antonio", "Cabo Lucrecia", "Cayo Anclitas", "Cayo Jutías",
"Cayo Sabinal", "Chivirico", "Guanahacabibes", "Isla de la \nJuventud")
levels(snpStr$Population) = strPops
snpStr$Order = c(1:nrow(snpStr))
snpStrDat = melt(snpStr, id.vars = c("Sample", "Population", "Order"),
variable.name = "Ancestry", value.name = "Fraction")
popAnnoStr = data.frame(x1 = c(0.55, 62.55, 25.55, 17.55, 39.55, 51.55, 2.55, 14.55),
x2 = c(2.45, 78.45, 39.45, 25.45, 51.45, 62.45, 14.45, 17.45),
y1 = -0.045, y2 = -0.045, Sample = NA, Ancestry = NA,
Population = strPops)
snpAdmixA = ggplot(snpStrDat, aes(x = Order, y = Fraction, fill = Ancestry)) +
geom_bar(stat = "identity", position = "fill", width = 1, colour = "grey25") +
geom_segment(data = popAnnoStr, aes(x = x1, xend = x2, y = y1, yend = y2, color = Population), size = 3) +
geom_text(data = popAnnoStr, aes(x = (x2-.05), y = (y1-.05), label = Population), angle = 75, hjust = 1, vjust = 0, size = 5, lineheight = 0.65) +
facet_grid(~fct_inorder(Population), scales = "free", switch = "x", space = "free") +
labs(x = "Population", y = "Ancestry") +
scale_x_discrete(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
scale_fill_manual(values = colPalStr) +
scale_color_brewer(palette = "Dark2") +
coord_cartesian(ylim = c(-.01,1.01), clip = "off") +
ggtitle("SNP") +
theme_bw()
snpAdmix = snpAdmixA + theme(
plot.title = element_text(size = 16),
plot.background = element_rect(fill = NA, color = NA),
panel.grid = element_blank(),
panel.background = element_rect(fill = NA, color = "black"),
panel.spacing.x = grid:::unit(0, "lines"),
panel.border = element_rect(fill = NA, color = "black", size = 2, linetype = "solid"),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 12, color = "black"),
axis.title.x = element_text(size = 16),
axis.title.y = element_text(size = 16, color = "black"),
axis.ticks.x = element_blank(),
axis.ticks.y = element_line(color = "black"),
strip.background = element_rect(color = NA, fill = NA),
strip.text = element_text(size = 16, angle = 90, hjust = 1, vjust = 0.1, color = NA),
legend.key = element_blank(),
legend.title = element_blank(),
legend.position = "none"
)
snpAdmix
combinedAdmix = (msStructure / snpAdmix) +
plot_annotation(tag_level = "a") &
theme(plot.tag = element_text(size = 18, face = "bold"))
combinedAdmix
ggsave("../figures/Figure7.tiff", plot = combinedAdmix, width = 30, height = 20, units = "cm", dpi = 300)
ggsave("../figures/Figure7.eps", plot = combinedAdmix, width = 30, height = 20, units = "cm", dpi = 300)
dfZoox = read.csv("zooxProportionsUpdated.csv")
dfZoox$Order = c(1:nrow(dfZoox))
strPops = c("Banco de \nSan Antonio", "Cabo Lucrecia", "Cayo Anclitas", "Cayo Jutías", "Cayo Sabinal", "Chivirico", "Guanahacabibes", "Isla de la \nJuventud")
levels(dfZoox$Population) = strPops
zDat = melt(dfZoox, id.vars = c("Sample", "Population", "Order"), variable.name = "Symbiont", value.name = "Fraction")
colPalZoox = brewer.pal(4, "BrBG")
names(colPalZoox) = levels(zDat$Symbiont)
popAnnoZoox = data.frame(x1 = c(0.55, 62.55, 25.55, 17.55, 39.55, 51.55, 2.55, 14.55),
x2 = c(2.45, 78.45, 39.45, 25.45, 51.45, 62.45, 14.45, 17.45),
y1 = -0.045, y2 = -0.045, Sample = NA, Symbiont = NA, Order = NA,
Population = strPops)
zooxPlotA = ggplot(data = zDat, aes(x = Order, y = Fraction, fill = Symbiont, order = Order)) +
geom_bar(stat = "identity", position = "fill", width = 1, colour = "grey25") +
xlab("Population") +
scale_x_discrete(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0), labels = function(x) paste0(x*100, "%")) +
scale_fill_manual(values = colPalZoox, name = "Symbiodiniaceae \ngenus", breaks = zDat$Symbiont) +
geom_segment(data = popAnnoZoox, aes(x = x1, xend = x2, y = y1, yend = y2, color = strPops), size = 3) +
scale_color_brewer(palette = "Dark2") +
geom_text(data = popAnnoZoox, aes(x = (x2-.05), y = (y1-.05), label = Population), angle = 75, hjust = 1, vjust = 0, size = 5, lineheight = 0.65) +
facet_grid(.~fct_inorder(Population), scales = "free", switch = "x", space = "free") +
coord_cartesian(ylim = c(-.01,1.01), clip = "off") +
guides(color = FALSE) +
theme_bw()
zooxPlot = zooxPlotA + theme(
plot.title = element_blank(),
panel.background = element_blank(),
panel.spacing.x = grid:::unit(0, "lines"),
panel.border = element_rect(color = "black", size = 2, linetype = "solid"),
axis.text.x=element_blank(),
axis.text.y=element_text(size = 12, color = "black"),
axis.title.x=element_text(size = 16, color = "black"),
axis.title.y = element_blank(),
axis.ticks.x=element_blank(),
axis.ticks.y = element_line(color = "black"),
strip.text=element_text(size = 16, angle = 90, hjust = 1, vjust = 0, color = NA),
strip.background = element_blank(),
legend.title = element_text(size = 14, color = "black"),
legend.text = element_text(face = "italic", size = 13, color = "black"))
zooxPlot
ggsave("../figures/Figure8.tiff", plot = zooxPlot, width = 34.35, height = 12.5, units = "cm", dpi = 300)
ggsave("../figures/Figure8.eps", plot = zooxPlot, width = 34.35, height = 12.5, units = "cm", dpi = 300)
strctrHrvstr = read.csv("structureHarvesterOut.csv", header = TRUE)
head(strctrHrvstr)
## K Reps Mean.LnP.K. Stdev.LnP.K. Ln..K. X.Ln...K.. Delta.K
## 1 1 10 -2474.84 0.31693 NA NA NA
## 2 2 10 -2598.98 36.96719 -124.14 93.52 2.529811
## 3 3 10 -2629.60 61.42379 -30.62 15.75 0.256415
## 4 4 10 -2644.47 55.90657 -14.87 30.63 0.547878
## 5 5 10 -2628.71 55.80270 15.76 45.47 0.814835
## 6 6 10 -2567.48 37.63207 61.23 53.03 1.409170
bestStr = strctrHrvstr[1,]
logKPlotA = ggplot(strctrHrvstr, aes(x = K, y = Mean.LnP.K.)) +
geom_point(color = "black", size = 3) +
geom_point(data = bestStr, aes(x = K, y = Mean.LnP.K.), fill = "red", size = 3, shape = 21) +
geom_errorbar(aes(ymin = Mean.LnP.K. - Stdev.LnP.K.,
ymax = Mean.LnP.K. + Stdev.LnP.K.), width = 0.65) +
scale_x_continuous(breaks = seq(0,12, by = 2)) +
labs(y = "Mean Ln(probability)") +
ggtitle("Microsatellite") +
theme_bw()
logKPlot = logKPlotA + theme(
axis.title.x = element_blank(),
axis.title.y = element_text(size = 14, color = "black"),
axis.text= element_text(size = 12, color = "black"),
axis.line = element_blank(),
panel.border = element_rect(size = 1))
logKPlot
bestDK = strctrHrvstr[2,]
deltaKPlotA = ggplot(strctrHrvstr, aes(x = K, y = Delta.K), color = "black") +
geom_path(size = 1, aes(x = K)) +
geom_point(size = 3) +
geom_point(data = bestDK, aes(x = K, y = Delta.K), fill = "red", size = 3, shape = 21) +
scale_x_continuous(breaks = seq(0,12, by = 2)) +
labs(y = expression(paste(Delta, italic("K")))) +
ggtitle("Microsatellite") +
theme_bw()
deltaKPlot = deltaKPlotA + theme(
axis.title.x = element_blank(),
axis.title.y = element_text(size = 14, color = "black"),
axis.text= element_text(size = 12, color = "black"),
axis.line = element_blank(),
panel.border = element_rect(size = 1))
deltaKPlot
snpBIC = read.csv("BIC_Values.csv", header = TRUE)
head(snpBIC)
## Number.of.Clusters..K. BIC
## 1 1 511.6884
## 2 2 511.2608
## 3 3 513.4678
## 4 4 516.0718
## 5 5 518.8539
## 6 6 521.6731
bestBIC = snpBIC[2,]
bicPlotA = ggplot(snpBIC, aes(x = Number.of.Clusters..K., y = BIC), color = "black") +
geom_point(size = 3) +
geom_path(size = 1) +
geom_point(data = bestBIC, aes(x = Number.of.Clusters..K., y = BIC), fill = "red", size = 3, shape = 21) +
scale_x_continuous(breaks = seq(0,12, by = 2)) +
labs(y = "BIC") +
ggtitle("SNP") +
theme_bw()
bicPlot = bicPlotA + theme(
axis.title.x = element_blank(),
axis.title.y = element_text(size = 14, color = "black"),
axis.text= element_text(size = 12, color = "black"),
axis.line = element_blank(),
panel.border = element_rect(size = 1))
bicPlot
cvAdmix = read.csv("CV_Values_ADMIXTURE.csv", header =TRUE)
bestCV = cvAdmix[1,]
cvPlotA = ggplot(cvAdmix, aes(x = Number.of.clusters, y = Model.Cross.Validation.Values), color = "black") +
geom_point(size = 3) +
geom_path(size = 1) +
geom_point(data = bestCV, aes(x = Number.of.clusters, y = Model.Cross.Validation.Values), fill = "red", size = 3, shape = 21) +
scale_x_continuous(breaks = seq(0,12, by = 2)) +
labs(y = "Model cross validation") +
ggtitle("SNP") +
theme_bw()
cvPlot = cvPlotA + theme(
axis.title.x = element_blank(),
axis.title.y = element_text(size = 14, color = "black"),
axis.text= element_text(size = 12, color = "black"),
axis.line = element_blank(),
panel.border = element_rect(size = 1))
cvPlot
modelKPlots = (logKPlot | deltaKPlot)/(bicPlot|cvPlot) +
plot_layout(widths = c(20,1,20)) +
plot_annotation(caption = expression(paste("Genetic clusters (",italic(K),")")),
theme = theme(plot.caption = element_text(size = 14, hjust = 0.5))) +
plot_annotation(tag_level = "a") &
theme(plot.tag = element_text(size = 16, face = "bold"))
modelKPlots
ggsave("../figures/FigureS1.tiff", width = 17, height = 16, units = "cm", dpi = 300)
ggsave("../figures/FigureS1.eps", width = 17, height = 16, units = "cm", dpi = 300)
First we can calculate the geographic distances:
ReplaceLowerOrUpperTriangle = function(m, triangle.to.replace) {
if (nrow(m) != ncol(m))
stop("Supplied matrix must be square.")
if (tolower(triangle.to.replace) == "lower")
tri = lower.tri(m)
else if (tolower(triangle.to.replace) == "upper")
tri = upper.tri(m)
else
stop("triangle.to.replace must be set to 'lower' or 'upper'.")
m[tri] = t(m)[tri]
return(m)
}
# If triangle.to.replace="lower", replaces the lower triangle of a square matrix with its upper triangle.
# If triangle.to.replace="upper", replaces the upper triangle of a square matrix with its lower triangle.
GeoDistanceInMetresMatrix = function(df.geopoints) {
# Returns a matrix (M) of distances between geographic points. M[i,j] = M[j,i] = Distance between (df.geopoints$lat[i], df.geopoints$lon[i]) and (df.geopoints$lat[j], df.geopoints$lon[j]). The row and column names are given by df.geopoints$name.
GeoDistanceInMetres = function(g1, g2) {
# Returns a vector of distances. (But if g1$index > g2$index, returns zero.) The 1st value in the returned vector is the distance between g1[[1]] and g2[[1]]. The 2nd value in the returned vector is the distance between g1[[2]] and g2[[2]]. Etc. Each g1[[x]] or g2[[x]] must be a list with named elements "index", "lat" and "lon". E.g. g1 = list(list("index"=1, "lat"=12.1, "lon"=10.1), list("index"=3, "lat"=12.1, "lon"=13.2))
DistM = function(g1, g2) {
require("Imap")
return(ifelse(
g1$index > g2$index,
0,
gdist(lat.1 = g1$lat, lon.1 = g1$lon, lat.2 = g2$lat, lon.2 = g2$lon, units = "m")))
}
return(mapply(DistM, g1, g2))
}
n.geopoints = nrow(df.geopoints)
# The index column is used to ensure we only do calculations for the upper triangle of points
df.geopoints$index = 1:n.geopoints
# Create a list of lists
list.geopoints = by(df.geopoints[, c("index", "lat", "lon")], 1:n.geopoints, function(x) {
return(list(x))
})
# Get a matrix of distances (in metres)
mat.distances = ReplaceLowerOrUpperTriangle(outer(list.geopoints, list.geopoints, GeoDistanceInMetres), "lower")
# Set the row and column names
rownames(mat.distances) = df.geopoints$name
colnames(mat.distances) = df.geopoints$name
return(mat.distances)}
coords = read.csv("xycoords.csv", header=TRUE) # tab-separated file for all pops
dGeo = as.dist(GeoDistanceInMetresMatrix(coords)/1000, diag = T)
msNeiDist = read.csv("neis-distance.csv", header=TRUE)
msNeiMa = as.matrix(msNeiDist)
msNeiDistMa = dist(msNeiMa, diag = FALSE)
# Test IBD
set.seed(694)
msIBD = mantel.randtest(dGeo, msNeiDistMa)
msIBD
## Monte-Carlo test
## Call: mantel.randtest(m1 = dGeo, m2 = msNeiDistMa)
##
## Observation: 0.2412821
##
## Based on 999 replicates
## Simulated p-value: 0.013
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## 1.7752265455 0.0004189405 0.0184091282
msNei = melt(as.matrix(msNeiDistMa), varnames = c("row", "col"), value.name = "nei")
msNei = msNei[msNei$row > msNei$col,]
geo = melt(as.matrix(dGeo), varnames = c("row", "col"), value.name = "geo")
geo = geo[geo$row != geo$col,]
msMantelDF = data.frame(cbind(msNei$nei, geo$geo))
colnames(msMantelDF) = c("nei", "geo")
# Plot IBD
msMantelA = ggplot(data = msMantelDF, aes(x = geo, y = nei)) +
scale_fill_gradientn(colors = paletteer_d("wesanderson::Zissou1")) +
stat_density_2d(aes(fill = stat(density)), n = 300, contour = FALSE, geom = "raster") +
geom_smooth(method = lm, col = "black", fill = "gray40", fullrange = TRUE) +
geom_point(shape = 21, fill = "gray40") +
scale_x_continuous(limits = c(0,1000), expand = c(0,0)) +
scale_y_continuous(limits = c(0,2), expand = c(0,0)) +
annotate("label", x = 775, y = 1.91,
label = paste("r = ", round(msIBD$obs, 3), "; p = ", msIBD$pvalue),
size = 4, alpha = 0.6) +
labs(x = "Geographic distance (km)", y = expression(paste("Nei's genetic distance (",italic("D"),")"))) +
ggtitle("Microsatellite") +
theme_bw()
msMantel = msMantelA + theme(
axis.title.x = element_blank(),
axis.text.x = element_text(size = 12, color = "black"),
axis.ticks.x = element_line(color = "black"),
axis.line.x = element_blank(),
axis.title.y = element_text(color = "black", size = 14),
axis.text.y = element_text(size = 12, color = "black"),
axis.ticks.y = element_line(color = "black"),
axis.line.y = element_blank(),
panel.border = element_rect(size = 1.2, color = "black"),
plot.margin = margin(0.2,0.5,0.1,0.1, unit = "cm"),
legend.position = "none")
msMantel
## `geom_smooth()` using formula 'y ~ x'
# Isolation by distance
popdata<-read.table("vcf_popmap")
colnames(popdata)<-c("sample","population")
strata(cubaGenlight) = data.frame(popdata) #cubaGenlight was generated in Odination Analyses above
setPop(cubaGenlight)= ~population
snpNeiDist = as.dist(stamppNeisD(cubaGenlight, pop = TRUE), diag = F)
# Test IBD
set.seed(694)
snpIBD = mantel.randtest(dGeo,snpNeiDist)
snpIBD
## Monte-Carlo test
## Call: mantel.randtest(m1 = dGeo, m2 = snpNeiDist)
##
## Observation: 0.2347208
##
## Based on 999 replicates
## Simulated p-value: 0.016
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## 1.7184024033 0.0004719814 0.0185825434
snpNei = melt(as.matrix(snpNeiDist), varnames = c("row", "col"), value.name = "nei")
snpNei = snpNei[snpNei$row != snpNei$col,]
snpMantelDF = data.frame(cbind(snpNei$nei, geo$geo))
colnames(snpMantelDF) = c("nei", "geo")
snpMantelA = ggplot(data = snpMantelDF, aes(x = geo, y = nei)) +
scale_fill_gradientn(colors = paletteer_d("wesanderson::Zissou1")) +
stat_density_2d(aes(fill = stat(density)), n = 300, contour = FALSE, geom = "raster") +
geom_smooth(method = lm, col = "black", fill = "gray40", fullrange = TRUE) +
geom_point(shape = 21, fill = "gray40") +
scale_x_continuous(limits = c(0,1000), expand = c(0,0)) +
scale_y_continuous(limits = c(0,0.15), breaks = seq(0,0.15, by = 0.05), expand = c(0,0)) +
annotate("label", x = 775, y = 0.14325,
label = paste("r = ", round(snpIBD$obs, 3), "; p = ", snpIBD$pvalue),
size = 4, alpha = 0.6) +
labs(x = "Geographic distance (km)", y = expression(paste("Nei's genetic distance (",italic("D"),")"))) +
ggtitle("SNP") +
theme_bw()
snpMantel = snpMantelA + theme(
axis.title.x = element_blank(),
axis.text.x = element_text(size = 12, color = "black"),
axis.ticks.x = element_line(color = "black"),
axis.line.x = element_blank(),
axis.title.y = element_text(color = "white"),
axis.text.y = element_text(size = 12, color = "black"),
axis.ticks.y = element_line(color = "black"),
axis.line.y = element_blank(),
panel.border = element_rect(size = 1.2, color = "black"),
plot.margin = margin(0.2,0.5,0.1,0.1, unit = "cm"),
legend.position = "none")
snpMantel
## `geom_smooth()` using formula 'y ~ x'
mantelPlots = (msMantel | snpMantel) +
plot_annotation(tag_level = "a") +
plot_annotation(caption = "Geographic distance (km)",
theme = theme(plot.caption = element_text(size = 14, hjust = 0.5))) &
theme(plot.tag = element_text(size = 16, face = "bold"))
mantelPlots
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
ggsave("../figures/FigureS2.tiff", height = 12.5, width = 25, units = "cm", dpi = 300)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'