-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGst_4vs68_loci.R
139 lines (117 loc) · 4.77 KB
/
Gst_4vs68_loci.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
library(tidyverse)
library(readxl)
library(adegenet)
library(genepop)
library(mmod)
library(hierfstat)
library(broom)
library(poppr)
# Run GST at just 4 loci to see if they match results from 68 loci
#### Read in data ####
# Read in 2205 genetic data at just 4 loci of interest
Data_2205_4loci <- read.genepop("X:/2205_BKT_feral_broodstock_ID/Thometz_scripts/Erdman_integration/2205_63pops_4loci.gen",
ncode = 3L,
quiet = FALSE)
# 2111 metadata
Samples_2111 <- read_delim("X:/2111_F1F2D_BKT/2111analysis/Thometz_scripts/Samples_2111.csv") %>%
filter(Cohort == "Domestic") %>%
mutate(WaterbodyName = "St. Croix Falls domestic",
HUC_2 = "Hatchery",
HUC_4 = "Hatchery",
HUC_8 = "Hatchery",
.keep = "unused") %>%
select(SampleID, WaterbodyName, HUC_8, HUC_4, HUC_2)
# 2205 metadata
Samples_2205 <- read_delim("X:/2205_BKT_feral_broodstock_ID/Thometz_scripts/Samples_2205.csv") %>%
bind_rows(Samples_2111) %>%
filter(SampleID %in% rownames(Data_2205_4loci@tab)) %>%
arrange(match(SampleID, rownames(Data_2205_4loci@tab)))
fctLevelOrder <- Samples_2205 %>%
distinct(WaterbodyName) %>%
arrange(WaterbodyName)
Samples_2205 <- Samples_2205 %>%
mutate(WaterbodyName = fct_relevel(WaterbodyName, fctLevelOrder$WaterbodyName))
# Fill the pop slots
Data_2205_4loci@pop <- as_factor(Samples_2205$WaterbodyName)
# Run the analysis
gstMatrix <- pairwise_Gst_Nei(Data_2205_4loci)
gst_tidy <- gstMatrix %>%
tidy()
gst_tidy %>% write_csv("X:/2205_BKT_feral_broodstock_ID/Thometz_scripts/Erdman_integration/Locus_testing/Gst_4loci.csv")
# Plot on heatmap
heatmap <- gst_tidy %>%
mutate(distance = round(distance, digits = 2)) %>%
ggplot(aes(x = item1, y = item2, fill = distance)) +
geom_tile(color = "grey") +
scale_fill_gradient(low = "blue",
high = "red",
space = "Lab",
name = bquote("Nei's"~G[ST]),
breaks = c(round(min(gst_tidy$distance), 2),
round(mean(gst_tidy$distance), 2),
round(max(gst_tidy$distance), 2))) +
scale_y_discrete(limits = rev(rownames(as.matrix(gstMatrix)))) +
scale_x_discrete(position = "bottom") +
labs(x = "",
y = "") +
theme_classic() +
theme(axis.text.x = element_text(angle = -60,
vjust = 0.5,
size = 6,
hjust = 0.01,
color = "black"),
axis.text.y = element_text(size = 6,
color = "black"))
ggsave(filename = "GST_heatmap_4loci.pdf",
plot = heatmap,
device = "pdf",
path = "X:/2205_BKT_feral_broodstock_ID/Thometz_scripts/Erdman_integration/Plots_figures",
height = 8,
width = 10,
units = "in")
ggsave(filename = "GST_heatmap_4loci.png",
plot = heatmap,
device = "png",
path = "X:/2205_BKT_feral_broodstock_ID/Thometz_scripts/Erdman_integration/Plots_figures",
height = 8,
width = 10,
units = "in")
#### Join gst estimates with 68 loci estimates to compare #### This is implemented in the "Diversity_4vs68_loci.R" script
gst_68loci <- read_delim("X:/2205_BKT_feral_broodstock_ID/Thometz_scripts/Analyses/Gst_Fst/pwise_Nei_Gst_plus2111.csv") %>%
filter(item1 != "St. Croix domestic",
item2 != "St. Croix domestic")
gst_comparison <- gst_tidy %>%
rename(dist_4loci = distance) %>%
left_join(gst_68loci) %>%
rename(Pop_1 = item1,
Pop_2 = item2,
dist_68loci = distance) %>%
mutate(dist_4loci = round(dist_4loci, 3),
dist_68loci = round(dist_68loci, 3))
# Run linear regression to compare
lm_dist = lm(dist_4loci ~ dist_68loci, data = gst_comparison)
summary(lm_dist)
gst_comparison %>%
mutate(dist_4loci = case_when(dist_4loci < 0 ~ 0,
.default = dist_4loci)) %>%
ggplot(aes(x = dist_68loci, y = dist_4loci)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, color = "red") +
geom_smooth(method = "lm", color = "blue") +
annotate(geom = "text",
label = "(1:1)",
x = 0.2,
y = 0.22,
color = "red") +
annotate(geom = "text",
label = bquote(R^2~"= 0.499"),
x = 0.22,
y = 0.14) +
scale_y_continuous(limits = c(0, 0.25),
expand = c(0,0)) +
scale_x_continuous(limits = c(0, 0.25),
expand = c(0,0)) +
labs(x = "68 loci",
y = "4 loci",
title = bquote("Nei's"~G[ST])) +
theme_classic()