forked from yuliajk/tcremp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheval_dist.Rmd
113 lines (98 loc) · 2.29 KB
/
eval_dist.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
---
title: "TCRemp distance properties"
author: "M.S."
date: "2024-11-27"
output:
  pdf_document: default
  html_document: default
---
```{r setup, include=FALSE}
# Show the source of every chunk in the rendered report by default.
knitr::opts_chunk$set(echo = TRUE)
# Packages used by the chunks below: dplyr/readr/ggplot2/stringr verbs come
# from the tidyverse; melt() and dcast() come from reshape2.
library(tidyverse)
library(reshape2)
# NOTE(review): stringr and ggplot2 are already attached by library(tidyverse);
# the explicit calls below are harmless but redundant.
library(stringr)
library(ggplot2)
```
```{r}
# Read the TRB result table and reshape it to long form: one row per
# (from, to) pair holding the raw `value` plus a z-score of `value`
# computed within each `from` group (`value.scaled`).
data <- read_tsv("res_TRB.txt.gz") |>
  rename(from = id) |>
  mutate(from = as.character(from)) |>
  # melt() with no arguments keeps the non-numeric columns as ids and stacks
  # the rest into `variable` / `value`.
  melt() |>
  # Only columns whose name contains "cdr3" are kept.
  filter(grepl("cdr3", variable)) |>
  mutate(
    from = paste0("x", from),
    # The target id is the part of the column name before the first "_".
    # NOTE(review): the "x" prefix presumably keeps numeric-looking ids as
    # strings through later reshaping - confirm.
    to = paste0("x", str_split_fixed(variable, "_", 2)[, 1])
  ) |>
  select(-variable) |>
  group_by(from) |>
  mutate(value.scaled = (value - mean(value)) / sd(value)) |>
  ungroup()

glimpse(data)
```
```{r}
# Keep only rows where `to` sorts after `from` (string comparison): a pair
# that is present in both directions is kept once and self-pairs are dropped.
data1 <- filter(data, as.character(from) < as.character(to))

# Density of the raw values, then of the same values standardised over the
# whole of data1.
data1 |> ggplot(aes(x = value)) + geom_density()
data1 |> ggplot(aes(x = scale(value))) + geom_density()

# Normal Q-Q plot of the standardised values with its reference line.
qqnorm(scale(data1[["value"]]))
qqline(scale(data1[["value"]]))
```
```{r}
# Cast the long table into a wide numeric matrix: one row per `from`, one
# column per `to`, cells holding the raw `value`.
#
# `data` carries two numeric columns (`value` and `value.scaled`), so the
# column to cast is named explicitly instead of being left to dcast()'s
# guess. The raw `value` is what the guess resolved to, and it is the scale
# the later +/-100 clipping refers to.
# (from, to) combinations that are absent from `data` become NA cells.
data.m <- dcast(data, from ~ to, value.var = "value")

# Move the id column into the row names so that only numeric columns remain
# before converting to a matrix.
rownames(data.m) <- data.m$from
data.m$from <- NULL
data.m <- as.matrix(data.m)
```
```{r}
# Euclidean distance between every pair of rows of data.m, unrolled into a
# long table with columns from / to / dist_eucl. The ids come back from
# melt() as non-character labels, so they are converted to character to
# match the `from` / `to` columns of the long data.
dists <- as.matrix(dist(data.m)) |>
  melt(varnames = c("from", "to"), value.name = "dist_eucl") |>
  mutate(across(c(from, to), as.character))
```
```{r}
# Clamp every cell of data.m to the interval [-100, 100]. data.m itself is
# overwritten; NA cells stay NA and the dimnames are preserved.
data.m <- pmin(pmax(data.m, -100), 100)

# Same row-to-row Euclidean distance as before, now on the clamped matrix,
# unrolled into a long table with columns from / to / dist_eucl2.
dists2 <- as.matrix(dist(data.m)) |>
  melt(varnames = c("from", "to"), value.name = "dist_eucl2") |>
  mutate(across(c(from, to), as.character))
```
```{r}
# Attach both distance variants to the de-duplicated pairs. The join keys
# are the (from, to) id pair, which are the only columns the tables share.
data.comb <- merge(
  merge(data1, dists, by = c("from", "to")),
  dists2,
  by = c("from", "to")
)
```
```{r}
# Layers shared by both scatter plots: semi-transparent points, 2-D density
# contours, a linear fit, fixed axis titles and the black-and-white theme.
agreement_layers <- list(
  geom_point(alpha = 0.1),
  geom_density_2d(),
  geom_smooth(method = "lm"),
  xlab("True distance"),
  ylab("TCRemp dist"),
  theme_bw()
)

# Per-`from` standardised value against the row distance on the unclipped
# matrix.
ggplot(data.comb, aes(x = value.scaled, y = dist_eucl)) +
  agreement_layers

# Same comparison against the row distance on the clipped matrix.
ggplot(data.comb, aes(x = value.scaled, y = dist_eucl2)) +
  agreement_layers
```
```{r}
# Pearson correlation between the per-`from` standardised value and the
# row-to-row Euclidean distance: first on the unclipped matrix (dist_eucl),
# then on the matrix clipped to [-100, 100] (dist_eucl2).
# The calls are kept verbatim because the printed "data:" line echoes the
# argument expressions.
cor.test(data.comb$value.scaled, data.comb$dist_eucl, method = "pearson")
cor.test(data.comb$value.scaled, data.comb$dist_eucl2, method = "pearson")
```
```{r}
# Spearman rank correlation for the same two comparisons: standardised value
# against dist_eucl (unclipped) and against dist_eucl2 (clipped).
# NOTE(review): if the inputs contain ties, cor.test() warns that an exact
# p-value cannot be computed - confirm whether that warning is acceptable in
# the rendered report.
cor.test(data.comb$value.scaled, data.comb$dist_eucl, method = "spearman")
cor.test(data.comb$value.scaled, data.comb$dist_eucl2, method = "spearman")
```
```{r}
#FIN