-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.R
48 lines (41 loc) · 1.1 KB
/
utils.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# author: @bryn-g
# date: 2023-05-28
library(dplyr)
library(purrr)
library(tibble)
library(tidyr)
library(reticulate)
# Point reticulate at the project-local virtualenv in ./.venv. This must run
# before any import() below so the modules are loaded from that environment.
# NOTE(review): `required = TRUE` is expected to make reticulate fail instead
# of falling back to another Python -- confirm against the reticulate docs.
use_virtualenv('./.venv', required = TRUE)
# Load environment variables from the `.env` file, resolved relative to the
# current working directory.
readRenviron(".env")
# Import the Python modules used by this script as R objects.
builtins <- import_builtins()
pd <- import("pandas")
prw <- import("praw")
# `utils` is imported by path rather than by installed package name; no path
# argument is given, so import_from_path()'s default search location applies.
utils <- import_from_path("utils")
# Flatten nested records into a tibble that has no list columns.
#
# `x` is passed to `pd$json_normalize()`, the result is converted to a tibble,
# and every list column is replaced by a character column in which each row
# holds that row's own values joined with ",".
#
# Arguments:
#   x   Records to normalise (anything `pd$json_normalize()` accepts).
#   pd  Object exposing `json_normalize()`; this script passes the imported
#       pandas module.
#
# Returns a tibble whose former list columns are character vectors.
flat_pd_df <- function(x, pd) {
  # Join the values of a single cell. Collapsing has to happen per cell:
  # calling paste0(collapse = ) on the whole column yields one string for the
  # entire column, which would then be recycled into every row.
  # Missing values are dropped; an empty or NULL cell becomes "".
  collapse_cell <- function(cell) {
    values <- unlist(cell, use.names = FALSE)
    paste0(values[!is.na(values)], collapse = ",")
  }

  x |>
    pd$json_normalize() |>
    as_tibble() |>
    mutate(across(where(is.list), \(col) map_chr(col, collapse_cell)))
}
# Bring a data frame into the common comment layout and attach author names.
#
# Arguments:
#   df        Data frame to reshape. When it has no `parent_id` column it is
#             tagged "op" and its `selftext` is used as `body`; when it has no
#             `title` column it is tagged "comment".
#   df_users  Data frame whose `name` column becomes `author_name`. It is
#             bound column-wise, so its rows must line up with those of `df`.
#
# Returns the selected columns with `author_name` placed after `author_id`.
str_comment_df <- function(df, df_users) {
  # No parent id: fill the comment-only columns and tag the rows as "op".
  no_parent_id <- !("parent_id" %in% names(df))
  if (no_parent_id) {
    df <- mutate(df, parent_id = NA_character_, body = selftext, type = "op")
  }

  # No title: add an empty one and tag the rows as "comment".
  no_title <- !("title" %in% names(df))
  if (no_title) {
    df <- mutate(df, title = NA_character_, type = "comment")
  }

  shaped <- select(
    df,
    "subreddit_id",
    "parent_id",
    comment_id = name,
    "created_utc",
    author_id = author_fullname,
    "title",
    "body",
    "type"
  )
  author_names <- select(df_users, author_name = name)

  # Positional bind: row i of `df_users` is taken as the author of row i.
  with_authors <- bind_cols(shaped, author_names)
  relocate(with_authors, "author_name", .after = "author_id")
}