-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #36 from nfdi4health/Sofia
Updated workflow
- Loading branch information
Showing
25 changed files
with
577 additions
and
25 deletions.
There are no files selected for viewing
Binary file not shown.
File renamed without changes.
Binary file not shown.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#### Script for harmonizing data for NFDI4Health | ||
|
||
####Installation of Rmonize and its dependent packages (necessary R Version > 3.4) | ||
# install.packages("Rmonize") | ||
# install.packages("readxl") | ||
# install.packages("tidyverse") | ||
# install.packages("here") | ||
# install.packages("car") | ||
# install.packages("writexl") | ||
|
||
#### Load the package in order to conduct | ||
library(Rmonize) | ||
library(readxl) | ||
library(tidyverse) | ||
library(here) | ||
library(car) | ||
library(writexl) | ||
|
||
|
||
#### Step 0: Name of the study and creation of mock data | ||
dataset_name <- 'DEGS1_FRANZI' | ||
folder_name <- 'Franzi' | ||
|
||
source(here::here("create_mock_data", "mock_data_function.R")) | ||
create_mock_data(studyname = dataset_name, folder_name = folder_name) | ||
|
||
#### Step 1: Import overall DataSchema | ||
dataschema_1 <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_schema/", paste0("Dataschema_", toupper(folder_name), ".xlsx"))), sheet = 1)) | ||
dataschema_2 <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_schema/", paste0("Dataschema_", toupper(folder_name), ".xlsx"))), sheet = 2)) | ||
|
||
dataschema <- list(Variables = dataschema_1, | ||
Categories = dataschema_2) | ||
|
||
#### Step 2: Import Datasets | ||
#### Import check provided in case the csv file is in German style (delim = ";", dec.point = ",") | ||
|
||
input_dataset <- readr::read_csv(here::here(paste0(folder_name, "/data/", paste0("DATA_", dataset_name, ".csv")))) | ||
|
||
if(dim(input_dataset)[2] == 1){ | ||
input_dataset <- read.csv(here::here(paste0(folder_name, "/data", paste0("DATA_", dataset_name, ".csv"))), sep = ";", dec = ",") | ||
} | ||
|
||
#### Step 3: Import Data Dictionaries of the study | ||
dd_var <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_dictionary/"), paste0("DD_",dataset_name, ".xlsx")), sheet = 1)) | ||
dd_cat <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name,"/rmonize/data_dictionary/"), paste0("DD_",dataset_name, ".xlsx")), sheet = 2)) | ||
|
||
dd <- list(Variables = dd_var, | ||
Categories = dd_cat) | ||
|
||
#### Step 4: Import prepared Data Processing Elements (DPE) | ||
data_proc_elem <- readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_proc_elem"), paste0("DPE_",dataset_name, ".xlsx")), sheet = 1) | ||
|
||
#### Step 5: Combine input datasets and data dictionaries into a dossier | ||
dataset <- madshapR::data_dict_apply( | ||
dataset = input_dataset, | ||
data_dict = dd) | ||
|
||
dossier <- madshapR::dossier_create(dataset_list = list( | ||
dataset)) | ||
|
||
#### Step 6: Create the harmonized dossier using the dossier, overall dataschema and DPE's | ||
harmonized_dossier <- Rmonize::harmo_process( | ||
dossier, | ||
dataschema, | ||
data_proc_elem) | ||
|
||
#### Step 9: Extract and save harmonized data into a pre-set folder | ||
harmonized_dataset <- Rmonize::pooled_harmonized_dataset_create(harmonized_dossier) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#### Script for harmonizing data for NFDI4Health | ||
|
||
####Installation of Rmonize and its dependent packages (necessary R Version > 3.4) | ||
# install.packages("Rmonize") | ||
# install.packages("readxl") | ||
# install.packages("tidyverse") | ||
# install.packages("here") | ||
# install.packages("car") | ||
# install.packages("writexl") | ||
|
||
#### Load the package in order to conduct | ||
library(Rmonize) | ||
library(readxl) | ||
library(tidyverse) | ||
library(here) | ||
library(car) | ||
library(writexl) | ||
|
||
|
||
#### Step 0: Name of the study and creation of mock data | ||
dataset_name <- 'KARMEN_FRANZI' | ||
folder_name <- 'Franzi' | ||
|
||
source(here::here("create_mock_data", "mock_data_function.R")) | ||
create_mock_data(studyname = dataset_name, folder_name = folder_name) | ||
|
||
#### Step 1: Import overall DataSchema | ||
dataschema_1 <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_schema/", paste0("Dataschema_", toupper(folder_name), ".xlsx"))), sheet = 1)) | ||
dataschema_2 <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_schema/", paste0("Dataschema_", toupper(folder_name), ".xlsx"))), sheet = 2)) | ||
|
||
dataschema <- list(Variables = dataschema_1, | ||
Categories = dataschema_2) | ||
|
||
#### Step 2: Import Datasets | ||
#### Import check provided in case the csv file is in German style (delim = ";", dec.point = ",") | ||
|
||
input_dataset <- readr::read_csv(here::here(paste0(folder_name, "/data/", paste0("DATA_", dataset_name, ".csv")))) | ||
|
||
if(dim(input_dataset)[2] == 1){ | ||
input_dataset <- read.csv(here::here(paste0(folder_name, "/data", paste0("DATA_", dataset_name, ".csv"))), sep = ";", dec = ",") | ||
} | ||
|
||
#### Step 3: Import Data Dictionaries of the study | ||
dd_var <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_dictionary/"), paste0("DD_",dataset_name, ".xlsx")), sheet = 1)) | ||
dd_cat <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name,"/rmonize/data_dictionary/"), paste0("DD_",dataset_name, ".xlsx")), sheet = 2)) | ||
|
||
dd <- list(Variables = dd_var, | ||
Categories = dd_cat) | ||
|
||
#### Step 4: Import prepared Data Processing Elements (DPE) | ||
data_proc_elem <- readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_proc_elem"), paste0("DPE_",dataset_name, ".xlsx")), sheet = 1) | ||
|
||
#### Step 5: Combine input datasets and data dictionaries into a dossier | ||
dataset <- madshapR::data_dict_apply( | ||
dataset = input_dataset, | ||
data_dict = dd) | ||
|
||
dossier <- madshapR::dossier_create(dataset_list = list( | ||
dataset)) | ||
|
||
#### Step 6: Create the harmonized dossier using the dossier, overall dataschema and DPE's | ||
harmonized_dossier <- Rmonize::harmo_process( | ||
dossier, | ||
dataschema, | ||
data_proc_elem) | ||
|
||
#### Step 9: Extract and save harmonized data into a pre-set folder | ||
harmonized_dataset <- Rmonize::pooled_harmonized_dataset_create(harmonized_dossier) | ||
|
||
|
Binary file not shown.
File renamed without changes.
Binary file not shown.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#### Script for harmonizing data for NFDI4Health | ||
|
||
####Installation of Rmonize and its dependent packages (necessary R Version > 3.4) | ||
# install.packages("Rmonize") | ||
# install.packages("readxl") | ||
# install.packages("tidyverse") | ||
# install.packages("here") | ||
# install.packages("car") | ||
# install.packages("writexl") | ||
|
||
#### Load the package in order to conduct | ||
library(Rmonize) | ||
library(readxl) | ||
library(tidyverse) | ||
library(here) | ||
library(car) | ||
library(writexl) | ||
|
||
|
||
#### Step 0: Name of the study and creation of mock data | ||
dataset_name <- 'DEGS1_INES' | ||
folder_name <- 'Ines' | ||
|
||
source(here::here("create_mock_data", "mock_data_function.R")) | ||
create_mock_data(studyname = dataset_name, folder_name = folder_name) | ||
|
||
#### Step 1: Import overall DataSchema | ||
dataschema_1 <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_schema/", paste0("Dataschema_", toupper(folder_name), ".xlsx"))), sheet = 1)) | ||
dataschema_2 <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_schema/", paste0("Dataschema_", toupper(folder_name), ".xlsx"))), sheet = 2)) | ||
|
||
dataschema <- list(Variables = dataschema_1, | ||
Categories = dataschema_2) | ||
|
||
#### Step 2: Import Datasets | ||
#### Import check provided in case the csv file is in German style (delim = ";", dec.point = ",") | ||
|
||
input_dataset <- readr::read_csv(here::here(paste0(folder_name, "/data/", paste0("DATA_", dataset_name, ".csv")))) | ||
|
||
if(dim(input_dataset)[2] == 1){ | ||
input_dataset <- read.csv(here::here(paste0(folder_name, "/data", paste0("DATA_", dataset_name, ".csv"))), sep = ";", dec = ",") | ||
} | ||
|
||
#### Step 3: Import Data Dictionaries of the study | ||
dd_var <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_dictionary/"), paste0("DD_",dataset_name, ".xlsx")), sheet = 1)) | ||
dd_cat <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name,"/rmonize/data_dictionary/"), paste0("DD_",dataset_name, ".xlsx")), sheet = 2)) | ||
|
||
dd <- list(Variables = dd_var, | ||
Categories = dd_cat) | ||
|
||
#### Step 4: Import prepared Data Processing Elements (DPE) | ||
data_proc_elem <- readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_proc_elem"), paste0("DPE_",dataset_name, ".xlsx")), sheet = 1) | ||
|
||
#### Step 5: Combine input datasets and data dictionaries into a dossier | ||
dataset <- madshapR::data_dict_apply( | ||
dataset = input_dataset, | ||
data_dict = dd) | ||
|
||
dossier <- madshapR::dossier_create(dataset_list = list( | ||
dataset)) | ||
|
||
#### Step 6: Create the harmonized dossier using the dossier, overall dataschema and DPE's | ||
harmonized_dossier <- Rmonize::harmo_process( | ||
dossier, | ||
dataschema, | ||
data_proc_elem) | ||
|
||
#### Step 9: Extract and save harmonized data into a pre-set folder | ||
harmonized_dataset <- Rmonize::pooled_harmonized_dataset_create(harmonized_dossier) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#### Script for harmonizing data for NFDI4Health | ||
|
||
####Installation of Rmonize and its dependent packages (necessary R Version > 3.4) | ||
# install.packages("Rmonize") | ||
# install.packages("readxl") | ||
# install.packages("tidyverse") | ||
# install.packages("here") | ||
# install.packages("car") | ||
# install.packages("writexl") | ||
|
||
#### Load the package in order to conduct | ||
library(Rmonize) | ||
library(readxl) | ||
library(tidyverse) | ||
library(here) | ||
library(car) | ||
library(writexl) | ||
|
||
|
||
#### Step 0: Name of the study and creation of mock data | ||
dataset_name <- 'KARMEN_INES' | ||
folder_name <- 'Ines' | ||
|
||
source(here::here("create_mock_data", "mock_data_function.R")) | ||
create_mock_data(studyname = dataset_name, folder_name = folder_name) | ||
|
||
#### Step 1: Import overall DataSchema | ||
dataschema_1 <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_schema/", paste0("Dataschema_", toupper(folder_name), ".xlsx"))), sheet = 1)) | ||
dataschema_2 <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_schema/", paste0("Dataschema_", toupper(folder_name), ".xlsx"))), sheet = 2)) | ||
|
||
dataschema <- list(Variables = dataschema_1, | ||
Categories = dataschema_2) | ||
|
||
#### Step 2: Import Datasets | ||
#### Import check provided in case the csv file is in German style (delim = ";", dec.point = ",") | ||
|
||
input_dataset <- readr::read_csv(here::here(paste0(folder_name, "/data/", paste0("DATA_", dataset_name, ".csv")))) | ||
|
||
if(dim(input_dataset)[2] == 1){ | ||
input_dataset <- read.csv(here::here(paste0(folder_name, "/data", paste0("DATA_", dataset_name, ".csv"))), sep = ";", dec = ",") | ||
} | ||
|
||
#### Step 3: Import Data Dictionaries of the study | ||
dd_var <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_dictionary/"), paste0("DD_",dataset_name, ".xlsx")), sheet = 1)) | ||
dd_cat <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name,"/rmonize/data_dictionary/"), paste0("DD_",dataset_name, ".xlsx")), sheet = 2)) | ||
|
||
dd <- list(Variables = dd_var, | ||
Categories = dd_cat) | ||
|
||
#### Step 4: Import prepared Data Processing Elements (DPE) | ||
data_proc_elem <- readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_proc_elem"), paste0("DPE_",dataset_name, ".xlsx")), sheet = 1) | ||
|
||
#### Step 5: Combine input datasets and data dictionaries into a dossier | ||
dataset <- madshapR::data_dict_apply( | ||
dataset = input_dataset, | ||
data_dict = dd) | ||
|
||
dossier <- madshapR::dossier_create(dataset_list = list( | ||
dataset)) | ||
|
||
#### Step 6: Create the harmonized dossier using the dossier, overall dataschema and DPE's | ||
harmonized_dossier <- Rmonize::harmo_process( | ||
dossier, | ||
dataschema, | ||
data_proc_elem) | ||
|
||
#### Step 9: Extract and save harmonized data into a pre-set folder | ||
harmonized_dataset <- Rmonize::pooled_harmonized_dataset_create(harmonized_dossier) | ||
|
||
|
Binary file not shown.
File renamed without changes.
Binary file not shown.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#### Script for harmonizing data for NFDI4Health | ||
|
||
####Installation of Rmonize and its dependent packages (necessary R Version > 3.4) | ||
# install.packages("Rmonize") | ||
# install.packages("readxl") | ||
# install.packages("tidyverse") | ||
# install.packages("here") | ||
# install.packages("car") | ||
# install.packages("writexl") | ||
|
||
#### Load the package in order to conduct | ||
library(Rmonize) | ||
library(readxl) | ||
library(tidyverse) | ||
library(here) | ||
library(car) | ||
library(writexl) | ||
|
||
|
||
#### Step 0: Name of the study and creation of mock data | ||
dataset_name <- 'DEGS1_TRACY' | ||
folder_name <- 'Tracy' | ||
|
||
source(here::here("create_mock_data", "mock_data_function.R")) | ||
create_mock_data(studyname = dataset_name, folder_name = folder_name) | ||
|
||
#### Step 1: Import overall DataSchema | ||
dataschema_1 <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_schema/", paste0("Dataschema_", toupper(folder_name), ".xlsx"))), sheet = 1)) | ||
dataschema_2 <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_schema/", paste0("Dataschema_", toupper(folder_name), ".xlsx"))), sheet = 2)) | ||
|
||
dataschema <- list(Variables = dataschema_1, | ||
Categories = dataschema_2) | ||
|
||
#### Step 2: Import Datasets | ||
#### Import check provided in case the csv file is in German style (delim = ";", dec.point = ",") | ||
|
||
input_dataset <- readr::read_csv(here::here(paste0(folder_name, "/data/", paste0("DATA_", dataset_name, ".csv")))) | ||
|
||
if(dim(input_dataset)[2] == 1){ | ||
input_dataset <- read.csv(here::here(paste0(folder_name, "/data", paste0("DATA_", dataset_name, ".csv"))), sep = ";", dec = ",") | ||
} | ||
|
||
#### Step 3: Import Data Dictionaries of the study | ||
dd_var <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_dictionary/"), paste0("DD_",dataset_name, ".xlsx")), sheet = 1)) | ||
dd_cat <- tibble::tibble(readxl::read_excel(here::here(paste0(folder_name,"/rmonize/data_dictionary/"), paste0("DD_",dataset_name, ".xlsx")), sheet = 2)) | ||
|
||
dd <- list(Variables = dd_var, | ||
Categories = dd_cat) | ||
|
||
#### Step 4: Import prepared Data Processing Elements (DPE) | ||
data_proc_elem <- readxl::read_excel(here::here(paste0(folder_name, "/rmonize/data_proc_elem"), paste0("DPE_",dataset_name, ".xlsx")), sheet = 1) | ||
|
||
#### Step 5: Combine input datasets and data dictionaries into a dossier | ||
dataset <- madshapR::data_dict_apply( | ||
dataset = input_dataset, | ||
data_dict = dd) | ||
|
||
dossier <- madshapR::dossier_create(dataset_list = list( | ||
dataset)) | ||
|
||
#### Step 6: Create the harmonized dossier using the dossier, overall dataschema and DPE's | ||
harmonized_dossier <- Rmonize::harmo_process( | ||
dossier, | ||
dataschema, | ||
data_proc_elem) | ||
|
||
#### Step 9: Extract and save harmonized data into a pre-set folder | ||
harmonized_dataset <- Rmonize::pooled_harmonized_dataset_create(harmonized_dossier) | ||
|
||
|
Oops, something went wrong.