Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
FlorianSchw committed Jun 13, 2023
1 parent 9bc834c commit 49e1a4a
Show file tree
Hide file tree
Showing 5 changed files with 243 additions and 17 deletions.
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@ AggregateMethods:
clusterPlotDS,
nbclustDS,
nbclustVisualDS,
varSelLcmDS1,
varSelLcmDS2
varSelLcmDS1
AssignMethods:
cutreeDS,
distDS,
hclustDS,
kmeansDS,
varSelLcm_DA_DS1,
varSelLcmDS2,
varSelLcm_AlternateDS2,
varSelLcmDS3
RoxygenNote: 7.2.3
Encoding: UTF-8
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ export(nbclustVisualDS)
export(varSelLcmDS1)
export(varSelLcmDS2)
export(varSelLcmDS3)
export(varSelLcm_AlternateDS2)
export(varSelLcm_DA_DS1)
import(VarSelLCM)
import(cluster)
import(dplyr)
Expand Down
41 changes: 26 additions & 15 deletions R/varSelLcmDS2.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,16 @@ varSelLcmDS2 <- function(df, num.clust, vbleSelec, crit.varsel, initModel, nbcor
mean <- data_structure[[paste0("Mean_X_",variable_cont[p])]][j]
sd <- data_structure[[paste0("SD_X_",variable_cont[p])]][j]
length_1 <- data_structure$Observations[j]
var_min <- min(df[[variable_cont[p]]])
var_max <- max(df[[variable_cont[p]]])


if(mean - 2*sd < 0){
var_min <- 0
} else {
var_min <- mean - 2*sd
}

var_max <- mean + 2*sd

zzz[[j]] <- rtruncnorm(length_1, mean = mean, sd = sd, a = var_min, b = var_max)

}
Expand Down Expand Up @@ -207,17 +215,18 @@ varSelLcmDS2 <- function(df, num.clust, vbleSelec, crit.varsel, initModel, nbcor

#### adjustment for NA calculations

# for (uu in 1:length(cols)){
for (uu in 1:length(cols)){

# current_vector <- FinalResults_DF %>%
# select(contains(cols[uu])) %>%
# rowSums()
current_vector <- FinalResults_DF %>%
select(contains(cols[uu])) %>%
rowSums()

#FinalResults_DF <- FinalResults_DF %>%
# mutate(across(contains(cols[uu]), ~ .x / current_vector * 100))
FinalResults_DF <- FinalResults_DF %>%
mutate(across(contains(cols[uu]), ~ .x / current_vector * 100))

#}
}



FinalResults_DF <- subset(FinalResults_DF, select = -c(results_values_final.1,
Observations))
Expand Down Expand Up @@ -251,10 +260,11 @@ varSelLcmDS2 <- function(df, num.clust, vbleSelec, crit.varsel, initModel, nbcor
}




matching_vector <- rep(NA, dim(summaries_dataframe)[1])

for (n in 1:num.clust){
for (n in 1:length(assignments)){

first_iteration <- seq(from = 1, to = num.clust)
additional_iteration <- seq(from = (n-1)*num.clust + 1, to = n*num.clust)
Expand All @@ -267,6 +277,7 @@ varSelLcmDS2 <- function(df, num.clust, vbleSelec, crit.varsel, initModel, nbcor
iter1 <- matching_indiv[seq(from = 1, to = num.clust)]
iter2 <- matching_indiv[seq(from = num.clust + 1, to = 2*num.clust)]


pos <- c()
for (ll in 1:length(iter2)){

Expand All @@ -277,13 +288,15 @@ varSelLcmDS2 <- function(df, num.clust, vbleSelec, crit.varsel, initModel, nbcor
}



summaries_dataframe$matching <- matching_vector

#dist_obj <- dist(summaries_dataframe[-1])
#hc_object <- hclust(dist_obj)
#summaries_dataframe$matching <- cutree(hc_object, k = num.clust)



collect <- list()

for(pp in 1:length(assignments)){
Expand All @@ -300,20 +313,18 @@ varSelLcmDS2 <- function(df, num.clust, vbleSelec, crit.varsel, initModel, nbcor

int.var <- subset(int.var, select = c(2))
collect[[pp]] <- int.var
}
}

collect_dataframe <- bind_cols(collect)


cluster_intermediate <- as.numeric(apply(collect_dataframe[1:nrow(df),], 1, function(x) names(which.max(table(x)))))


outcome <- list(matching_vector,
summaries_dataframe,
assignments)



return(outcome)
return(cluster_intermediate)



Expand Down
178 changes: 178 additions & 0 deletions R/varSelLcm_AlternateDS2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
#'
#' @import VarSelLCM
#' @import dplyr
#' @import truncnorm
#' @export
#'


varSelLcm_AlternateDS2 <- function(df, num.clust, vbleSelec, crit.varsel, initModel, nbcores, nbSmall, iterSmall, nbKeep, iterKeep, tolKeep, num.iterations){

df <- eval(parse(text=df), envir = parent.frame())

list_other_study_data <- objects(pattern = "StudyData", envir = parent.frame())


assignments <- list()
summaries <- list()
store_dfs <- list()

check_df_total <- list()

for (ww in 1:num.iterations){





extension_dfs <- list()
for (qqq in 1:length(list_other_study_data)){




data_structure <- eval(parse(text=list_other_study_data[qqq]), envir = parent.frame())
variable_columns <- strsplit(colnames(data_structure)[which(!(grepl("results_|Observations", colnames(data_structure))))], "_X_", fixed = TRUE)

type <- c()
variables <- c()
expression <- c()
for (i in 1:length(variable_columns)){

type[i] <- variable_columns[[i]][1]
variables[i] <- variable_columns[[i]][2]
expression[i] <- variable_columns[[i]][3]

}

variable_overview <- data.frame(type, variables, expression)


not_categ <- variable_overview %>%
filter(!type == "CAT")

categ <- variable_overview %>%
filter(type == "CAT")

variable_cont <- unique(not_categ$variables)

variable_cat <- unique(categ$variables)

zzz <- list()
data_extension <- df[1:sum(data_structure$Observations),]
data_extension[,] <- NA

for (p in 1:length(variable_cont)){
for (j in 1:dim(data_structure)[1]) {

mean <- data_structure[[paste0("Mean_X_",variable_cont[p])]][j]
sd <- data_structure[[paste0("SD_X_",variable_cont[p])]][j]
length_1 <- data_structure$Observations[j]


if(mean - 2*sd < 0){
var_min <- 0
} else {
var_min <- mean - 2*sd
}

var_max <- mean + 2*sd

zzz[[j]] <- rtruncnorm(length_1, mean = mean, sd = sd, a = var_min, b = var_max)

}

tmp_collect <- unlist(zzz)
data_extension[[variable_cont[p]]] <- tmp_collect

}

for (p in 1:length(variable_cat)){

current_cat_variable <- data_structure[which(grepl(paste0("_X_" , variable_cat[p], "_X_"), colnames(data_structure), fixed = TRUE))]
uuu <- list()

for (j in 1:dim(data_structure)[1]){

lll <- list()

for (k in 1:dim(current_cat_variable)[2]){

lll[[k]] <- rep(as.numeric(strsplit(colnames(current_cat_variable[k]), "_X_")[[1]][3]), current_cat_variable[j,k])

}

tmp_cat <- sample(unlist(lll))
uuu[[j]] <- tmp_cat

}

tmp_coll <- unlist(uuu)
data_extension[[variable_cat[p]]] <- as.factor(tmp_coll)

}

for (u in 1:length(colnames(df))){

if(class(df[[u]]) == "integer"){

data_extension[[u]] <- as.integer(round(data_extension[[u]]))

}

}



extension_dfs[[qqq]] <- data_extension



#### qqq ends here

}


data_extension_full <- bind_rows(extension_dfs)

store_dfs[[ww]] <- data_extension_full

}

additional_dfs <- bind_rows(store_dfs)
dataframe_pooled <- rbind(df, additional_dfs)


set.seed(42)
FinalResults <- VarSelLCM::VarSelCluster(x = dataframe_pooled,
gvals = num.clust,
vbleSelec = vbleSelec,
crit.varsel = crit.varsel,
initModel = initModel,
nbcores = nbcores,
nbSmall = nbSmall,
iterSmall = iterSmall,
nbKeep = nbKeep,
iterKeep = iterKeep,
tolKeep = tolKeep)


results_values_final <- fitted(FinalResults)

cluster_intermediate <- results_values_final[1:nrow(df)]




return(cluster_intermediate)





}





33 changes: 33 additions & 0 deletions R/varSelLcm_DA_DS1.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#'
#' @import VarSelLCM
#' @import dplyr
#' @export
#'


varSelLcm_DA_DS1 <- function(df, num.clust, vbleSelec, crit.varsel, initModel, nbcores, nbSmall, iterSmall, nbKeep, iterKeep, tolKeep){

df <- eval(parse(text=df), envir = parent.frame())


# Cuts the tree
results <- VarSelLCM::VarSelCluster(x = df,
gvals = num.clust,
vbleSelec = vbleSelec,
crit.varsel = crit.varsel,
initModel = initModel,
nbcores = nbcores,
nbSmall = nbSmall,
iterSmall = iterSmall,
nbKeep = nbKeep,
iterKeep = iterKeep,
tolKeep = tolKeep)


results_values <- fitted(results)

return(results_values)



}

0 comments on commit 49e1a4a

Please sign in to comment.