update

FlorianSchw · Jun 13, 2023 · 49e1a4a · 49e1a4a
1 parent 9bc834c
commit 49e1a4a
Show file tree

Hide file tree

Showing 5 changed files with 243 additions and 17 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -19,13 +19,15 @@ AggregateMethods:
   clusterPlotDS,
   nbclustDS,
   nbclustVisualDS,
-  varSelLcmDS1,
-  varSelLcmDS2
+  varSelLcmDS1
 AssignMethods:
   cutreeDS,
   distDS,
   hclustDS,
   kmeansDS,
+  varSelLcm_DA_DS1,
+  varSelLcmDS2,
+  varSelLcm_AlternateDS2,
   varSelLcmDS3
 RoxygenNote: 7.2.3
 Encoding: UTF-8

diff --git a/NAMESPACE b/NAMESPACE
@@ -10,6 +10,8 @@ export(nbclustVisualDS)
 export(varSelLcmDS1)
 export(varSelLcmDS2)
 export(varSelLcmDS3)
+export(varSelLcm_AlternateDS2)
+export(varSelLcm_DA_DS1)
 import(VarSelLCM)
 import(cluster)
 import(dplyr)

diff --git a/R/varSelLcmDS2.R b/R/varSelLcmDS2.R
@@ -68,8 +68,16 @@ varSelLcmDS2 <- function(df, num.clust, vbleSelec, crit.varsel, initModel, nbcor
       mean <- data_structure[[paste0("Mean_X_",variable_cont[p])]][j]
       sd <- data_structure[[paste0("SD_X_",variable_cont[p])]][j]
       length_1 <- data_structure$Observations[j]
-      var_min <- min(df[[variable_cont[p]]])
-      var_max <- max(df[[variable_cont[p]]])
+
+
+      if(mean - 2*sd < 0){
+        var_min <- 0
+      } else {
+        var_min <- mean - 2*sd
+      }
+
+      var_max <- mean + 2*sd
+
       zzz[[j]] <- rtruncnorm(length_1, mean = mean, sd = sd, a = var_min, b = var_max)
 
     }
@@ -207,17 +215,18 @@ varSelLcmDS2 <- function(df, num.clust, vbleSelec, crit.varsel, initModel, nbcor
 
   #### adjustment for NA calculations
 
- # for (uu in 1:length(cols)){
+  for (uu in 1:length(cols)){
 
-  #  current_vector <- FinalResults_DF %>%
-   #   select(contains(cols[uu])) %>%
-    #  rowSums()
+    current_vector <- FinalResults_DF %>%
+      select(contains(cols[uu])) %>%
+      rowSums()
 
-    #FinalResults_DF <- FinalResults_DF %>%
-     # mutate(across(contains(cols[uu]), ~ .x / current_vector * 100))
+    FinalResults_DF <- FinalResults_DF %>%
+      mutate(across(contains(cols[uu]), ~ .x / current_vector * 100))
 
-  #}
+  }
 
+
 
   FinalResults_DF <- subset(FinalResults_DF, select = -c(results_values_final.1,
                                                          Observations))
@@ -251,10 +260,11 @@ varSelLcmDS2 <- function(df, num.clust, vbleSelec, crit.varsel, initModel, nbcor
   }
 
 
+
 
   matching_vector <- rep(NA, dim(summaries_dataframe)[1])
 
-  for (n in 1:num.clust){
+  for (n in 1:length(assignments)){
 
     first_iteration <- seq(from = 1, to = num.clust)
     additional_iteration <- seq(from = (n-1)*num.clust + 1, to = n*num.clust)
@@ -267,6 +277,7 @@ varSelLcmDS2 <- function(df, num.clust, vbleSelec, crit.varsel, initModel, nbcor
     iter1 <- matching_indiv[seq(from = 1, to = num.clust)]
     iter2 <- matching_indiv[seq(from = num.clust + 1, to = 2*num.clust)]
 
+
     pos <- c()
     for (ll in 1:length(iter2)){
 
@@ -277,13 +288,15 @@ varSelLcmDS2 <- function(df, num.clust, vbleSelec, crit.varsel, initModel, nbcor
   }
 
 
+
   summaries_dataframe$matching <- matching_vector
 
   #dist_obj <- dist(summaries_dataframe[-1])
   #hc_object <- hclust(dist_obj)
   #summaries_dataframe$matching <- cutree(hc_object, k = num.clust)
 
 
+
   collect <- list()
 
   for(pp in 1:length(assignments)){
@@ -300,20 +313,18 @@ varSelLcmDS2 <- function(df, num.clust, vbleSelec, crit.varsel, initModel, nbcor
 
     int.var <- subset(int.var, select = c(2))
     collect[[pp]] <- int.var
-  }
+ }
 
   collect_dataframe <- bind_cols(collect)
 
 
   cluster_intermediate <- as.numeric(apply(collect_dataframe[1:nrow(df),], 1, function(x) names(which.max(table(x)))))
 
 
-  outcome <- list(matching_vector,
-                  summaries_dataframe,
-                  assignments)
+
 
 
-  return(outcome)
+  return(cluster_intermediate)
 
 
 

diff --git a/R/varSelLcm_AlternateDS2.R b/R/varSelLcm_AlternateDS2.R
@@ -0,0 +1,178 @@
+#'
+#' @import VarSelLCM
+#' @import dplyr
+#' @import truncnorm
+#' @export
+#' 
+
+
+varSelLcm_AlternateDS2 <- function(df, num.clust, vbleSelec, crit.varsel, initModel, nbcores, nbSmall, iterSmall, nbKeep, iterKeep, tolKeep, num.iterations){
+
+  df <- eval(parse(text=df), envir = parent.frame())  
+
+  list_other_study_data <- objects(pattern = "StudyData", envir = parent.frame())
+
+
+  assignments <- list()
+  summaries <- list()
+  store_dfs <- list()
+
+  check_df_total <- list()
+
+  for (ww in 1:num.iterations){
+
+
+
+
+
+    extension_dfs <- list()
+    for (qqq in 1:length(list_other_study_data)){
+
+
+
+
+      data_structure <- eval(parse(text=list_other_study_data[qqq]), envir = parent.frame())  
+      variable_columns <- strsplit(colnames(data_structure)[which(!(grepl("results_|Observations", colnames(data_structure))))], "_X_", fixed = TRUE)
+
+      type <- c()
+      variables <- c()
+      expression <- c()
+      for (i in 1:length(variable_columns)){
+
+        type[i] <- variable_columns[[i]][1]
+        variables[i] <- variable_columns[[i]][2]
+        expression[i] <- variable_columns[[i]][3]
+
+      }
+
+      variable_overview <- data.frame(type, variables, expression)
+
+
+      not_categ <- variable_overview %>%
+        filter(!type == "CAT")
+
+      categ <- variable_overview %>%
+        filter(type == "CAT")
+
+      variable_cont <- unique(not_categ$variables)
+
+      variable_cat <- unique(categ$variables)
+
+      zzz <- list()
+      data_extension <- df[1:sum(data_structure$Observations),]
+      data_extension[,] <-  NA
+
+      for (p in 1:length(variable_cont)){
+        for (j in 1:dim(data_structure)[1]) {
+
+          mean <- data_structure[[paste0("Mean_X_",variable_cont[p])]][j]
+          sd <- data_structure[[paste0("SD_X_",variable_cont[p])]][j]
+          length_1 <- data_structure$Observations[j]
+
+
+          if(mean - 2*sd < 0){
+            var_min <- 0
+          } else {
+            var_min <- mean - 2*sd
+          }
+
+          var_max <- mean + 2*sd
+
+          zzz[[j]] <- rtruncnorm(length_1, mean = mean, sd = sd, a = var_min, b = var_max)
+
+        }
+
+        tmp_collect <- unlist(zzz)
+        data_extension[[variable_cont[p]]] <- tmp_collect
+
+      }
+
+      for (p in 1:length(variable_cat)){
+
+        current_cat_variable <- data_structure[which(grepl(paste0("_X_" , variable_cat[p], "_X_"), colnames(data_structure), fixed = TRUE))]
+        uuu <- list()
+
+        for (j in 1:dim(data_structure)[1]){
+
+          lll <- list()
+
+          for (k in 1:dim(current_cat_variable)[2]){
+
+            lll[[k]] <- rep(as.numeric(strsplit(colnames(current_cat_variable[k]), "_X_")[[1]][3]), current_cat_variable[j,k])
+
+          } 
+
+          tmp_cat <- sample(unlist(lll))
+          uuu[[j]] <- tmp_cat
+
+        }
+
+        tmp_coll <- unlist(uuu)
+        data_extension[[variable_cat[p]]] <- as.factor(tmp_coll)
+
+      }
+
+      for (u in 1:length(colnames(df))){
+
+        if(class(df[[u]]) == "integer"){
+
+          data_extension[[u]] <- as.integer(round(data_extension[[u]]))
+
+        }
+
+      }
+
+
+
+      extension_dfs[[qqq]] <- data_extension
+
+
+
+      #### qqq ends here
+
+    }
+
+
+    data_extension_full <- bind_rows(extension_dfs)
+
+    store_dfs[[ww]] <- data_extension_full
+
+  }
+
+  additional_dfs <- bind_rows(store_dfs)
+  dataframe_pooled <- rbind(df, additional_dfs)
+
+
+  set.seed(42)
+  FinalResults <- VarSelLCM::VarSelCluster(x = dataframe_pooled,
+                                             gvals = num.clust,
+                                             vbleSelec = vbleSelec,
+                                             crit.varsel = crit.varsel,
+                                             initModel = initModel,
+                                             nbcores = nbcores,
+                                             nbSmall = nbSmall,
+                                             iterSmall = iterSmall,
+                                             nbKeep = nbKeep,
+                                             iterKeep = iterKeep,
+                                             tolKeep = tolKeep) 
+
+
+   results_values_final <- fitted(FinalResults)
+
+   cluster_intermediate <- results_values_final[1:nrow(df)]
+
+
+
+
+  return(cluster_intermediate)
+
+
+
+
+
+}
+
+
+
+
+
diff --git a/R/varSelLcm_DA_DS1.R b/R/varSelLcm_DA_DS1.R
@@ -0,0 +1,33 @@
+#'
+#' @import VarSelLCM
+#' @import dplyr
+#' @export
+#' 
+
+
+varSelLcm_DA_DS1 <- function(df, num.clust, vbleSelec, crit.varsel, initModel, nbcores, nbSmall, iterSmall, nbKeep, iterKeep, tolKeep){
+
+  df <- eval(parse(text=df), envir = parent.frame())  
+
+
+  # Cuts the tree
+  results <- VarSelLCM::VarSelCluster(x = df,
+                                      gvals = num.clust,
+                                      vbleSelec = vbleSelec,
+                                      crit.varsel = crit.varsel,
+                                      initModel = initModel,
+                                      nbcores = nbcores,
+                                      nbSmall = nbSmall,
+                                      iterSmall = iterSmall,
+                                      nbKeep = nbKeep,
+                                      iterKeep = iterKeep,
+                                      tolKeep = tolKeep) 
+
+
+  results_values <- fitted(results)
+
+  return(results_values)
+
+
+
+}