diff --git a/R/PseudoBulk.R b/R/PseudoBulk.R index e505ce66..377daa2e 100644 --- a/R/PseudoBulk.R +++ b/R/PseudoBulk.R @@ -606,11 +606,20 @@ FitRegularizedClassificationGlm <- function(seuratObj, #drop rownames column dplyr::select(-dplyr::all_of("Row.names")) - #fix gene names like MAMU-A with an easily replaceable "dash" and "leadingNumber" + #fix gene names like MAMU-A or UGT2B9*2 with an easily replaceable and uniquely mapping "dash", "leadingNumber", or "star". + + # Perform a test to ensure we won't get conflicts: + for (token in c("dash", "leadingNumber", "star")) { + if (any(grepl(colnames(target_labeled_data), pattern = token))) { + matches <- grep(colnames(target_labeled_data), pattern = token, value = TRUE) + stop(paste0('The input feature names contained the unexpected pattern: ', token, '. Feature(s) were: ', paste0(matches, collapse = ', '))) + } + } colnames(target_labeled_data) <- gsub("-", "dash", colnames(target_labeled_data)) colnames(target_labeled_data) <- gsub("^[0-9]", "leadingNumber", colnames(target_labeled_data)) + colnames(target_labeled_data) <- gsub("\\*", "star", colnames(target_labeled_data)) ##set up task task_metadata_classification <- mlr3::as_task_classif(target_labeled_data, @@ -643,9 +652,10 @@ FitRegularizedClassificationGlm <- function(seuratObj, classification_features_for_class) } - #put the dashes back in the feature names and delete the "leadingNumber" prefix + #put the dashes back in the feature names, delete the "leadingNumber" prefix, and replace "star" with asterisks. classification_features <- gsub("dash", "-", classification_features) - classification_features <- gsub("leadingNumber", "", classification_features) + classification_features <- gsub("^leadingNumber", "", classification_features) + classification_features <- gsub("star", "*", classification_features) #return either a vector of genes or both a model and vector of genes. if (!returnModelAndSplits) {