Skip to content

Commit

Permalink
fix asterisks in gene names (#199)
Browse files Browse the repository at this point in the history
* fix asterisks in gene names

* Add check before editing feature names

---------

Co-authored-by: bbimber <[email protected]>
  • Loading branch information
GWMcElfresh and bbimber authored Dec 14, 2023
1 parent a50dfa2 commit 0065bcc
Showing 1 changed file with 13 additions and 3 deletions.
16 changes: 13 additions & 3 deletions R/PseudoBulk.R
Original file line number Diff line number Diff line change
Expand Up @@ -606,11 +606,20 @@ FitRegularizedClassificationGlm <- function(seuratObj,
#drop rownames column
dplyr::select(-dplyr::all_of("Row.names"))

#fix gene names like MAMU-A with an easily replaceable "dash" and "leadingNumber"
#fix gene names like MAMU-A or UGT2B9*2 with an easily replaceable and uniquely mapping "dash", "leadingNumber", or "star".

# Guard against collisions: if any feature name already contains one of the
# placeholder tokens, restoring the original names afterwards would be
# ambiguous, so fail early and report the offending features.
for (token in c("dash", "leadingNumber", "star")) {
  matches <- grep(token, colnames(target_labeled_data), fixed = TRUE, value = TRUE)
  if (length(matches) > 0) {
    stop(paste0('The input feature names contained the unexpected pattern: ', token, '. Feature(s) were: ', paste0(matches, collapse = ', ')))
  }
}
# Replace every literal dash with the "dash" placeholder.
colnames(target_labeled_data) <-
  gsub("-", "dash", colnames(target_labeled_data), fixed = TRUE)
# Prefix (rather than replace) a leading digit with "leadingNumber". The
# backreference keeps the digit itself, so stripping the prefix later recovers
# the original name and names differing only in their first digit stay unique.
colnames(target_labeled_data) <-
  gsub("^([0-9])", "leadingNumber\\1", colnames(target_labeled_data))
# Replace every literal asterisk with the "star" placeholder.
colnames(target_labeled_data) <-
  gsub("*", "star", colnames(target_labeled_data), fixed = TRUE)

##set up task
task_metadata_classification <- mlr3::as_task_classif(target_labeled_data,
Expand Down Expand Up @@ -643,9 +652,10 @@ FitRegularizedClassificationGlm <- function(seuratObj,
classification_features_for_class)
}

#put the dashes back in the feature names and delete the "leadingNumber" prefix
#put the dashes back in the feature names, delete the "leadingNumber" prefix, and replace "star" with asterisks.
# Undo the placeholder substitutions applied to the feature names before
# model fitting, so the returned features carry their original gene names.
classification_features <- gsub("dash", "-", classification_features, fixed = TRUE)
# Remove the "leadingNumber" placeholder only where it was inserted, i.e. at
# the very start of a name; an unanchored removal is unnecessary and would
# also strip the token from the middle of a name.
classification_features <- gsub("^leadingNumber", "", classification_features)
classification_features <- gsub("star", "*", classification_features, fixed = TRUE)

#return either a vector of genes or both a model and vector of genes.
if (!returnModelAndSplits) {
Expand Down

0 comments on commit 0065bcc

Please sign in to comment.