Include CRAN submission changes

OHDSI · Feb 11, 2025 · d5f8451 · d5f8451
2 parents e54690f + ca5a910
commit d5f8451
Show file tree

Hide file tree

Showing 39 changed files with 148 additions and 96 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -15,3 +15,5 @@ _pkgdown.yml
 ^vignettes/Videos.Rmd
 ^doc$
 ^Meta$
+^CRAN-RELEASE$
+^cran-comments\.md$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: PatientLevelPrediction
 Type: Package
-Title: Developing Patient Level Prediction Models Using the Observational Medical Outcomes Partnership Common Data Model
+Title: Develop Clinical Prediction Models Using the Common Data Model
 Version: 6.4.0
 Date: 2025-02-05
 Authors@R: c(
@@ -11,11 +11,12 @@ Authors@R: c(
     person("Patrick", "Ryan", role = c("aut")),
     person("Peter", "Rijnbeek", role = c("aut")),
     person("Observational Health Data Science and Informatics", role = c("cph")))
-Description: A user friendly way to create patient level prediction models using the OMOP (ObservationalMedical Outcomes Partnership) common data model. Given a
-    cohort of interest and an outcome of interest, the package can use data in the
-    OMOP Common Data Model to build a large set of features. These features can then
-    be used to fit a predictive model with a number of machine learning algorithms.
-    This is further described in J. Reps et al <doi:10.1093/jamia/ocy032>. 
+Description: A user-friendly way to create patient level prediction models using
+  the Observational Medical Outcomes Partnership Common Data Model. Given a cohort
+  of interest and an outcome of interest, the package can use data in the Common
+  Data Model to build a large set of features. These features can then be used to
+  fit a predictive model with a number of machine learning algorithms. This is
+  further described in Reps (2018) <doi:10.1093/jamia/ocy032>.
 License: Apache License 2.0
 URL: https://ohdsi.github.io/PatientLevelPrediction/, https://github.com/OHDSI/PatientLevelPrediction
 BugReports: https://github.com/OHDSI/PatientLevelPrediction/issues

diff --git a/R/DiagnosePlp.R b/R/DiagnosePlp.R
@@ -45,8 +45,11 @@ diagnoseMultiplePlp <- function(
       timeStamp = TRUE,
       logName = "diagnosePlp Log"
     ),
-    saveDirectory = getwd()) {
+    saveDirectory = NULL) {
   # input checks
+  if (is.null(saveDirectory)) {
+    stop("saveDirectory must be specified")
+  }
   checkIsClass(databaseDetails, c("databaseDetails"))
   checkIsClass(modelDesignList, c("list", "modelDesign"))
   checkIsClass(logSettings, "logSettings")

diff --git a/R/ExternalValidatePlp.R b/R/ExternalValidatePlp.R
@@ -208,9 +208,12 @@ externalValidateDbPlp <- function(plpModel,
                                   validationRestrictPlpDataSettings = createRestrictPlpDataSettings(),
                                   settings = createValidationSettings(recalibrate = "weakRecalibration"),
                                   logSettings = createLogSettings(verbosity = "INFO", logName = "validatePLP"),
-                                  outputFolder = getwd()) {
+                                  outputFolder = NULL) {
   # Input checks
   # =======
+  if (is.null(outputFolder)) {
+    stop("outputFolder must be specified")
+  }
 
   checkIsClass(plpModel, "plpModel")
 

diff --git a/R/FeatureEngineering.R b/R/FeatureEngineering.R
@@ -80,8 +80,8 @@ createFeatureEngineeringSettings <- function(type = "none") {
 #' @return
 #' An object of class \code{featureEngineeringSettings}
 #' @examples
-#' \dontrun{ \dontshow{ # requires python and scikit-learn }
-#' # create a feature selection that selects the 100 most associated features
+#' \dontshow{ # dontrun reason: requires python and scikit-learn }
+#' \dontrun{ # create a feature selection that selects the 100 most associated features
 #' featureSelector <- createUnivariateFeatureSelection(k = 100) 
 #' }
 #' @export
@@ -124,8 +124,8 @@ createUnivariateFeatureSelection <- function(k = 100) {
 #' @return
 #' An object of class \code{featureEngineeringSettings}
 #' @examples
-#' \dontrun{ \dontshow{ # requires python and scikit-learn }
-#' featureSelector <- createRandomForestFeatureSelection(ntrees = 2000, maxDepth = 10)
+#' \dontshow{ # dontrun reason: requires python and scikit-learn }
+#' \dontrun{ featureSelector <- createRandomForestFeatureSelection(ntrees = 2000, maxDepth = 10)
 #' }
 #' @export
 createRandomForestFeatureSelection <- function(ntrees = 2000, maxDepth = 17) {

diff --git a/R/HelperFunctions.R b/R/HelperFunctions.R
@@ -101,7 +101,8 @@ listAppend <- function(a, b) {
 #' @param condaPythonVersion String, Python version to use when creating a conda environment
 #' @return location of the created conda or virtual python environment
 #' @examples
-#' \dontrun{ \dontshow{ # don't modify environment in examples }
+#' \dontshow{ # dontrun reason: don't modify environment in examples }
+#' \dontrun{ 
 #'  configurePython(envname="PLP", envtype="conda")
 #' }
 #' @export
@@ -159,8 +160,8 @@ configurePython <- function(envname = "PLP", envtype = NULL, condaPythonVersion
 #'
 #' @return A string indicating the which python environment will be used
 #' @examples
-#' \dontrun{ \dontshow{ # don't modify environment in examples }
-#' # create a conda environment named PLP
+#' \dontshow{ # dontrun reason: don't modify environment in examples }
+#' \dontrun{ # create a conda environment named PLP
 #' configurePython(envname="PLP", envtype="conda")
 #' }
 #' @export

diff --git a/R/LearningCurve.R b/R/LearningCurve.R
@@ -85,7 +85,7 @@ createLearningCurve <- function(
     parallel = TRUE,
     cores = 4,
     modelSettings,
-    saveDirectory = getwd(),
+    saveDirectory = NULL,
     analysisId = "learningCurve",
     populationSettings = createStudyPopulationSettings(),
     splitSettings = createDefaultSplitSetting(),
@@ -106,6 +106,10 @@ createLearningCurve <- function(
       runModelDevelopment = TRUE,
       runCovariateSummary = FALSE
     )) {
+  if (is.null(saveDirectory)) {
+    stop("saveDirectory must be specified")
+  }
+
   if (is.null(analysisId)) {
     analysisId <- gsub(":", "", gsub("-", "", gsub(" ", "", Sys.time())))
   }

diff --git a/R/LightGBM.R b/R/LightGBM.R
@@ -30,6 +30,7 @@
 #' @param scalePosWeight    Controls weight of positive class in loss - useful for imbalanced classes
 #' @param isUnbalance       This parameter cannot be used at the same time with scalePosWeight, choose only one of them. While enabling this should increase the overall performance metric of your model, it will also result in poor estimates of the individual class probabilities.
 #' @param seed              An option to add a seed when training the final model
+#' @return A list of settings that can be used to train a model with \code{runPlp}
 #'
 #' @examplesIf rlang::is_installed("lightgbm")
 #' modelLightGbm <- setLightGBM(

diff --git a/R/Logging.R b/R/Logging.R
@@ -64,8 +64,11 @@ createLog <- function(
     verbosity = "INFO",
     timeStamp = FALSE,
     logName = "PLP Log",
-    saveDirectory = getwd(),
+    saveDirectory = NULL,
     logFileName = paste0("plpLog", as.Date(Sys.Date(), "%Y%m%d"), ".txt")) {
+  if (is.null(saveDirectory)) {
+    stop("saveDirectory for logging must be specified")
+  }
   createDir(saveDirectory)
 
   logFileName <- gsub("[[:punct:]]", "", logFileName)

diff --git a/R/RunMultiplePlp.R b/R/RunMultiplePlp.R
@@ -98,9 +98,13 @@ runMultiplePlp <- function(
       timeStamp = TRUE,
       logName = "runPlp Log"
     ),
-    saveDirectory = getwd(),
+    saveDirectory = NULL,
     sqliteLocation = file.path(saveDirectory, "sqlite")) {
   # input checks
+  if (is.null(saveDirectory)) {
+    stop("saveDirectory must be specified")
+  }
+
   checkIsClass(databaseDetails, c("databaseDetails"))
   checkIsClass(modelDesignList, c("list", "modelDesign"))
   checkIsClass(onlyFetchData, "logical")

diff --git a/R/RunPlp.R b/R/RunPlp.R
@@ -113,10 +113,14 @@ runPlp <- function(
     logName = "runPlp Log"
     ),
   executeSettings = createDefaultExecuteSettings(),
-  saveDirectory = getwd()
+  saveDirectory =  NULL
 ) {
   start <- Sys.time()
 
+  if (is.null(saveDirectory)) {
+    stop("Please provide a saveDirectory")
+  }
+
   # start log 
   analysisPath <- file.path(saveDirectory, analysisId)
   logSettings$saveDirectory <- analysisPath

diff --git a/R/SklearnClassifierSettings.R b/R/SklearnClassifierSettings.R
@@ -24,7 +24,8 @@
 #' @param seed           A seed for the model
 #' @return a modelSettings object
 #' @examples
-#' \dontrun{ \dontshow { # requires python's scikit-learn, checkSklearn() will error without it }
+#' \dontshow{ # dontrun reason: requires python's scikit-learn, checkSklearn() will error without it }
+#' \dontrun{ 
 #' model <- setAdaBoost(nEstimators = list(10),
 #'                      learningRate = list(0.1),
 #'                      seed = 42)
@@ -123,7 +124,8 @@ AdaBoostClassifierInputs <- function(classifier, param) {
 #' @param seed                The random state seed
 #' @return a modelSettings object
 #' @examples
-#' \dontrun{ \dontshow{ # requires python's scikit-learn, checkSklearn() will error without it }
+#' \dontshow{ # dontrun reason: requires python's scikit-learn, checkSklearn() will error without it }
+#' \dontrun{ 
 #' model <- setDecisionTree(criterion = list("gini"),
 #'                          maxDepth = list(4),
 #'                          minSamplesSplit = list(2),
@@ -383,7 +385,8 @@ DecisionTreeClassifierInputs <- function(classifier, param) {
 #' @return a modelSettings object
 #'
 #' @examples
-#' \dontrun{ \dontshow { # requires python's scikit-learn, checkSklearn() will error without it }
+#' \dontshow{ # dontrun reason: requires python's scikit-learn, checkSklearn() will error without it } 
+#' \dontrun{ 
 #' model <- setMLP(hiddenLayerSizes = list(c(20)), alpha=list(3e-4), seed = 42)
 #' }
 #' @export
@@ -542,7 +545,8 @@ MLPClassifierInputs <- function(classifier, param) {
 #'
 #' @return a modelSettings object
 #' @examples
-#' \dontrun{ \dontshow{ # requires python's scikit-learn, checkSklearn() will error without it}
+#' \dontshow{ # dontrun reason: requires python's scikit-learn, checkSklearn() will error without it }
+#' \dontrun{ 
 #' plpData <- getEunomiaPlpData()
 #' model <- setNaiveBayes()
 #' analysisId <- "naiveBayes"
@@ -612,7 +616,8 @@ GaussianNBInputs <- function(classifier, param) {
 #' @param seed  A seed when training the final model
 #' @return a modelSettings object
 #' @examples
-#' \dontrun{ \dontshow{ # requires python's scikit-learn, checkSklearn() will error without it }
+#' \dontshow{ # dontrun reason: requires python's scikit-learn, checkSklearn() will error without it }
+#' \dontrun{ 
 #' plpData <- getEunomiaPlpData()
 #' model <- setRandomForest(ntrees = list(100),
 #'                           maxDepth = list(4),
@@ -785,7 +790,8 @@ RandomForestClassifierInputs <- function(classifier, param) {
 #' @param seed           A seed for the model
 #' @return a modelSettings object
 #' @examples
-#' \dontrun{ \dontshow{ # requires python's scikit-learn, checkSklearn() will error without it }
+#' \dontshow{ # dontrun reason: requires python's scikit-learn, checkSklearn() will error without it }
+#' \dontrun{ 
 #' plpData <- getEunomiaPlpData()
 #' model <- setSVM(C = list(1), gamma = list("scale"), seed = 42)
 #' saveLoc <- file.path(tempdir(), "svm")

diff --git a/R/SklearnToJson.R b/R/SklearnToJson.R
@@ -21,13 +21,16 @@
 #' @param     path  path to the saved model file
 #' @return    nothing, saves the model to the path as json
 #' @examples
-#' \dontrun{ \dontshow{ # requires python environment with sklearn }
+#' \dontshow{ # dontrun reason: requires python environment with sklearn }
+#' \dontrun{ 
 #' sklearn <- reticulate::import("sklearn", convert = FALSE)
 #' model <- sklearn$tree$DecisionTreeClassifier()
 #' model$fit(sklearn$datasets$load_iris()$data, sklearn$datasets$load_iris()$target)
-#' saveLoc <- file.path(tempdir() "model.json")
+#' saveLoc <- file.path(tempdir(), "model.json")
 #' sklearnToJson(model, saveLoc)
+#' # the model.json is saved in the tempdir
 #' dir(tempdir())
+#' # clean up
 #' unlink(saveLoc)
 #' }
 #' @export
@@ -61,7 +64,8 @@ sklearnToJson <- function(model, path) {
 #' @return    a sklearn python model object
 #' @export
 #' @examples
-#' \dontrun{ \dontshow{ # requires python environment with sklearn }
+#' \dontshow{ # dontrun reason: requires python environment with sklearn }
+#' \dontrun{ 
 #' plpData <- getEunomiaPlpData()
 #' modelSettings <- setDecisionTree(maxDepth = list(3), minSamplesSplit = list(2),
 #'                                   minSamplesLeaf = list(1), maxFeatures = list(100))

diff --git a/README.md b/README.md
@@ -14,7 +14,7 @@ Introduction
 
 PatientLevelPrediction is an R package for building and validating patient-level predictive models using data in the OMOP Common Data Model format.  
 
-Reps JM, Schuemie MJ, Suchard MA, Ryan PB, Rijnbeek PR. [Design and implementation of a standardized framework to generate and evaluate patient-level prediction models using observational healthcare data.](https://academic.oup.com/jamia/article/25/8/969/4989437) J Am Med Inform Assoc. 2018;25(8):969-975.
+Reps JM, Schuemie MJ, Suchard MA, Ryan PB, Rijnbeek PR. [Design and implementation of a standardized framework to generate and evaluate patient-level prediction models using observational healthcare data.](https://doi.org/10.1093/jamia/ocy032) J Am Med Inform Assoc. 2018;25(8):969-975.
 
 The figure below illustrates the prediction problem we address. Among a population at risk, we aim to predict which patients at a defined moment in time (t = 0) will experience some outcome during a time-at-risk. Prediction is done using only information about the patients in an observation window prior to that moment in time.
 

diff --git a/man/configurePython.Rd b/man/configurePython.Rd
diff --git a/man/createLearningCurve.Rd b/man/createLearningCurve.Rd
diff --git a/man/createRandomForestFeatureSelection.Rd b/man/createRandomForestFeatureSelection.Rd
diff --git a/man/createUnivariateFeatureSelection.Rd b/man/createUnivariateFeatureSelection.Rd
diff --git a/man/diagnoseMultiplePlp.Rd b/man/diagnoseMultiplePlp.Rd
diff --git a/man/externalValidateDbPlp.Rd b/man/externalValidateDbPlp.Rd
diff --git a/man/predictPlp.Rd b/man/predictPlp.Rd
diff --git a/man/runMultiplePlp.Rd b/man/runMultiplePlp.Rd
diff --git a/man/runPlp.Rd b/man/runPlp.Rd
diff --git a/man/setAdaBoost.Rd b/man/setAdaBoost.Rd
diff --git a/man/setDecisionTree.Rd b/man/setDecisionTree.Rd
diff --git a/man/setLightGBM.Rd b/man/setLightGBM.Rd