Merge pull request #101 from Ludwigm6/master

trainDI update Example
HannaMeyer · Mar 13, 2024 · faed78b · faed78b
2 parents 08a838a + 3ce2800
commit faed78b
Show file tree

Hide file tree

Showing 2 changed files with 28 additions and 48 deletions.
diff --git a/R/trainDI.R b/R/trainDI.R
@@ -60,46 +60,35 @@
 #' library(sf)
 #' library(terra)
 #' library(caret)
-#' library(viridis)
-#' library(ggplot2)
+#' library(CAST)
 #'
 #' # prepare sample data:
-#' dat <- readRDS(system.file("extdata","Cookfarm.RDS",package="CAST"))
-#' dat <- aggregate(dat[,c("VW","Easting","Northing")],by=list(as.character(dat$SOURCEID)),mean)
-#' pts <- st_as_sf(dat,coords=c("Easting","Northing"))
-#' pts$ID <- 1:nrow(pts)
-#' set.seed(100)
-#' pts <- pts[1:30,]
-#' studyArea <- rast(system.file("extdata","predictors_2012-03-25.tif",package="CAST"))[[1:8]]
-#' trainDat <- extract(studyArea,pts,na.rm=FALSE)
-#' trainDat <- merge(trainDat,pts,by.x="ID",by.y="ID")
-#'
-#' # visualize data spatially:
-#' plot(studyArea)
-#' plot(studyArea$DEM)
-#' plot(pts[,1],add=TRUE,col="black")
+#' data("splotdata")
+#' splotdata = st_drop_geometry(splotdata)
 #'
 #' # train a model:
 #' set.seed(100)
-#' variables <- c("DEM","NDRE.Sd","TWI")
-#' model <- train(trainDat[,which(names(trainDat)%in%variables)],
-#' trainDat$VW, method="rf", importance=TRUE, tuneLength=1,
-#' trControl=trainControl(method="cv",number=5,savePredictions=T))
-#' print(model) #note that this is a quite poor prediction model
-#' prediction <- predict(studyArea,model,na.rm=TRUE)
+#' model <- caret::train(splotdata[,6:16],
+#'                       splotdata$Species_richness,
+#'                       importance=TRUE, tuneLength=1, ntree = 15, method = "rf",
+#'                       trControl = trainControl(method="cv", number=5, savePredictions=T))
+#' # variable importance is used for scaling predictors
 #' plot(varImp(model,scale=FALSE))
 #'
-#' #...then calculate the DI of the trained model:
+#' # calculate the DI of the trained model:
 #' DI = trainDI(model=model)
 #' plot(DI)
 #'
 #' #...or calculate the DI and LPD of the trained model:
 #' # DI = trainDI(model=model, LPD = TRUE)
 #'
 #' # the DI can now be used to compute the AOA (here with LPD):
+#' studyArea = rast(system.file("extdata/predictors_chile.tif", package = "CAST"))
 #' AOA = aoa(studyArea, model = model, trainDI = DI, LPD = TRUE, maxLPD = 1)
 #' print(AOA)
 #' plot(AOA)
+#' plot(AOA$AOA)
+#' plot(AOA$LPD)
 #' }
 #'
 
@@ -198,12 +187,14 @@ trainDI <- function(model = NA,
   for(i in seq(nrow(train))){
 
     # distance to all other training data (for average)
-    trainDistAll   <- .alldistfun(t(train[i,]), train,  method, S_inv=S_inv)[-1]
-    trainDist_avrg <- append(trainDist_avrg, mean(trainDistAll, na.rm = TRUE))
+    ## redundant distance calculation (removed 13.03.24)
+    #trainDistAll   <- .alldistfun(t(train[i,]), train,  method, S_inv=S_inv)[-1]
+    #trainDist_avrg <- append(trainDist_avrg, mean(trainDistAll, na.rm = TRUE))
 
     # calculate  distance to other training data:
     trainDist      <- matrix(.alldistfun(t(matrix(train[i,])), train, method, sorted = FALSE, S_inv))
     trainDist[i]   <- NA
+    trainDist_avrg <- append(trainDist_avrg, mean(trainDist, na.rm = TRUE))
 
 
     # mask of any data that are not used for training for the respective data point (using CV)

diff --git a/man/trainDI.Rd b/man/trainDI.Rd