diff --git a/R/trainDI.R b/R/trainDI.R index 5e65a356..7006f598 100644 --- a/R/trainDI.R +++ b/R/trainDI.R @@ -60,36 +60,22 @@ #' library(sf) #' library(terra) #' library(caret) -#' library(viridis) -#' library(ggplot2) +#' library(CAST) #' #' # prepare sample data: -#' dat <- readRDS(system.file("extdata","Cookfarm.RDS",package="CAST")) -#' dat <- aggregate(dat[,c("VW","Easting","Northing")],by=list(as.character(dat$SOURCEID)),mean) -#' pts <- st_as_sf(dat,coords=c("Easting","Northing")) -#' pts$ID <- 1:nrow(pts) -#' set.seed(100) -#' pts <- pts[1:30,] -#' studyArea <- rast(system.file("extdata","predictors_2012-03-25.tif",package="CAST"))[[1:8]] -#' trainDat <- extract(studyArea,pts,na.rm=FALSE) -#' trainDat <- merge(trainDat,pts,by.x="ID",by.y="ID") -#' -#' # visualize data spatially: -#' plot(studyArea) -#' plot(studyArea$DEM) -#' plot(pts[,1],add=TRUE,col="black") +#' data("splotdata") +#' splotdata = st_drop_geometry(splotdata) #' #' # train a model: #' set.seed(100) -#' variables <- c("DEM","NDRE.Sd","TWI") -#' model <- train(trainDat[,which(names(trainDat)%in%variables)], -#' trainDat$VW, method="rf", importance=TRUE, tuneLength=1, -#' trControl=trainControl(method="cv",number=5,savePredictions=T)) -#' print(model) #note that this is a quite poor prediction model -#' prediction <- predict(studyArea,model,na.rm=TRUE) +#' model <- caret::train(splotdata[,6:16], +#' splotdata$Species_richness, +#' importance=TRUE, tuneLength=1, ntree = 15, method = "rf", +#' trControl = trainControl(method="cv", number=5, savePredictions=T)) +#' # variable importance is used for scaling predictors #' plot(varImp(model,scale=FALSE)) #' -#' #...then calculate the DI of the trained model: +#' # calculate the DI of the trained model: #' DI = trainDI(model=model) #' plot(DI) #' @@ -97,9 +83,12 @@ #' # DI = trainDI(model=model, LPD = TRUE) #' #' # the DI can now be used to compute the AOA (here with LPD): +#' studyArea = rast(system.file("extdata/predictors_chile.tif", package = "CAST")) #' AOA = aoa(studyArea, model = model, trainDI = DI, LPD = TRUE, maxLPD = 1) #' print(AOA) #' plot(AOA) +#' plot(AOA$AOA) +#' plot(AOA$LPD) #' } #' @@ -198,12 +187,14 @@ trainDI <- function(model = NA, for(i in seq(nrow(train))){ # distance to all other training data (for average) - trainDistAll <- .alldistfun(t(train[i,]), train, method, S_inv=S_inv)[-1] - trainDist_avrg <- append(trainDist_avrg, mean(trainDistAll, na.rm = TRUE)) + ## redundant distance calculation (removed 13.03.24) + #trainDistAll <- .alldistfun(t(train[i,]), train, method, S_inv=S_inv)[-1] + #trainDist_avrg <- append(trainDist_avrg, mean(trainDistAll, na.rm = TRUE)) # calculate distance to other training data: trainDist <- matrix(.alldistfun(t(matrix(train[i,])), train, method, sorted = FALSE, S_inv)) trainDist[i] <- NA + trainDist_avrg <- append(trainDist_avrg, mean(trainDist, na.rm = TRUE)) # mask of any data that are not used for training for the respective data point (using CV) diff --git a/man/trainDI.Rd b/man/trainDI.Rd index 76d28f6c..5384f0dd 100644 --- a/man/trainDI.Rd +++ b/man/trainDI.Rd @@ -74,36 +74,22 @@ or to facilitate a parallelization of \code{\link{aoa}} by avoiding a repeated c library(sf) library(terra) library(caret) -library(viridis) -library(ggplot2) +library(CAST) # prepare sample data: -dat <- readRDS(system.file("extdata","Cookfarm.RDS",package="CAST")) -dat <- aggregate(dat[,c("VW","Easting","Northing")],by=list(as.character(dat$SOURCEID)),mean) -pts <- st_as_sf(dat,coords=c("Easting","Northing")) -pts$ID <- 1:nrow(pts) -set.seed(100) -pts <- pts[1:30,] -studyArea <- rast(system.file("extdata","predictors_2012-03-25.tif",package="CAST"))[[1:8]] -trainDat <- extract(studyArea,pts,na.rm=FALSE) -trainDat <- merge(trainDat,pts,by.x="ID",by.y="ID") - -# visualize data spatially: -plot(studyArea) -plot(studyArea$DEM) -plot(pts[,1],add=TRUE,col="black") +data("splotdata") +splotdata = st_drop_geometry(splotdata) # train a model: set.seed(100) -variables <- c("DEM","NDRE.Sd","TWI") -model <- train(trainDat[,which(names(trainDat)\%in\%variables)], -trainDat$VW, method="rf", importance=TRUE, tuneLength=1, -trControl=trainControl(method="cv",number=5,savePredictions=T)) -print(model) #note that this is a quite poor prediction model -prediction <- predict(studyArea,model,na.rm=TRUE) +model <- caret::train(splotdata[,6:16], + splotdata$Species_richness, + importance=TRUE, tuneLength=1, ntree = 15, method = "rf", + trControl = trainControl(method="cv", number=5, savePredictions=T)) +# variable importance is used for scaling predictors plot(varImp(model,scale=FALSE)) -#...then calculate the DI of the trained model: +# calculate the DI of the trained model: DI = trainDI(model=model) plot(DI) @@ -111,9 +97,12 @@ plot(DI) # DI = trainDI(model=model, LPD = TRUE) # the DI can now be used to compute the AOA (here with LPD): +studyArea = rast(system.file("extdata/predictors_chile.tif", package = "CAST")) AOA = aoa(studyArea, model = model, trainDI = DI, LPD = TRUE, maxLPD = 1) print(AOA) plot(AOA) +plot(AOA$AOA) +plot(AOA$LPD) } }