-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added ggplot-based graphing to FitCurves
Calls to long-based, multi-series graphing based on participant index. Works with single+ series, likely extensible to the 100's or 1000's if needed. Likely to be supplemented with a "Group" tag or similar, for residual/group comparisons or overall group fit comparisons.
- Loading branch information
Showing
1 changed file
with
222 additions
and
100 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,131 +35,253 @@ | |
## link @ https://cran.r-project.org/web/packages/nlmrt/index.html | ||
## license @ https://cran.r-project.org/web/licenses/GPL-2 | ||
## | ||
## @ggplot2 (R package) = Grammar of Graphics plotting library (Copyright 2016 - Hadley Wickham - GPLv2) | ||
## link @ https://cran.r-project.org/web/packages/ggplot2/index.html | ||
## license @ https://cran.r-project.org/web/licenses/GPL-2 | ||
## | ||
|
||
##' Analyzes purchase task data | ||
##' | ||
##' Analyzes purchase task data | ||
##' @title FitCurves | ||
##' @param mat data frame (long form) of purchase task data. | ||
##' @param equation Character vector of length one. Accepts either "hs" for Hursh and Silberberg (2008) or "koff" for Koffarnus, Franck, Stein, and Bickel (2015). If "hs" and the first price (x) is 0, it will be replaced by replfree. | ||
##' @param k A numeric vector of length one. Reflects the range of consumption in log10 units. If none provided, k will be calculated based on the max/min of the entire sample | ||
##' @param remq0e If TRUE, removes consumption and price where price == 0. Default value is FALSE | ||
##' @param replfree Optionally replaces price == 0 with specified value. Note, if fitting using equation == "hs", and 0 is first price, 0 gets replaced by replfree. Default value is .01 | ||
##' @param rem0 If TRUE, removes all 0s in consumption data prior to analysis. Default value is FALSE. | ||
##' @param plotting If TRUE, removes all 0s in consumption data prior to analysis. Default value is FALSE. | ||
##' @return Data frame, fitting params and CI's/SE's | ||
##' @author Shawn Gilroy <[email protected]> Brent Kaplan <bkaplan4@@ku.edu> | ||
##' @export | ||
FitCurves <- function(mat, equation, k = NULL, remq0e = FALSE, replfree = NULL, rem0 = FALSE) { | ||
FitCurves <- function(mat, equation, k = NULL, remq0e = FALSE, replfree = NULL, rem0 = FALSE, plotting=FALSE) { | ||
|
||
## Assert not Inf | ||
if (is.infinite(k)) | ||
{ | ||
warning("k is Inf. I will calculate a k based on the entire sample.") | ||
k <- log10(max(mat[mat$y > 1, "y"])) - log10(min(mat[mat$y > 1, "y"])) | ||
} | ||
|
||
## If no k is provided | ||
if (is.null(k)) | ||
{ | ||
k <- log10(max(mat[mat$y > 1, "y"])) - log10(min(mat[mat$y > 1, "y"])) | ||
} | ||
|
||
## Get N unique participants, informing loop | ||
participants <- length(unique(mat$p)) | ||
|
||
## Preallocate for speed | ||
cnames <- c("Participant", "Q0e", "BP0", "BP1", "Omaxe", "Pmaxe", "Equation", "Q0", "K", | ||
"R2", "Alpha", "Q0se", "Alphase", "N", "AbsSS", "SdRes", "Q0Low", "Q0High", | ||
"AlphaLow", "AlphaHigh", "EV", "Omaxd", "Pmaxd", "Notes") | ||
|
||
dfres <- data.frame(matrix(vector(), | ||
participants, | ||
length(cnames), | ||
dimnames = list(c(), c(cnames))), stringsAsFactors = FALSE) | ||
|
||
## Loop through unique values as indices, not necessarily sequentially | ||
for (i in unique(mat$p)) | ||
{ | ||
dfres[i, "Participant"] <- i | ||
dfres[i, "Equation"] <- equation | ||
|
||
adf <- NULL | ||
adf <- mat[mat$p == i, ] | ||
adf[, "expend"] <- adf$x * adf$y | ||
adf[, "k"] <- k | ||
|
||
## Find empirical Q0, BP0, BP1 | ||
dfres[i, "Q0e"] <- if (0 %in% adf$x) adf[adf$x == 0, "y"] else NA | ||
dfres[i, "BP0"] <- if (0 %in% adf$y) min(adf[adf$y == 0, "x"]) else NA | ||
dfres[i, "BP1"] <- if (!0 %in% adf$y) max(adf[adf$y != 0, "x"]) else NA | ||
|
||
## Find empirical Pmax, Omax | ||
dfres[i, "Omaxe"] <- max(adf$expend) | ||
dfres[i, "Pmaxe"] <- adf[max(which(adf$expend == max(adf$expend))), "x"] | ||
|
||
if (equation == "hs") | ||
{ | ||
## If retain y where x = 0, replace | ||
if (remq0e) | ||
{ | ||
adf <- adf[adf$x != 0, ] | ||
} else | ||
{ | ||
replfree <- if (is.null(replfree)) 0.01 else replfree | ||
adf[adf$x == 0, "x"] <- replfree | ||
} | ||
|
||
## Drop any zero consumption points altogether | ||
adf <- adf[adf$y != 0, ] | ||
|
||
fit <- NULL | ||
try(fit <- nlmrt::wrapnls(data = adf, | ||
(log(y)/log(10)) ~ (log(q0)/log(10)) + k * (exp(-alpha * q0 * x) - 1), | ||
start = list(q0 = 10, alpha = 0.01), | ||
control = list(maxiter = 1000)), silent = TRUE) | ||
|
||
if (!is.null(fit)) | ||
{ | ||
dfres[i, c("Q0", "Alpha")] <- as.numeric(coef(fit)[c("q0", "alpha")]) | ||
dfres[i, "K"] <- min(adf$k) | ||
dfres[i, c("Q0se", "Alphase")] <- summary(fit)[[10]][c(1, 2), 2] | ||
dfres[i, "N"] <- length(adf$k) | ||
dfres[i, "R2"] <- 1.0 - (deviance(fit)/sum((log10(adf$y) - mean(log10(adf$y)))^2)) | ||
dfres[i, "AbsSS"] <- deviance(fit) | ||
dfres[i, "SdRes"] <- sqrt(deviance(fit)/df.residual(fit)) | ||
dfres[i, c("Q0Low", "Q0High")] <- nlstools::confint2(fit)[c(1, 3)] | ||
dfres[i, c("AlphaLow", "AlphaHigh")] <- nlstools::confint2(fit)[c(2, 4)] | ||
dfres[i, "EV"] <- 1/(dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5) * 100) | ||
dfres[i, "Pmaxd"] <- 1/(dfres[i, "Q0"] * dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5)) * | ||
(0.083 * dfres[i, "K"] + 0.65) | ||
dfres[i, "Omaxd"] <- (10^(log10(dfres[i, "Q0"]) + (dfres[i, "K"] * (exp(-dfres[i, "Alpha"] * | ||
dfres[i, "Q0"] * dfres[i, "Pmaxd"]) - 1)))) * dfres[i, "Pmaxd"] | ||
} | ||
} else | ||
{ | ||
if (equation == "koff") | ||
{ | ||
if (rem0) | ||
{ | ||
adf <- adf[adf$y != 0, ] | ||
} | ||
|
||
## Assert not Inf | ||
if (is.infinite(k)) { | ||
warning("k is Inf. I will calculate a k based on the entire sample.") | ||
k <- log10(max(mat[mat$y > 1, "y"])) - log10(min(mat[mat$y > 1, "y"])) | ||
fit <- NULL | ||
try(fit <- nlmrt::wrapnls(data = adf, | ||
y ~ q0 * 10^(k * (exp(-alpha * q0 * x) - 1)), | ||
start = list(q0 = 10, alpha = 0.01), | ||
control = list(maxiter = 1000)), silent = TRUE) | ||
|
||
if (!is.null(fit)) | ||
{ | ||
dfres[i, c("Q0", "Alpha")] <- as.numeric(coef(fit)[c("q0", "alpha")]) | ||
dfres[i, "K"] <- min(adf$k) | ||
dfres[i, c("Q0se", "Alphase")] <- summary(fit)[[10]][c(1, 2), 2] | ||
dfres[i, "N"] <- length(adf$k) | ||
dfres[i, "R2"] <- 1.0 -(deviance(fit)/sum((adf$y - mean(adf$y))^2)) | ||
dfres[i, "AbsSS"] <- deviance(fit) | ||
dfres[i, "SdRes"] <- sqrt(deviance(fit)/df.residual(fit)) | ||
dfres[i, c("Q0Low", "Q0High")] <- nlstools::confint2(fit)[c(1, 3)] | ||
dfres[i, c("AlphaLow", "AlphaHigh")] <- nlstools::confint2(fit)[c(2, 4)] | ||
dfres[i, "EV"] <- 1/(dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5) * 100) | ||
dfres[i, "Pmaxd"] <- 1/(dfres[i, "Q0"] * dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5)) * | ||
(0.083 * dfres[i, "K"] + 0.65) | ||
dfres[i, "Omaxd"] <- (dfres[i, "Q0"] * (10^(dfres[i, "K"] * (exp(-dfres[i, "Alpha"] * | ||
dfres[i, "Q0"] * dfres[i, "Pmaxd"]) - 1)))) * dfres[i, "Pmaxd"] | ||
} | ||
} | ||
} | ||
} | ||
|
||
if(plotting) | ||
{ | ||
## Can add this to build tools and remove later, just added for now -sg | ||
|
||
## If no k is provided | ||
if (is.null(k)) { | ||
k <- log10(max(mat[mat$y > 1, "y"])) - log10(min(mat[mat$y > 1, "y"])) | ||
if(!require(ggplot2)) { | ||
install.packages("ggplot2") | ||
require(ggplot2) | ||
library(ggplot2) | ||
} | ||
|
||
## Get unique participants, informing loop | ||
participants <- length(unique(mat$p)) | ||
xDraw <- seq(min(mat$x), max(mat$x), 0.01) | ||
|
||
## Preallocate for speed | ||
cnames <- c("Participant", "Q0e", "BP0", "BP1", "Omaxe", "Pmaxe", "Equation", "Q0", "K", | ||
"R2", "Alpha", "Q0se", "Alphase", "N", "AbsSS", "SdRes", "Q0Low", "Q0High", | ||
"AlphaLow", "AlphaHigh", "EV", "Omaxd", "Pmaxd", "Notes") | ||
p.rep <- seq(1,max(mat$p),1) | ||
|
||
dfres <- data.frame(matrix(vector(), participants, length(cnames), | ||
dimnames = list(c(), c(cnames))), stringsAsFactors = FALSE) | ||
graphFrame<-data.frame(Individual=rep(seq(min(p.rep), max(p.rep),1),each=length(xDraw)), | ||
DemandSeries=rep(seq(1:length(xDraw)-1),length(p.rep)), | ||
YSeries=rep(seq(1:length(xDraw)-1),length(p.rep)), | ||
XSeries=rep(seq(1:length(xDraw)-1),length(p.rep))) | ||
|
||
## Loop through unique values as indices, not necessarily sequentially | ||
for (i in unique(mat$p)) { | ||
dfres[i, "Participant"] <- i | ||
dfres[i, "Equation"] <- equation | ||
#browser() | ||
adf <- NULL | ||
adf <- mat[mat$p == i, ] | ||
adf[, "expend"] <- adf$x * adf$y | ||
adf[, "k"] <- k | ||
pointFrame <- NULL | ||
|
||
## Find empirical Q0, BP0, BP1 | ||
dfres[i, "Q0e"] <- if (0 %in% adf$x) adf[adf$x == 0, "y"] else NA | ||
dfres[i, "BP0"] <- if (0 %in% adf$y) min(adf[adf$y == 0, "x"]) else NA | ||
dfres[i, "BP1"] <- if (!0 %in% adf$y) max(adf[adf$y != 0, "x"]) else NA | ||
for(i in unique(mat$p)) | ||
{ | ||
qTemp <- dfres[i,]$Q0 | ||
aTemp <- dfres[i,]$Alpha | ||
kTemp <- dfres[i,]$K | ||
|
||
## Find empirical Pmax, Omax | ||
dfres[i, "Omaxe"] <- max(adf$expend) | ||
dfres[i, "Pmaxe"] <- adf[max(which(adf$expend == max(adf$expend))), "x"] | ||
for (j in 1:length(xDraw)) | ||
{ | ||
### mapped fittings, based on model | ||
if (equation == "hs") | ||
{ | ||
graphFrame[ graphFrame$Individual==i & graphFrame$DemandSeries==as.numeric(j),]$YSeries <- log10(qTemp) + kTemp * (exp(-aTemp*qTemp*xDraw[j])-1) | ||
}else if (equation == "koff") | ||
{ | ||
graphFrame[ graphFrame$Individual==i & graphFrame$DemandSeries==as.numeric(j),]$YSeries <- qTemp * 10^(kTemp * (exp(-aTemp * qTemp * xDraw[j]) - 1)) | ||
} | ||
|
||
if (equation == "hs") { | ||
### Base domain | ||
graphFrame[ graphFrame$Individual==i & graphFrame$DemandSeries==as.numeric(j),]$XSeries <- xDraw[j] | ||
} | ||
} | ||
|
||
axis_mod <- function(l) { | ||
l <- paste("10^", l, sep = "") | ||
parse(text=l) | ||
} | ||
|
||
if (equation == "hs") | ||
{ | ||
pointFrame <- data.frame(X=mat$x, Y=log10(mat$y), Individual=mat$p) | ||
|
||
logChart <- ggplot() + | ||
geom_line(data=graphFrame, aes(x=XSeries, y=YSeries, group=Individual, colour = factor(Individual))) + | ||
geom_point(data=pointFrame, aes(x=pointFrame$X, y=pointFrame$Y, shape=factor(Individual))) + | ||
expand_limits(y=0) + | ||
theme_bw() + | ||
theme(panel.grid.minor = element_blank()) + | ||
ggtitle("Fitted Demand Curves\n") + | ||
ylab("log(Consumption)") + | ||
scale_x_log10( | ||
breaks = scales::trans_breaks("log10", function(x) 10^x), | ||
labels = scales::trans_format("log10", scales::math_format(10^.x)) | ||
) + | ||
scale_y_continuous(labels=axis_mod) + | ||
annotation_logticks(sides = "b") + | ||
xlab("log(Price)") + | ||
theme(legend.title = element_blank()) + | ||
theme(legend.position = "none") + | ||
theme(legend.direction = "vertical") + | ||
theme(panel.grid.minor = element_blank()) + | ||
theme(panel.grid.major = element_blank()) + | ||
guides(col = guide_legend(ncol = 3)) | ||
|
||
print(logChart) | ||
|
||
}else if (equation == "koff") | ||
{ | ||
pointFrame <- data.frame(X=mat$x, Y=mat$y, Individual=mat$p) | ||
|
||
logChart <- ggplot() + | ||
geom_line(data=graphFrame, aes(x=XSeries, y=YSeries, group=Individual, colour = factor(Individual))) + | ||
geom_point(data=pointFrame, aes(x=pointFrame$X, y=pointFrame$Y, shape=factor(Individual))) + | ||
expand_limits(y=0) + | ||
theme_bw() + | ||
theme(panel.grid.minor = element_blank()) + | ||
ggtitle("Fitted Demand Curves\n") + | ||
ylab("log(Consumption)") + | ||
scale_x_log10( | ||
breaks = scales::trans_breaks("log10", function(x) 10^x), | ||
labels = scales::trans_format("log10", scales::math_format(10^.x)) | ||
) + | ||
annotation_logticks(sides = "b") + | ||
xlab("log(Price)") + | ||
theme(legend.title = element_blank()) + | ||
theme(legend.position = "none") + | ||
theme(legend.direction = "vertical") + | ||
theme(panel.grid.minor = element_blank()) + | ||
theme(panel.grid.major = element_blank()) + | ||
guides(col = guide_legend(ncol = 3)) | ||
|
||
print(logChart) | ||
|
||
## If retain y where x = 0, replace | ||
if (remq0e) { | ||
adf <- adf[adf$x != 0, ] | ||
} else { | ||
replfree <- if (is.null(replfree)) 0.01 else replfree | ||
adf[adf$x == 0, "x"] <- replfree | ||
} | ||
|
||
## Drop any zero consumption points altogether | ||
adf <- adf[adf$y != 0, ] | ||
|
||
fit <- NULL | ||
try(fit <- nlmrt::wrapnls(data = adf, | ||
(log(y)/log(10)) ~ (log(q0)/log(10)) + k * (exp(-alpha * q0 * x) - 1), | ||
start = list(q0 = 10, alpha = 0.01), | ||
control = list(maxiter = 1000)), silent = TRUE) | ||
|
||
if (!is.null(fit)) { | ||
dfres[i, c("Q0", "Alpha")] <- as.numeric(coef(fit)[c("q0", "alpha")]) | ||
dfres[i, "K"] <- min(adf$k) | ||
dfres[i, c("Q0se", "Alphase")] <- summary(fit)[[10]][c(1, 2), 2] | ||
dfres[i, "N"] <- length(adf$k) | ||
dfres[i, "R2"] <- 1.0 - (deviance(fit)/sum((log10(adf$y) - mean(log10(adf$y)))^2)) | ||
dfres[i, "AbsSS"] <- deviance(fit) | ||
dfres[i, "SdRes"] <- sqrt(deviance(fit)/df.residual(fit)) | ||
dfres[i, c("Q0Low", "Q0High")] <- nlstools::confint2(fit)[c(1, 3)] | ||
dfres[i, c("AlphaLow", "AlphaHigh")] <- nlstools::confint2(fit)[c(2, 4)] | ||
dfres[i, "EV"] <- 1/(dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5) * 100) | ||
dfres[i, "Pmaxd"] <- 1/(dfres[i, "Q0"] * dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5)) * | ||
(0.083 * dfres[i, "K"] + 0.65) | ||
dfres[i, "Omaxd"] <- (10^(log10(dfres[i, "Q0"]) + (dfres[i, "K"] * (exp(-dfres[i, "Alpha"] * | ||
dfres[i, "Q0"] * dfres[i, "Pmaxd"]) - 1)))) * dfres[i, "Pmaxd"] | ||
} | ||
} else { | ||
if (equation == "koff") { | ||
|
||
if (rem0) { | ||
adf <- adf[adf$y != 0, ] | ||
} | ||
|
||
fit <- NULL | ||
try(fit <- nlmrt::wrapnls(data = adf, | ||
y ~ q0 * 10^(k * (exp(-alpha * q0 * x) - 1)), | ||
start = list(q0 = 10, alpha = 0.01), | ||
control = list(maxiter = 1000)), silent = TRUE) | ||
|
||
if (!is.null(fit)) { | ||
dfres[i, c("Q0", "Alpha")] <- as.numeric(coef(fit)[c("q0", "alpha")]) | ||
dfres[i, "K"] <- min(adf$k) | ||
dfres[i, c("Q0se", "Alphase")] <- summary(fit)[[10]][c(1, 2), 2] | ||
dfres[i, "N"] <- length(adf$k) | ||
dfres[i, "R2"] <- 1.0 -(deviance(fit)/sum((adf$y - mean(adf$y))^2)) | ||
dfres[i, "AbsSS"] <- deviance(fit) | ||
dfres[i, "SdRes"] <- sqrt(deviance(fit)/df.residual(fit)) | ||
dfres[i, c("Q0Low", "Q0High")] <- nlstools::confint2(fit)[c(1, 3)] | ||
dfres[i, c("AlphaLow", "AlphaHigh")] <- nlstools::confint2(fit)[c(2, 4)] | ||
dfres[i, "EV"] <- 1/(dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5) * 100) | ||
dfres[i, "Pmaxd"] <- 1/(dfres[i, "Q0"] * dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5)) * | ||
(0.083 * dfres[i, "K"] + 0.65) | ||
dfres[i, "Omaxd"] <- (dfres[i, "Q0"] * (10^(dfres[i, "K"] * (exp(-dfres[i, "Alpha"] * | ||
dfres[i, "Q0"] * dfres[i, "Pmaxd"]) - 1)))) * dfres[i, "Pmaxd"] | ||
} | ||
} | ||
} | ||
} | ||
return(dfres) | ||
} | ||
|
||
return(dfres) | ||
} | ||
|
||
##' Analyzes a dataframe and returns the regression model. | ||
|
@@ -391,4 +513,4 @@ calcAUC <- function(dfs, qmaxs = NULL) { | |
aucs <- sapply(dfs, cauc) | ||
aucs <- aucs / maxauc | ||
aucs | ||
} | ||
} |