Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Push for graphics in fit curves #1

Merged
merged 4 commits into from
May 14, 2016
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
320 changes: 221 additions & 99 deletions R/analyze.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,131 +35,253 @@
## link @ https://cran.r-project.org/web/packages/nlmrt/index.html
## license @ https://cran.r-project.org/web/licenses/GPL-2
##
## @ggplot2 (R package) = Grammar of Graphics plotting library (Copyright 2016 - Hadley Wickham - GPLv2)
## link @ https://cran.r-project.org/web/packages/ggplot2/index.html
## license @ https://cran.r-project.org/web/licenses/GPL-2
##

##' Analyzes purchase task data
##'
##' Analyzes purchase task data
##' @title FitCurves
##' @param mat data frame (long form) of purchase task data.
##' @param equation Character vector of length one. Accepts either "hs" for Hursh and Silberberg (2008) or "koff" for Koffarnus, Franck, Stein, and Bickel (2015). If "hs" and the first price (x) is 0, it will be replaced by replfree.
##' @param k A numeric vector of length one. Reflects the range of consumption in log10 units. If none provided, k will be calculated based on the max/min of the entire sample
##' @param remq0e If TRUE, removes consumption and price where price == 0. Default value is FALSE
##' @param replfree Optionally replaces price == 0 with specified value. Note, if fitting using equation == "hs", and 0 is first price, 0 gets replaced by replfree. Default value is .01
##' @param rem0 If TRUE, removes all 0s in consumption data prior to analysis. Default value is FALSE.
##' @param plotting If TRUE, removes all 0s in consumption data prior to analysis. Default value is FALSE.
##' @return Data frame, fitting params and CI's/SE's
##' @author Shawn Gilroy <[email protected]> Brent Kaplan <bkaplan4@@ku.edu>
##' @export
FitCurves <- function(mat, equation, k = NULL, remq0e = FALSE, replfree = NULL, rem0 = FALSE) {
FitCurves <- function(mat, equation, k = NULL, remq0e = FALSE, replfree = NULL, rem0 = FALSE, plotting=FALSE) {

## Assert not Inf
if (is.infinite(k))
{
warning("k is Inf. I will calculate a k based on the entire sample.")
k <- log10(max(mat[mat$y > 1, "y"])) - log10(min(mat[mat$y > 1, "y"]))
}

## If no k is provided
if (is.null(k))
{
k <- log10(max(mat[mat$y > 1, "y"])) - log10(min(mat[mat$y > 1, "y"]))
}

## Get N unique participants, informing loop
participants <- length(unique(mat$p))

## Preallocate for speed
cnames <- c("Participant", "Q0e", "BP0", "BP1", "Omaxe", "Pmaxe", "Equation", "Q0", "K",
"R2", "Alpha", "Q0se", "Alphase", "N", "AbsSS", "SdRes", "Q0Low", "Q0High",
"AlphaLow", "AlphaHigh", "EV", "Omaxd", "Pmaxd", "Notes")

dfres <- data.frame(matrix(vector(),
participants,
length(cnames),
dimnames = list(c(), c(cnames))), stringsAsFactors = FALSE)

## Loop through unique values as indices, not necessarily sequentially
for (i in unique(mat$p))
{
dfres[i, "Participant"] <- i
dfres[i, "Equation"] <- equation

adf <- NULL
adf <- mat[mat$p == i, ]
adf[, "expend"] <- adf$x * adf$y
adf[, "k"] <- k

## Find empirical Q0, BP0, BP1
dfres[i, "Q0e"] <- if (0 %in% adf$x) adf[adf$x == 0, "y"] else NA
dfres[i, "BP0"] <- if (0 %in% adf$y) min(adf[adf$y == 0, "x"]) else NA
dfres[i, "BP1"] <- if (!0 %in% adf$y) max(adf[adf$y != 0, "x"]) else NA

## Find empirical Pmax, Omax
dfres[i, "Omaxe"] <- max(adf$expend)
dfres[i, "Pmaxe"] <- adf[max(which(adf$expend == max(adf$expend))), "x"]

if (equation == "hs")
{
## If retain y where x = 0, replace
if (remq0e)
{
adf <- adf[adf$x != 0, ]
} else
{
replfree <- if (is.null(replfree)) 0.01 else replfree
adf[adf$x == 0, "x"] <- replfree
}

## Drop any zero consumption points altogether
adf <- adf[adf$y != 0, ]

fit <- NULL
try(fit <- nlmrt::wrapnls(data = adf,
(log(y)/log(10)) ~ (log(q0)/log(10)) + k * (exp(-alpha * q0 * x) - 1),
start = list(q0 = 10, alpha = 0.01),
control = list(maxiter = 1000)), silent = TRUE)

if (!is.null(fit))
{
dfres[i, c("Q0", "Alpha")] <- as.numeric(coef(fit)[c("q0", "alpha")])
dfres[i, "K"] <- min(adf$k)
dfres[i, c("Q0se", "Alphase")] <- summary(fit)[[10]][c(1, 2), 2]
dfres[i, "N"] <- length(adf$k)
dfres[i, "R2"] <- 1.0 - (deviance(fit)/sum((log10(adf$y) - mean(log10(adf$y)))^2))
dfres[i, "AbsSS"] <- deviance(fit)
dfres[i, "SdRes"] <- sqrt(deviance(fit)/df.residual(fit))
dfres[i, c("Q0Low", "Q0High")] <- nlstools::confint2(fit)[c(1, 3)]
dfres[i, c("AlphaLow", "AlphaHigh")] <- nlstools::confint2(fit)[c(2, 4)]
dfres[i, "EV"] <- 1/(dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5) * 100)
dfres[i, "Pmaxd"] <- 1/(dfres[i, "Q0"] * dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5)) *
(0.083 * dfres[i, "K"] + 0.65)
dfres[i, "Omaxd"] <- (10^(log10(dfres[i, "Q0"]) + (dfres[i, "K"] * (exp(-dfres[i, "Alpha"] *
dfres[i, "Q0"] * dfres[i, "Pmaxd"]) - 1)))) * dfres[i, "Pmaxd"]
}
} else
{
if (equation == "koff")
{
if (rem0)
{
adf <- adf[adf$y != 0, ]
}

## Assert not Inf
if (is.infinite(k)) {
warning("k is Inf. I will calculate a k based on the entire sample.")
k <- log10(max(mat[mat$y > 1, "y"])) - log10(min(mat[mat$y > 1, "y"]))
fit <- NULL
try(fit <- nlmrt::wrapnls(data = adf,
y ~ q0 * 10^(k * (exp(-alpha * q0 * x) - 1)),
start = list(q0 = 10, alpha = 0.01),
control = list(maxiter = 1000)), silent = TRUE)

if (!is.null(fit))
{
dfres[i, c("Q0", "Alpha")] <- as.numeric(coef(fit)[c("q0", "alpha")])
dfres[i, "K"] <- min(adf$k)
dfres[i, c("Q0se", "Alphase")] <- summary(fit)[[10]][c(1, 2), 2]
dfres[i, "N"] <- length(adf$k)
dfres[i, "R2"] <- 1.0 -(deviance(fit)/sum((adf$y - mean(adf$y))^2))
dfres[i, "AbsSS"] <- deviance(fit)
dfres[i, "SdRes"] <- sqrt(deviance(fit)/df.residual(fit))
dfres[i, c("Q0Low", "Q0High")] <- nlstools::confint2(fit)[c(1, 3)]
dfres[i, c("AlphaLow", "AlphaHigh")] <- nlstools::confint2(fit)[c(2, 4)]
dfres[i, "EV"] <- 1/(dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5) * 100)
dfres[i, "Pmaxd"] <- 1/(dfres[i, "Q0"] * dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5)) *
(0.083 * dfres[i, "K"] + 0.65)
dfres[i, "Omaxd"] <- (dfres[i, "Q0"] * (10^(dfres[i, "K"] * (exp(-dfres[i, "Alpha"] *
dfres[i, "Q0"] * dfres[i, "Pmaxd"]) - 1)))) * dfres[i, "Pmaxd"]
}
}
}
}

if(plotting)
{
## Can add this to build tools and remove later, just added for now -sg

## If no k is provided
if (is.null(k)) {
k <- log10(max(mat[mat$y > 1, "y"])) - log10(min(mat[mat$y > 1, "y"]))
if(!require(ggplot2)) {
install.packages("ggplot2")
require(ggplot2)
library(ggplot2)
}

## Get unique participants, informing loop
participants <- length(unique(mat$p))
xDraw <- seq(min(mat$x), max(mat$x), 0.01)

## Preallocate for speed
cnames <- c("Participant", "Q0e", "BP0", "BP1", "Omaxe", "Pmaxe", "Equation", "Q0", "K",
"R2", "Alpha", "Q0se", "Alphase", "N", "AbsSS", "SdRes", "Q0Low", "Q0High",
"AlphaLow", "AlphaHigh", "EV", "Omaxd", "Pmaxd", "Notes")
p.rep <- seq(1,max(mat$p),1)

dfres <- data.frame(matrix(vector(), participants, length(cnames),
dimnames = list(c(), c(cnames))), stringsAsFactors = FALSE)
graphFrame<-data.frame(Individual=rep(seq(min(p.rep), max(p.rep),1),each=length(xDraw)),
DemandSeries=rep(seq(1:length(xDraw)-1),length(p.rep)),
YSeries=rep(seq(1:length(xDraw)-1),length(p.rep)),
XSeries=rep(seq(1:length(xDraw)-1),length(p.rep)))

## Loop through unique values as indices, not necessarily sequentially
for (i in unique(mat$p)) {
dfres[i, "Participant"] <- i
dfres[i, "Equation"] <- equation
#browser()
adf <- NULL
adf <- mat[mat$p == i, ]
adf[, "expend"] <- adf$x * adf$y
adf[, "k"] <- k
pointFrame <- NULL

## Find empirical Q0, BP0, BP1
dfres[i, "Q0e"] <- if (0 %in% adf$x) adf[adf$x == 0, "y"] else NA
dfres[i, "BP0"] <- if (0 %in% adf$y) min(adf[adf$y == 0, "x"]) else NA
dfres[i, "BP1"] <- if (!0 %in% adf$y) max(adf[adf$y != 0, "x"]) else NA
for(i in unique(mat$p))
{
qTemp <- dfres[i,]$Q0
aTemp <- dfres[i,]$Alpha
kTemp <- dfres[i,]$K

## Find empirical Pmax, Omax
dfres[i, "Omaxe"] <- max(adf$expend)
dfres[i, "Pmaxe"] <- adf[max(which(adf$expend == max(adf$expend))), "x"]
for (j in 1:length(xDraw))
{
### mapped fittings, based on model
if (equation == "hs")
{
graphFrame[ graphFrame$Individual==i & graphFrame$DemandSeries==as.numeric(j),]$YSeries <- log10(qTemp) + kTemp * (exp(-aTemp*qTemp*xDraw[j])-1)
}else if (equation == "koff")
{
graphFrame[ graphFrame$Individual==i & graphFrame$DemandSeries==as.numeric(j),]$YSeries <- qTemp * 10^(kTemp * (exp(-aTemp * qTemp * xDraw[j]) - 1))
}

if (equation == "hs") {
### Base domain
graphFrame[ graphFrame$Individual==i & graphFrame$DemandSeries==as.numeric(j),]$XSeries <- xDraw[j]
}
}

## If retain y where x = 0, replace
if (remq0e) {
adf <- adf[adf$x != 0, ]
} else {
replfree <- if (is.null(replfree)) 0.01 else replfree
adf[adf$x == 0, "x"] <- replfree
}

## Drop any zero consumption points altogether
adf <- adf[adf$y != 0, ]

fit <- NULL
try(fit <- nlmrt::wrapnls(data = adf,
(log(y)/log(10)) ~ (log(q0)/log(10)) + k * (exp(-alpha * q0 * x) - 1),
start = list(q0 = 10, alpha = 0.01),
control = list(maxiter = 1000)), silent = TRUE)

if (!is.null(fit)) {
dfres[i, c("Q0", "Alpha")] <- as.numeric(coef(fit)[c("q0", "alpha")])
dfres[i, "K"] <- min(adf$k)
dfres[i, c("Q0se", "Alphase")] <- summary(fit)[[10]][c(1, 2), 2]
dfres[i, "N"] <- length(adf$k)
dfres[i, "R2"] <- 1.0 - (deviance(fit)/sum((log10(adf$y) - mean(log10(adf$y)))^2))
dfres[i, "AbsSS"] <- deviance(fit)
dfres[i, "SdRes"] <- sqrt(deviance(fit)/df.residual(fit))
dfres[i, c("Q0Low", "Q0High")] <- nlstools::confint2(fit)[c(1, 3)]
dfres[i, c("AlphaLow", "AlphaHigh")] <- nlstools::confint2(fit)[c(2, 4)]
dfres[i, "EV"] <- 1/(dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5) * 100)
dfres[i, "Pmaxd"] <- 1/(dfres[i, "Q0"] * dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5)) *
(0.083 * dfres[i, "K"] + 0.65)
dfres[i, "Omaxd"] <- (10^(log10(dfres[i, "Q0"]) + (dfres[i, "K"] * (exp(-dfres[i, "Alpha"] *
dfres[i, "Q0"] * dfres[i, "Pmaxd"]) - 1)))) * dfres[i, "Pmaxd"]
}
} else {
if (equation == "koff") {

if (rem0) {
adf <- adf[adf$y != 0, ]
}

fit <- NULL
try(fit <- nlmrt::wrapnls(data = adf,
y ~ q0 * 10^(k * (exp(-alpha * q0 * x) - 1)),
start = list(q0 = 10, alpha = 0.01),
control = list(maxiter = 1000)), silent = TRUE)

if (!is.null(fit)) {
dfres[i, c("Q0", "Alpha")] <- as.numeric(coef(fit)[c("q0", "alpha")])
dfres[i, "K"] <- min(adf$k)
dfres[i, c("Q0se", "Alphase")] <- summary(fit)[[10]][c(1, 2), 2]
dfres[i, "N"] <- length(adf$k)
dfres[i, "R2"] <- 1.0 -(deviance(fit)/sum((adf$y - mean(adf$y))^2))
dfres[i, "AbsSS"] <- deviance(fit)
dfres[i, "SdRes"] <- sqrt(deviance(fit)/df.residual(fit))
dfres[i, c("Q0Low", "Q0High")] <- nlstools::confint2(fit)[c(1, 3)]
dfres[i, c("AlphaLow", "AlphaHigh")] <- nlstools::confint2(fit)[c(2, 4)]
dfres[i, "EV"] <- 1/(dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5) * 100)
dfres[i, "Pmaxd"] <- 1/(dfres[i, "Q0"] * dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5)) *
(0.083 * dfres[i, "K"] + 0.65)
dfres[i, "Omaxd"] <- (dfres[i, "Q0"] * (10^(dfres[i, "K"] * (exp(-dfres[i, "Alpha"] *
dfres[i, "Q0"] * dfres[i, "Pmaxd"]) - 1)))) * dfres[i, "Pmaxd"]
}
}
}
axis_mod <- function(l) {
l <- paste("10^", l, sep = "")
parse(text=l)
}
return(dfres)

if (equation == "hs")
{
pointFrame <- data.frame(X=mat$x, Y=log10(mat$y), Individual=mat$p)

logChart <- ggplot() +
geom_line(data=graphFrame, aes(x=XSeries, y=YSeries, group=Individual, colour = factor(Individual))) +
geom_point(data=pointFrame, aes(x=pointFrame$X, y=pointFrame$Y, shape=factor(Individual))) +
expand_limits(y=0) +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
ggtitle("Fitted Demand Curves\n") +
ylab("log(Consumption)") +
scale_x_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
) +
scale_y_continuous(labels=axis_mod) +
annotation_logticks(sides = "b") +
xlab("log(Price)") +
theme(legend.title = element_blank()) +
theme(legend.position = "none") +
theme(legend.direction = "vertical") +
theme(panel.grid.minor = element_blank()) +
theme(panel.grid.major = element_blank()) +
guides(col = guide_legend(ncol = 3))

print(logChart)

}else if (equation == "koff")
{
pointFrame <- data.frame(X=mat$x, Y=mat$y, Individual=mat$p)

logChart <- ggplot() +
geom_line(data=graphFrame, aes(x=XSeries, y=YSeries, group=Individual, colour = factor(Individual))) +
geom_point(data=pointFrame, aes(x=pointFrame$X, y=pointFrame$Y, shape=factor(Individual))) +
expand_limits(y=0) +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
ggtitle("Fitted Demand Curves\n") +
ylab("log(Consumption)") +
scale_x_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))
) +
annotation_logticks(sides = "b") +
xlab("log(Price)") +
theme(legend.title = element_blank()) +
theme(legend.position = "none") +
theme(legend.direction = "vertical") +
theme(panel.grid.minor = element_blank()) +
theme(panel.grid.major = element_blank()) +
guides(col = guide_legend(ncol = 3))

print(logChart)

}
}

return(dfres)
}

##' Analyzes a dataframe and returns the regression model.
Expand Down