Purchase-Task-Report.Rmd

---
title: "Purchase Task Report"
output:
  pdf_document: default
---

```{r req.changes, include=FALSE, error=TRUE}
# Chunk defaults for the whole document: keep knitting after an error, print
# output without a comment prefix, and emit chunk output as raw ('asis') text
# unless an individual chunk overrides it.
knitr::opts_chunk$set(error = TRUE, comment = "", results = 'asis')


##### ----------  REQUIRED CHANGES BY USER:
###############################################################################################
##### PLEASE MAKE THE NECESSARY CHANGES IN THIS R CODE CHUNK ONLY
### a) SAVE this script in the same file location as the data set.
###    The report will be saved in this location.
### b) NAME of the .csv file (it is read with read.csv() in the processing chunk):
pt.name <- "PBCAR_PT.csv"
### c) NAME of the ID variable in the .csv file:
id.name <- "ID"
### d) COPY & PASTE the names of all purchase task items here
###    (same order as the prices in e), because they are matched by position):
purchase.task.names <- c("apt000","apt025","apt050","apt1","apt150",
                         "apt2","apt250","apt3","apt4","apt5","apt6",
                         "apt7","apt8","apt9","apt10","apt11","apt12",
                         "apt13","apt14","apt15","apt16","apt18","apt20",	
                         "apt22","apt24","apt26","apt28","apt30","apt35","apt40")
### e) ASSIGN the price associated with each purchase task item
###    (entered as strings because they are also used as column names):
prices <- c("0","0.25","0.50","1","1.50","2","2.50","3","4","5","6",
            "7","8","9","10","11","12","13","14","15","16","18","20",
            "22","24","26","28","30","35","40")
### f) IDENTIFY the maximum allowed value in the purchase task
###    (responses above this value are set to this value):
max.val <- 99
### g) IDENTIFY the total N of individuals in the data set
###    (used as the denominator of the "left after removal" percentage):
tot.n <- 730

##### ----------  OPTIONAL CHANGES:
###############################################################################################
### The k-values to test when fitting the mean demand curve:
k.span <- c(2,3,4)

### The Bounce criteria (default 10%), given as a proportion:
bounce.crit <- 0.1

### The Winsorizing type: 'preserve_order', '1_higher_sd', or '1_higher_max_non_outlier'
wins.type <- 'preserve_order'

##### ----------  END OF CHANGES NEEDED. KNIT THIS DOCUMENT TO PRODUCE REPORT
###############################################################################################
```


``` {r processing, include=FALSE, error=TRUE}

#################################################################################################
##### STEP 0: DATA INPUT AND FORMATTING PRIOR TO CLEANING AND PROCESSING
#################################################################################################
library(dplyr)
library(psych)
library(beezdemand)
library(ggplot2)

# READ the raw file and keep only the ID column plus the purchase task items
purchase.task.df <- read.csv(pt.name)[c(id.name, purchase.task.names)]

# LABEL the columns as "id" (required) followed by the price of each purchase task item
item.names <- c("id", prices)
names(purchase.task.df) <- item.names

#################################################################################################
##### STEP 1: IMPUTE ALL PERTINENT ZEROS
#################################################################################################

# For each participant: when the last non-missing cell of their row equals 0,
# every NA in that row is replaced with 0.
# NOTE(review): this fills ALL NAs of the row (also any that precede the final 0),
# not only the trailing ones -- confirm that this is intended.
# NOTE(review): the row lookup by id presumably expects unique ids -- confirm.
for (id_num in purchase.task.df$id){
  # Locate the last non-NA column of this id's row and compare that cell with 0
  if (purchase.task.df[purchase.task.df[,"id"]==id_num,][max(which(!is.na(purchase.task.df[purchase.task.df[,"id"]==id_num,])))] == 0){
    # Overwrite every NA cell of this id's row with 0
    purchase.task.df[purchase.task.df[,"id"]==id_num,][is.na(purchase.task.df[purchase.task.df[,"id"]==id_num,])] <- 0
  }
}

###############################################################################################
### ----- CAP every response above the user-defined maximum at that maximum
### (all columns except the first, which holds the id):

purchase.task.df[-1][purchase.task.df[-1] > max.val] <- max.val

#################################################################################################
##### STEP 2: REVIEW MISSING DATA
#################################################################################################

# COLLECT the id of every participant whose row still contains at least one NA
missing.id <- NULL
for (id_num in purchase.task.df$id) {
  if (anyNA(purchase.task.df[purchase.task.df$id == id_num, ])) {
    missing.id <- c(missing.id, id_num)
  }
}

# KEEP only the participants without missing data
purchase.task.df2 <- purchase.task.df[!(purchase.task.df$id %in% missing.id), ]

#################################################################################################
##### STEP 3: VIOLATION OF TREND, BOUNCE RATIO CRITERION, AND REVERSAL ALLOWANCE
#################################################################################################

# TREND: a participant is removed when consumption at the first price is above 0
# and consumption at the last price is at least as high as at the first price
remove.id.trend <- NULL
for (id_num in purchase.task.df2$id) {
  if ((purchase.task.df2[purchase.task.df2$id == id_num, head(prices, 1)] > 0) &
      (purchase.task.df2[purchase.task.df2$id == id_num, tail(prices, 1)] >=
         purchase.task.df2[purchase.task.df2$id == id_num, head(prices, 1)])) {
    purchase.task.df2 <- purchase.task.df2[!(purchase.task.df2$id %in% id_num), ]
    remove.id.trend <- c(remove.id.trend, id_num)
  }
}

# BOUNCE: a "bounce" is a price-to-price increase in consumption. A participant is
# removed when the share of adjacent price pairs showing an increase exceeds the
# user-defined criterion 'bounce.crit' (the same value the report prints).
remove.id.bounce <- NULL
for (id_num in purchase.task.df2$id) {
  num.bounces <- 0
  for (j in seq_len(length(prices) - 1)) {
    if (purchase.task.df2[purchase.task.df2$id == id_num, prices[j]] <
        purchase.task.df2[purchase.task.df2$id == id_num, prices[j + 1]]) {
      num.bounces <- num.bounces + 1
    }
  }
  # Compare against the user setting instead of a hard-coded 0.1
  if (num.bounces / (length(prices) - 1) > bounce.crit) {
    purchase.task.df2 <- purchase.task.df2[!purchase.task.df2[, "id"] %in% c(id_num), ]
    remove.id.bounce <- append(remove.id.bounce, id_num)
    cat("ID",id_num,"has bounce ratio:", num.bounces/(length(prices)-1),"and is being removed.\n")
  }
}

##### RESHAPE data from wide to long to CHECK for reversals
##### The {beezdemand} package requires the column names "id", "x", "y"
##### (one row per id and price; the response goes to "y", the time index to "x")
PT.long <- reshape(as.data.frame(purchase.task.df2), idvar = "id", 
                   varying = prices,
                   v.names = c("y"), timevar = c("x"), sep = "", direction = "long")

# Reordering the long data by id
PT.long <- PT.long[order(PT.long$id),]
# Reassigning x values in the long format using the 'prices' object: the vector is
# recycled down the rows, so 'x' holds the prices as strings.
# NOTE(review): relies on each id's rows being in price order after the sort above -- confirm
PT.long$x <- prices

# Run the {beezdemand} unsystematic-data checks on the long data. Only the
# reversal result (ReversalsPass) is used below.
check.unsys <- CheckUnsystematic(dat = PT.long, deltaq = -0.01, bounce = bounce.crit,
                                 reversals = 1.5, ncons0 = 2)

# FLAG the IDs that failed the reversal check (2 or more reversals).
# %in% never yields NA, so a missing ReversalsPass cannot leak NA ids into the lists below.
fail.list <- check.unsys$ReversalsPass %in% "Fail"

# LISTS & REMOVES the IDs of those who failed
good.id.list <- check.unsys$id[!fail.list]
PT.long2 <- PT.long[PT.long$id %in% good.id.list, ]

#################################################################################################
##### STEP 4: OUTLIER MANAGEMENT AT THE PRICE LEVEL
#################################################################################################

# RESHAPE the data back from long to wide format to winsorize the data
PT.wide <- reshape(as.data.frame(PT.long2), idvar = "id", v.names = "y", timevar = "x", direction = "wide")

# Relabel the columns as "id" followed by the prices
# NOTE(review): assumes reshape() returns the price columns in the order of 'prices' -- confirm
colnames(PT.wide) <- item.names

# CREATE z-scores in a separate data frame
wide.zs <- PT.wide
# Each price column is centred and scaled separately
# NOTE(review): a price column with zero variance would presumably give NaN z-scores -- verify
wide.zs[c(prices)] <- scale(PT.wide[c(prices)], center = TRUE, scale = TRUE)

# CREATE a new data frame for the winsorized data, so original values can later be referred to
PT.wide2 <- PT.wide

##### MODIFIED Price List (if final price array not reached)
# Total consumption over all retained participants at each price
price.count <- colSums(PT.wide[prices])
price_df <- data.frame(price.count,prices)
# Keep only the prices whose total consumption is not 0
mod.prices <- price_df$prices[price_df$price.count!=0]

##### ----------  WINSORIZING TYPE - OPTION 1:
#################################################################################################
# 1: Values with a z-score beyond +/-3.99 are replaced with the raw value that
#    corresponds to z = +/-3.99 (rounded up for the upper tail, down for the lower tail)
if (wins.type == "1_higher_sd") {
  for (price in prices) {
    # Take mean and SD from the un-winsorized column, so both tails use the same
    # statistics the z-scores were built from (the lower tail must not see the
    # already replaced upper-tail values).
    price.mean <- mean(PT.wide[, price])
    price.sd <- sd(PT.wide[, price])
    # which() drops NA/NaN z-scores (e.g. zero-variance columns), which are not
    # allowed as row indices in a data frame assignment.
    PT.wide2[which(wide.zs[, price] > 3.99), price] <- ceiling(3.99 * price.sd + price.mean)
    PT.wide2[which(wide.zs[, price] < -3.99), price] <- floor(-3.99 * price.sd + price.mean)
  }
}

##### ----------  WINSORIZING TYPE - OPTION 2:
#################################################################################################
# 2: All outliers are replaced with 1 higher than highest (or 1 lower than the lowest) non-outlying value
if (wins.type == "1_higher_max_non_outlier") {
  for (price in prices) {
    zs <- wide.zs[, price]
    # which() drops NA/NaN z-scores (e.g. zero-variance columns), which are not
    # allowed as row indices in a data frame assignment.
    high.rows <- which(zs > 3.99)
    low.rows <- which(zs < -3.99)
    if (length(high.rows) > 0) {
      PT.wide2[high.rows, price] <- max(PT.wide2[which(zs < 3.99), price]) + 1
    }
    if (length(low.rows) > 0) {
      PT.wide2[low.rows, price] <- min(PT.wide2[which(zs > -3.99), price]) - 1
    }
  }
}

##### ----------  WINSORIZING TYPE - OPTION 3:
#################################################################################################
# 3: Order is maintained: the q-th smallest distinct upper outlier becomes the highest
#    non-outlying value + q, and the q-th largest distinct lower outlier becomes the
#    lowest non-outlying value - q
if (wins.type == "preserve_order") {
  for (price in prices) {
    zs <- wide.zs[, price]
    # which() drops NA/NaN z-scores (e.g. zero-variance columns), which are not
    # allowed as row indices in a data frame assignment.
    above.399 <- sort(unique(zs[which(zs > 3.99)]))
    below.neg399 <- sort(unique(zs[which(zs < -3.99)]), decreasing = TRUE)
    if (length(above.399) > 0) {
      # Only outlier rows are modified, so the reference value is loop-invariant
      top.regular <- max(PT.wide2[which(zs < 3.99), price])
      for (q in seq_along(above.399)) {
        PT.wide2[which(zs == above.399[q]), price] <- top.regular + q
      }
    }
    if (length(below.neg399) > 0) {
      bottom.regular <- min(PT.wide2[which(zs > -3.99), price])
      for (q in seq_along(below.neg399)) {
        PT.wide2[which(zs == below.neg399[q]), price] <- bottom.regular - q
      }
    }
  }
}

# RECORD, per ID and price, every value that differs between the original (PT.wide)
# and the winsorized (PT.wide2) data
df.winsor.track <- data.frame(
  ID = character(),
  Price = numeric(),
  Bef_Winsor = integer(),
  After_Winsor = integer()
)
i <- 1
for (id_num in PT.wide$id) {
  for (price in prices) {
    orig <- PT.wide[PT.wide$id == id_num, price]
    new <- PT.wide2[PT.wide2$id == id_num, price]
    # Unchanged cells are skipped
    if (orig == new) {
      next
    }
    df.winsor.track[i, 1] <- id_num
    df.winsor.track[i, 2] <- price
    df.winsor.track[i, 3] <- orig
    df.winsor.track[i, 4] <- new
    i <- i + 1
  }
}

##### ----- WINSORIZED
# Wide -> long, using the "id" / "x" / "y" column layout
PT.W.long <- reshape(as.data.frame(PT.wide2),
                     direction = "long", idvar = "id",
                     varying = prices, v.names = "y", timevar = "x", sep = "")

# Sort the long data by id, then relabel x with the 'prices' object
PT.W.long2 <- PT.W.long[order(PT.W.long$id), ]
PT.W.long2$x <- prices

##### ----- NON-WINSORIZED
# The long data from before winsorizing is reused as is
PT.nonW.long2 <- PT.long2

#################################################################################################
##### STEP 5: ELASTICITY MODELLING TESTS
#################################################################################################

##### ----- WINSORIZED
# Empirical (observed) demand indices from the winsorized long data
PT.emp <- GetEmpirical(dat = PT.W.long2)
colnames(PT.emp) <- c("id","Intensity","BP0","BP1","Omax","Pmax")

# DETERMINE which k-value is best for curve fitting by testing a series of values.
# The k-values tested are in the 'k.span' object, input by the user (default is values 2, 3, and 4).
# The candidate fits use the winsorized data (PT.W.long2), i.e. the same data the final
# winsorized curves below are fitted to; the non-winsorized section selects its own k.
R2.val.k <- NULL
for (k_value in k.span) {
  mean.curve <- FitCurves(dat = PT.W.long2, equation = "koff",
                          k = k_value, agg = 'Mean')
  R2.val.k <- append(R2.val.k, mean.curve$R2)
}

# CHOOSE k-value based on which R^2 is highest for the mean data
# ! # Ties are broken by choosing the lower k-value
k.value.final <- min(k.span[R2.val.k == max(R2.val.k)])
print(k.value.final)

# Curve fitted to the mean consumption at each price, using the selected k
mean.curve <- FitCurves(dat = PT.W.long2, equation = "koff",
                        k = k.value.final, agg = 'Mean')

mean.curve$id <- c('mean.curve')
mean.curve.final <- mean.curve[,c("id","Q0d","K","Alpha","R2","EV","Omaxd","Pmaxd")]
colnames(mean.curve.final) <- c("id","Q0d","K","Alpha","R2","EV","Omax_curve","Pmax_curve")
print(mean.curve.final)

# One curve per participant, using the selected k
part.curve <- FitCurves(dat = PT.W.long2, equation = "koff",
                        k = k.value.final, agg = NULL)

spec.curve <- part.curve[,c("id","Q0d","K","Alpha","R2","EV","Omaxd","Pmaxd")]
colnames(spec.curve) <- c("id","Q0d","K","Alpha","R2","EV","Omax_curve","Pmax_curve")

# Combine empirical and curve-derived indices per id; the mean curve becomes the first row
all.out <- merge(PT.emp,spec.curve)
all.out <- all.out[order(all.out$id),]

PT.final.results <- bind_rows(mean.curve.final,all.out)

# CREATE proper breakpoint variable: start from BP0 and fill in the cases where BP0 is NA
PT.final.results$Breakpoint <- PT.final.results$BP0
for (id_num in PT.wide2$id) {
  pt.sum <- sum(PT.wide2[PT.wide2$id == id_num, prices], na.rm = FALSE)
  last.amount <- PT.wide2[PT.wide2$id == id_num, length(prices) + 1]
  if (is.na(PT.final.results$BP0[PT.final.results$id == id_num]) & (pt.sum == 0)) {
    # No consumption at any price: use the lowest price in 'mod.prices'.
    # The prices are stored as text, so convert BEFORE taking the minimum;
    # min() on strings compares lexicographically ("10" sorts before "2").
    PT.final.results$Breakpoint[PT.final.results$id == id_num] <-
      min(as.numeric(as.character(mod.prices)))
  } else if (is.na(PT.final.results$BP0[PT.final.results$id == id_num]) & (last.amount > 0)) {
    # Still consuming at the last price: use the last price + 1
    PT.final.results$Breakpoint[PT.final.results$id == id_num] <- as.numeric(prices)[length(prices)] + 1
  }
}

# Messages describing every changed breakpoint; printed later in the report
breakpoint.change <- NULL

# REDEFINE breakpoints to the 1st 0 consumption price point reached in instances of reversals
check.unsys.2 <- CheckUnsystematic(dat = PT.long, deltaq = -0.01, bounce = 0.1, reversals = .01, ncons0 = 1)
one.rev.list <- check.unsys.2[check.unsys.2$ReversalsPass=="Fail",]$id
one.rev.list <- one.rev.list[one.rev.list %in% PT.wide2$id]
for (id_num in one.rev.list) {
  cons.vals <- PT.wide2[PT.wide2$id == id_num, ]
  for (price in prices) {
    if (cons.vals[, price] == 0) {
      change.msg <- capture.output(cat('The breakpoint for ID ',id_num,'has been changed from',
          PT.final.results[PT.final.results$id==id_num,]$Breakpoint,'to',as.numeric(price)))
      # Append instead of overwrite so every change is reported, not just the last one;
      # the trailing "  \n" puts each message on its own line in the 'asis' output.
      breakpoint.change <- c(breakpoint.change, paste0(change.msg, "  \n"))
      PT.final.results[PT.final.results$id == id_num, ]$Breakpoint <- as.numeric(price)
      break
    }
  }
}


# JOIN the winsorized price-level data with the demand indices (common column: id)
# and label the columns of the result
PT.results <- merge(PT.wide2, PT.final.results)
item.names <- c(
  "id", prices,
  "Q0d", "K", "Alpha", "R2", "EV", "Omax_curve", "Pmax_curve",
  "Intensity", "BP0", "BP1", "Omax", "Pmax", "Breakpoint"
)
names(PT.results) <- item.names

##### ----- NON-WINSORIZED
# Empirical (observed) demand indices from the non-winsorized long data
PT.nonW.emp <- GetEmpirical(dat = PT.nonW.long2)
colnames(PT.nonW.emp) <- c("id","Intensity","BP0","BP1","Omax","Pmax")

# TEST each k-value in 'k.span' (user input; default 2, 3, and 4) on the mean data
# and keep the R^2 of every fit
nonW.R2.val.k <- NULL
for (k_value in k.span) {
  nonW.mean.curve <- FitCurves(dat = PT.nonW.long2, equation = "koff",
                               k = k_value, agg = 'Mean')
  nonW.R2.val.k <- c(nonW.R2.val.k, nonW.mean.curve$R2)
}

# SELECT the k-value with the highest R^2; ties go to the lower k-value
nonW.k.value.final <- min(k.span[nonW.R2.val.k == max(nonW.R2.val.k)])
print(nonW.k.value.final)

# Mean curve with the selected k
nonW.mean.curve <- FitCurves(dat = PT.nonW.long2, equation = "koff",
                             k = nonW.k.value.final, agg = 'Mean')
nonW.mean.curve$id <- c('nonW.mean.curve')
nonW.mean.curve.final <- setNames(
  nonW.mean.curve[, c("id","Q0d","K","Alpha","R2","EV","Omaxd","Pmaxd")],
  c("id","Q0d","K","Alpha","R2","EV","Omax_curve","Pmax_curve")
)
print(nonW.mean.curve.final)

# Participant-level curves with the selected k
nonW.part.curve <- FitCurves(dat = PT.nonW.long2, equation = "koff",
                             k = nonW.k.value.final, agg = NULL)
nonW.spec.curve <- nonW.part.curve[, c("id","Q0d","K","Alpha","R2","EV","Omaxd","Pmaxd")]
colnames(nonW.spec.curve) <- c("id","Q0d","K","Alpha","R2","EV","Omax_curve","Pmax_curve")

# Empirical + curve indices per id, sorted by id, with the mean curve as first row
nonW.all.out <- merge(PT.nonW.emp, nonW.spec.curve)
nonW.all.out <- nonW.all.out[order(nonW.all.out$id), ]
PT.nonW.final.results <- bind_rows(nonW.mean.curve.final, nonW.all.out)

# CREATE proper breakpoint variable: start from BP0 and fill in the cases where BP0 is NA
# PT.wide = non-winsorized
PT.nonW.final.results$Breakpoint <- PT.nonW.final.results$BP0
for (id_num in PT.wide$id) {
  pt.nonW.sum <- sum(PT.wide[PT.wide$id == id_num, prices], na.rm = FALSE)
  nonW.last.amount <- PT.wide[PT.wide$id == id_num, length(prices) + 1]
  if (is.na(PT.nonW.final.results$BP0[PT.nonW.final.results$id == id_num]) & (pt.nonW.sum == 0)) {
    # No consumption at any price: use the lowest price in 'mod.prices'.
    # The prices are stored as text, so convert BEFORE taking the minimum;
    # min() on strings compares lexicographically ("10" sorts before "2").
    PT.nonW.final.results$Breakpoint[PT.nonW.final.results$id == id_num] <-
      min(as.numeric(as.character(mod.prices)))
  } else if (is.na(PT.nonW.final.results$BP0[PT.nonW.final.results$id == id_num]) & (nonW.last.amount > 0)) {
    # Still consuming at the last price: use the last price + 1
    PT.nonW.final.results$Breakpoint[PT.nonW.final.results$id == id_num] <- as.numeric(prices)[length(prices)] + 1
  }
}

# REDEFINE breakpoints to the 1st 0 consumption price point reached in instances of reversals
nonW.check.unsys.2 <- CheckUnsystematic(dat = PT.long, deltaq = -0.01, bounce = 0.1, reversals = .01, ncons0 = 1)
nonW.one.rev.list <- nonW.check.unsys.2[nonW.check.unsys.2$ReversalsPass=="Fail",]$id
nonW.one.rev.list <- nonW.one.rev.list[nonW.one.rev.list %in% PT.wide$id]
for (id_num in nonW.one.rev.list) {
  nonW.cons.vals <- PT.wide[PT.wide$id == id_num, ]
  for (price in prices) {
    if (nonW.cons.vals[, price] == 0) {
      cat('The breakpoint for ID',id_num,'has been changed from',
          PT.nonW.final.results[PT.nonW.final.results$id==id_num,]$Breakpoint,'to',as.numeric(price))
      PT.nonW.final.results[PT.nonW.final.results$id == id_num, ]$Breakpoint <- as.numeric(price)
      # Only the first price with 0 consumption is used
      break
    }
  }
}

# JOIN the non-winsorized price-level data with the demand indices (common column: id)
# and label the columns of the result
PT.nonW.results <- merge(PT.wide, PT.nonW.final.results)
item.names <- c(
  "id", prices,
  "Q0d", "K", "Alpha", "R2", "EV", "Omax_curve", "Pmax_curve",
  "Intensity", "BP0", "BP1", "Omax", "Pmax", "Breakpoint"
)
names(PT.nonW.results) <- item.names

#################################################################################################
##### STEP 6: WINSORIZING INDEX VARIABLES (ALPHA, PMAX, ETC.)
#################################################################################################

# CREATES a FUNCTION for winsorizing index variables
#
# Arguments:
#   all_out_temp  data frame with an 'id' column and the column named in 'var_name'
#   var_name      name (string) of the index column to winsorize
#   delta         step size: the q-th distinct outlier is moved q * delta beyond the
#                 most extreme non-outlying value
# Details:
#   Rows where 'var_name' is NA are ignored and returned unchanged. Outliers are
#   values with a z-score above 3.99 or below -3.99. Values are only replaced when
#   the global 'wins.type' is "preserve_order"; the number of outliers and every
#   replacement are reported with cat().
# Value:
#   'all_out_temp' with the (possibly winsorized) 'var_name' column.
winsorize.index <- function(all_out_temp, var_name, delta) {
  has_value <- !is.na(all_out_temp[, var_name])
  all_out <- all_out_temp[has_value, ]
  values <- all_out[, var_name]
  zs <- as.vector(scale(values, center = TRUE, scale = TRUE))
  # which() ignores NA/NaN z-scores (e.g. a column with zero variance)
  high <- which(zs > 3.99)
  low <- which(zs < -3.99)
  cat('There is/are',length(c(values[high], values[low])),
      'outlying ',var_name,' value(s): \n')
  new_values <- values
  if (wins.type == "preserve_order") {
    if (length(high) > 0) {
      top_regular <- max(values[which(zs < 3.99)])
      q <- 1
      # Smallest outlier first, so the original ordering is kept
      for (ab_399 in sort(unique(values[high]))) {
        hit <- values == ab_399
        cat('For ID(s) ',all_out[hit,c('id')],'\n','the ',var_name,' value was changed from ',
            ab_399,' to ',top_regular + q*delta, '\n')
        new_values[hit] <- top_regular + q*delta
        q <- q + 1
      }
    }
    if (length(low) > 0) {
      bottom_regular <- min(values[which(zs > -3.99)])
      # The counter starts once, before the loop; resetting it inside the loop
      # would give every low outlier the same value and lose their order.
      q <- 1
      # Largest (least extreme) outlier first
      for (bel_399 in sort(unique(values[low]), decreasing = TRUE)) {
        hit <- values == bel_399
        cat('For ID(s) ',all_out[hit,c('id')],'\n','the ',var_name,' value was changed from ',
            bel_399,' to ',bottom_regular - q*delta, '\n')
        new_values[hit] <- bottom_regular - q*delta
        q <- q + 1
      }
    }
  }
  # Write the values back into the non-NA positions of the full data frame
  all_out_temp[, var_name] <- replace(all_out_temp[, var_name], has_value, new_values)
  all_out_temp
}


##### ----- WINSORIZED
PT.W.index <- PT.results

# CALCULATING Elasticity (curve data) requires the first 2 numbers to be non-zero.
# Despite its name, 'non_zero' is TRUE for rows where the first and/or the second
# price column (columns 2 and 3) IS zero.
non_zero <- (PT.W.index[,2]==0)|(PT.W.index[,3]==0)

##### ----- NON-WINSORIZED
PT.nonW.index <- PT.nonW.results

# IDENTIFY IDs who had a 0 value in one or both of their first 2 responses
zero.id <- PT.W.index[non_zero,]$id

one.response.zero <- NULL
# The count of ids is tested here; length(zero.id > 0) only worked by accident
if (length(zero.id) > 0) {
  # One line of text per id, printed later in the report
  one.response.zero <- capture.output(cat(zero.id, sep='\n'))

  ### BLANKS OUT the listed indices for IDs with ZEROS in first 2 responses
  ### (same ids in the winsorized and the non-winsorized results):
  PT.W.index[PT.W.index$id %in% zero.id, c('Q0d','Alpha','R2','EV','Omax','Pmax')] <- NA
  PT.nonW.index[PT.nonW.index$id %in% zero.id, c('Q0d','Alpha','R2','EV','Omax','Pmax')] <- NA
}

```


#### Date: `r format(Sys.time(), '%d %B, %Y')`

#### Data File:
```{r report.specifics1, echo=FALSE}
# Name of the analysed .csv file, as set by the user in the first chunk
cat(pt.name)
```

#### Total Number of Individuals:
```{r report.specifics2, echo=FALSE}
# Total N as entered by the user in the first chunk (not counted from the data)
cat(tot.n)
```


# Missing Data

IDs with Missing Data:
```{r missing1, echo=FALSE}
# IDs collected in STEP 2 of the processing chunk; prints nothing when there are none
cat(missing.id)
```

IDs with Trend Violations:
```{r missing2, echo=FALSE}
# IDs removed by the trend check in STEP 3 of the processing chunk
cat(remove.id.trend)
```

Bounce Ratio:
```{r missing3, echo=FALSE}
# The user-defined bounce criterion, shown as a percentage
cat('The bounce ratio was set at',(bounce.crit*100),'%')
```

IDs Removed due to Bounce Ratio Violation:
```{r missing.bounce, echo=FALSE}
# IDs removed by the bounce check in STEP 3 of the processing chunk
cat(remove.id.bounce)
```


# After Removal

Individuals Left After Removal:
```{r removal, echo=FALSE}
# Rows left in the final winsorized results relative to the user-supplied total N
cat(nrow(PT.W.index),'/',tot.n,'=',100*nrow(PT.W.index)/tot.n,'% left after removal\n',sep='')
```

Price Points:
```{r removal2, echo=FALSE}
# Number of data points = number of prices x number of rows in the final results
cat(length(prices),' different price values, ',nrow(PT.W.index),' individuals = ',
    nrow(PT.W.index)*length(prices),' data points\n',sep='')
```

Outliers:

*See Appendix for specific changes by ID*

```{r outliers, echo=FALSE}
# Count of price-level values changed by winsorizing (one row of df.winsor.track per
# changed value), also expressed as a percentage of all data points
cat(nrow(df.winsor.track),' outlying values (',
    100*nrow(df.winsor.track)/(nrow(PT.W.index)*length(prices)),'% of total values)\n',sep='')
```

# Elasticity Modelling Tests:

IDs with a zero value for the first and/or second purchase task item:
```{r elasticity, echo=FALSE}
# IDs captured in the processing chunk; NULL (prints nothing) when there are none
cat(one.response.zero)
```

# Model Fitting (Winsorized)

K values tested:
```{r model.fitting, echo=FALSE}
# The candidate k-values set by the user in the first chunk
cat(k.span)
```

K value selected:
``` {r model.fit2, echo=FALSE}
# Print the k-value that was selected for the winsorized fits. Reading it from
# 'k.value.final' avoids min() over the K column, which is NA if any row has an NA K.
cat(k.value.final)
```

$R^{2}$ of the Mean Curve:
``` {r model.fit3, echo=FALSE}
# R^2 of the row labelled 'mean.curve', i.e. of the curve fitted to the mean data
cat(PT.final.results[PT.final.results$id=='mean.curve',]$R2)
```

Median $R^{2}$:
```{r model.fit4, echo=FALSE}
# Median of the R^2 column of the final winsorized results, ignoring NAs
cat(median(PT.W.index$R2,na.rm=TRUE))
```

Inter-Quartile Range of $R^{2}$:
```{r model.fit5, echo=FALSE}
# Inter-quartile range of the R^2 column, ignoring NAs
cat(IQR(PT.W.index$R2,na.rm=TRUE))
```

Minimum $R^{2}$:
```{r model.fit6, echo=FALSE}
# Smallest value of the R^2 column, ignoring NAs
cat(min(PT.W.index$R2,na.rm=TRUE))
```

Maximum $R^{2}$:
```{r model.fit7, echo=FALSE}
# Largest value of the R^2 column, ignoring NAs
cat(max(PT.W.index$R2,na.rm=TRUE))
```

## Mean Winsorized Curve:

```{r model.plot.apt, echo=FALSE}
# Refit the mean curve on the winsorized data with detailed output, which supplies
# the pieces PlotCurve() needs ('adfs', 'dfres', 'newdats').
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
PT <- FitCurves(dat = PT.W.long2, equation = "koff", k = k.value.final, agg = "Mean", detailed = TRUE)
plot.apt <- PlotCurve(PT$adfs[[1]], PT$dfres[1, ], PT$newdats[[1]])
plot.apt + ggtitle("Mean Curve") + theme_apa()
```

#### Winsorized:
``` {r mean.curve, echo=FALSE, results='markup'}
# Winsorized mean-curve indices as a table; the first column (id) is dropped
knitr::kable(mean.curve.final[c(-1)])
```

#### Non-Winsorized:
``` {r mean.curve.nonW, echo=FALSE, results='markup'}
# Non-winsorized mean-curve indices as a table; the first column (id) is dropped
knitr::kable(nonW.mean.curve.final[c(-1)])
```

# Price-Level Winsorizing of Outlying values:

Change in Breakpoints (reversals to 1st 0 consumption reached):
```{r breakpoint.price.level, echo=FALSE}
# Message text captured in the processing chunk; NULL (prints nothing) when no
# breakpoint was changed
cat(breakpoint.change)
```

# Index-Level Winsorizing of Outlying values:

```{r index.level, echo=FALSE, results='markup'}

# Winsorize the index variables one after another, feeding each result into the
# next call; 'delta' is the spacing used between replaced values
delta <- 0.001
PT.W.index <- Reduce(
  function(index.df, index.name) winsorize.index(index.df, index.name, delta),
  c('Alpha', 'Breakpoint', 'Intensity', 'Omax', 'Pmax'),
  PT.W.index
)
```

```{r final.data, include=FALSE, error=TRUE}

# This merges the output with N = 'tot.n' so that any individuals that were removed for the
# purchase task have NAs in the purchase task output
# (left join from the id column of the full data set onto the winsorized indices)
PT.W.index.final <- merge(purchase.task.df[c(1)],
  PT.W.index[c("id","Alpha","Breakpoint","Intensity","Omax","Pmax")], by = "id", all.x = TRUE)

# Winsorized index columns carry the suffix "_W"
winso.names <- c("id","Alpha_W","Breakpoint_W","Intensity_W","Omax_W","Pmax_W")
colnames(PT.W.index.final) <- winso.names

# Includes Omax and Pmax demand curve values
PT.nonW <- PT.nonW.index[c("id","Omax_curve","Pmax_curve","Alpha","Breakpoint","Intensity","Omax","Pmax")]

# Left join keyed on PT.nonW: only ids present in PT.nonW appear in PT.ALL.DATA
# NOTE(review): this leaves out the removed individuals that the first merge kept --
# confirm whether all = TRUE was intended
PT.ALL.DATA <- merge(PT.nonW,PT.W.index.final, by = "id", all.x = TRUE)

### PRICE-LEVEL DESCRIPTIVES, WINSORIZED DATA
### (rows are labelled with the original item names; a subset of columns is kept)
price.stats <- psych::describe(PT.wide2[prices])
price.stats$vars <- purchase.task.names
price.stats <- price.stats[, c("vars", "n", "mean", "sd", "se", "min", "max")]

### PRICE-LEVEL DESCRIPTIVES, NON-WINSORIZED DATA
nonW.price.stats <- psych::describe(PT.wide[prices])
nonW.price.stats$vars <- purchase.task.names
nonW.price.stats <- nonW.price.stats[, c("vars", "n", "mean", "sd", "se", "min", "max")]

### DATA TRANSFORMATIONS
# Determine best transformation of (winsorized) variables and save to data set
# since a value of 0 is possible for Breakpoint, Intensity, Omax, and Pmax,
# a small constant (0.1) is added prior to log10 transformation

# Standard error of the mean, ignoring NAs. The sample size counts only the
# non-missing values, matching the values that enter the variance.
se <- function(x) sqrt(var(x, na.rm = TRUE) / sum(!is.na(x)))

# Start from the combined non-winsorized + winsorized index data
PT.TRFMED <- PT.ALL.DATA

# log10 transformation of the winsorized indices; the constant 0.1 is added to all
# five columns (Alpha included) before taking the logarithm
PT.LOG.TRFMED <- log10(PT.ALL.DATA[c("Alpha_W","Breakpoint_W","Intensity_W","Omax_W","Pmax_W")]+0.1)
PT.LOG.TRFMED <- PT.LOG.TRFMED %>%
  rename(Alpha_Log = Alpha_W, Breakpoint_Log = Breakpoint_W,
         Intensity_Log = Intensity_W, Omax_Log = Omax_W, Pmax_Log = Pmax_W)

# Square-root transformation of the winsorized indices (no constant added)
PT.SQRT.TRFMED <- sqrt(PT.ALL.DATA[c("Alpha_W","Breakpoint_W","Intensity_W","Omax_W","Pmax_W")])
PT.SQRT.TRFMED <- PT.SQRT.TRFMED %>%
  rename(Alpha_Sqrt = Alpha_W, Breakpoint_Sqrt = Breakpoint_W,
         Intensity_Sqrt = Intensity_W, Omax_Sqrt = Omax_W, Pmax_Sqrt = Pmax_W)

# Original, log and square-root columns side by side (rows stay in the same order)
PT.TRFMED <- cbind(PT.TRFMED,PT.LOG.TRFMED,PT.SQRT.TRFMED)
PT.ALL.TRANSFORMED <- PT.TRFMED ### TO SAVE AS OUTPUT

# Copy with descriptive column names; the statistics below use PT.ALL.TRANSFORMED
PT.TRFMED <- PT.TRFMED %>%
  rename(Alpha_NonWinsorized = Alpha, Alpha_Winsorized = Alpha_W,
         Breakpoint_NonWinsorized = Breakpoint, Breakpoint_Winsorized = Breakpoint_W,
         Intensity_NonWinsorized = Intensity, Intensity_Winsorized = Intensity_W,
         Omax_NonWinsorized = Omax, Omax_Winsorized = Omax_W,
         Pmax_NonWinsorized = Pmax, Pmax_Winsorized = Pmax_W)


# Descriptive statistics for every index in its four versions:
# non-winsorized, winsorized (_W), log10 (_Log) and square root (_Sqrt)
PT.describe <- psych::describe(PT.ALL.TRANSFORMED[c("Alpha","Alpha_W","Alpha_Log","Alpha_Sqrt",
                                                    "Breakpoint","Breakpoint_W","Breakpoint_Log","Breakpoint_Sqrt",
                                                    "Intensity","Intensity_W","Intensity_Log","Intensity_Sqrt",
                                                    "Omax","Omax_W","Omax_Log","Omax_Sqrt",
                                                    "Pmax","Pmax_W","Pmax_Log","Pmax_Sqrt")])

# Replace the 'vars' column with the variable names, in the order selected above
PT.describe$vars <- c("Alpha","Alpha_W","Alpha_Log","Alpha_Sqrt",
                      "Breakpoint","Breakpoint_W","Breakpoint_Log","Breakpoint_Sqrt",
                      "Intensity","Intensity_W","Intensity_Log","Intensity_Sqrt",
                      "Omax","Omax_W","Omax_Log","Omax_Sqrt",
                      "Pmax","Pmax_W","Pmax_Log","Pmax_Sqrt")

# Keep only the statistics shown in the report table
PT.describe <- PT.describe[c("vars","n","mean","sd","min","max",
                             "skew","kurtosis","se")]

## Change column names for Appendix A:
colnames(df.winsor.track) <- c("ID","Price","Before Winsorization","After Winsorization")

```

# Final Winsorized & Non-Winsorized Purchase Task Variable Descriptives & Transformations

Winsorized variables are denoted by "_W"

```{r pt.descriptives, echo=FALSE, results='markup'}
knitr::kable(PT.describe, digits = 4, row.names = F)
```

# Price Level Descriptives (Winsorized)
```{r price.level.descriptives, echo=FALSE, results='markup'}
knitr::kable(price.stats, digits = 2, row.names = F)
```

# Price Level Descriptives (Non-Winsorized)
```{r price.level.descriptives.nonW, echo=FALSE, results='markup'}
knitr::kable(nonW.price.stats, digits = 2, row.names = F)
```


# Appendix: Outlier changes by ID

```{r appendix, echo=FALSE, results='markup'}
knitr::kable(df.winsor.track)
```