Feedback should be sent to goran.milovanovic_ext@wikimedia.de
.
Libraries setup.
# !diagnostics off
### --- Setup
# - knitr chunk defaults: wide figures for the plots produced below
knitr::opts_chunk$set(fig.width = 15, fig.height = 8)
library(dplyr)       # filter()/select()/group_by()/summarise() pipelines
library(ggplot2)     # jitter plots, boxplots, bar plots
library(data.table)  # fread() for fast CSV loading
library(lawstat)     # levene.test(): homogeneity-of-variance test
library(effsize)     # cohen.d(): effect size for the independent t-test
library(TOSTER)      # TOSTtwo(): TOST equivalence testing
library(poweRlaw)    # displ, bootstrap_p(): power-law fitting
library(kableExtra)  # kable_styling(): HTML result tables
library(car)         # Anova(): Type III SS for the 2-way ANOVA
Load data sets.
# - load the 2017 campaign data sets
# - data.table = FALSE: return plain data.frames for the dplyr verbs below
# - (TRUE/FALSE spelled out: T/F are reassignable and unsafe)
donationsData <- fread('data/campaign-2017-donations.csv',
                       header = TRUE, data.table = FALSE)
membershipData <- fread('data/campaign-2017-memberships.csv',
                        header = TRUE, data.table = FALSE)
The donations dataset
The membership dataset
Experimental factor: in the donationsData$keyword
, anything ending in “-ctrl” or “var”. Also, in donationsData$status
: filter out Z and X cases.
# - keep only A/B-test landing pages: keywords ending in "-ctrl" or "var"
# - (columns are referenced bare inside filter(); the redundant
# - donationsData$ prefix defeats dplyr's data masking)
donationsData <- filter(donationsData, grepl("-ctrl$|var$", keyword))
# - exclude donations with status 'X' or 'Z'
donationsData <- filter(donationsData, !(status %in% c('X', 'Z')))
# - recoding scheme: maps landing-page keywords to the "old"/"new" condition
recodingScheme <- read.csv('data/recodingScheme.csv',
                           header = TRUE,
                           stringsAsFactors = FALSE,
                           check.names = FALSE)
oldCodes <- unique(recodingScheme$`keyword of old lp`)
newCodes <- unique(recodingScheme$`keyword of new lp`)
# - assign the experimental condition from the keyword.
# - Vectorized %in% lookups replace the original per-row sapply();
# - precedence is identical: "old" wins if a keyword appears in both sets.
donationsData$expFactor <- ifelse(donationsData$keyword %in% oldCodes, "old",
                                  ifelse(donationsData$keyword %in% newCodes,
                                         "new",
                                         "Unnasigned"))
# - NOTE(review): "Unnasigned" is a typo for "Unassigned"; the value is kept
# - byte-identical in case downstream code matches on it — confirm and fix.
Recode the donation amount to a yearly amount, based on donationsData$interval: 0 or 12 means once per year, 1 means monthly (×12), 3 quarterly (×4), and 6 semi-annual (×2).
donationsData$amountYearly <- numeric(dim(donationsData)[1])
# - yearly amount: scale each donation by its payment frequency
# - (interval 0/12 = once per year, 1 = monthly, 3 = quarterly,
# - 6 = semi-annual); any other interval value stays at its initialized 0.
# - One loop over (interval, multiplier) pairs replaces five duplicated
# - subset assignments.
for (iv in list(c(0, 1), c(12, 1), c(1, 12), c(3, 4), c(6, 2))) {
  w <- donationsData$interval == iv[1]
  donationsData$amountYearly[w] <- donationsData$amount[w] * iv[2]
}
# - keep only the columns used in the analyses below
donationsData <- select(donationsData,
                        campaign, expFactor, amountYearly, opt_in)
# - jitter plot: yearly donation amount by page version
ggplot(donationsData,
       aes(x = expFactor, y = amountYearly, color = expFactor)) +
  geom_jitter(size = 1.5, width = .1, shape = 1, alpha = .45) +
  scale_color_manual(values = c('deepskyblue', 'darkorange'), name = "Page") +
  labs(x = "Page", y = "Donation Amount (Yearly)") +
  theme_bw()
Boxplots indicate that the extreme outliers in both groups (“old”/“new” page) are found on the upper tails of the distributions only, justifying our approach to outlier removal that is used later in statistical hypothesis testing.
# - boxplot: yearly donation amount by page version; extreme values show up
# - as individual outlier points
ggplot(donationsData,
       aes(x = expFactor, y = amountYearly, color = expFactor)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 1, outlier.size = 1) +
  scale_color_manual(values = c('deepskyblue', 'darkorange'), name = "Page") +
  labs(x = "Page", y = "Donation Amount (Yearly)") +
  theme_bw()
After the log()
transform:
# - boxplot of the log-transformed yearly donation amount by page version
ggplot(donationsData,
       aes(x = expFactor, y = log(amountYearly), color = expFactor)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 1, outlier.size = 1) +
  scale_color_manual(values = c('deepskyblue', 'darkorange'), name = "Page") +
  labs(x = "Page", y = "log(Donation Amount (Yearly))") +
  theme_bw()
To inspect the distributions of the yearly Donation Amount per campaign:
# - jitter plot of yearly donation amount by page version, one panel
# - per campaign
ggplot(donationsData,
       aes(x = expFactor, y = amountYearly, color = expFactor)) +
  geom_jitter(size = 1.5, width = .1, shape = 1, alpha = .45) +
  scale_color_manual(values = c('deepskyblue', 'darkorange'), name = "Page") +
  labs(x = "Page", y = "Donation Amount (Yearly)") +
  theme_bw() +
  facet_wrap(~ campaign)
# - boxplot of yearly donation amount by page version, faceted by campaign
ggplot(donationsData,
       aes(x = expFactor, y = amountYearly, color = expFactor)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 1, outlier.size = 1) +
  scale_color_manual(values = c('deepskyblue', 'darkorange'), name = "Page") +
  labs(x = "Page", y = "Donation Amount (Yearly)") +
  theme_bw() +
  facet_wrap(~ campaign)
# - boxplot of the log-transformed yearly donation amount by page version,
# - faceted by campaign
ggplot(donationsData,
       aes(x = expFactor, y = log(amountYearly), color = expFactor)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 1, outlier.size = 1) +
  scale_color_manual(values = c('deepskyblue', 'darkorange'), name = "Page") +
  labs(x = "Page", y = "log(Donation Amount (Yearly))") +
  theme_bw() +
  facet_wrap(~ campaign)
The analytical workflow is as follows:
x_min
is the lowest observation at which the power-law behavior is assumed; tricky, we should re-run this w. previous estimation of x_min
). Run independent t-tests or Welch, after testing for homogeneity of variances w. Levene’s test, w. correction for unequal variances if necessary. In other words, we first test for the homogeneity of variances, and then choose to conduct an Independent
t-test in case of homogeneity or Welch
in case of heterogeneity. No matter the nature of the t-test, we perform TOST equivalence testing against {.3, -.3} bounds considered as minimal effect size. The measure of effect size, however, is chosen in respect to the type of the t-test conducted (Cohen’s d or Glass’ delta).
Results. The following table summarizes the t-test results: campaign
is the campaign’s code, n_old
and n_new
the number of observations for the “new” and the “old” page, respectively, eq_variance
is the boolean signifying the outcome of the Levene’s test of equality of variances (i.e. the test of homogeneity of variance), ttest
stands for the t statistic (the t-test outcome), ttest_p
is the Type I Error probability (i.e. alpha), ttest_df
stands for the respective number of degrees of freedom, ttest_method
would have a value of Independent
for equal variances and Welch
when the homogenity of variance assumption is violated, eff_size
is the measure of effect size (N.B. Cohen’s d for Independent
and Glass’ delta for Welch
), eff_magnitude
is the categorization of the effect size (N.B. not available for Glass’ delta since it is uncertain whether the same categories apply as in the case of Cohen’s d), test_eq
is the result of the TOST equivalence test.
# - Method A: per-campaign parametric tests.
# - Levene's test for homogeneity of variance decides between the
# - independent (pooled-variance) t-test with Cohen's d and the Welch
# - t-test with Glass' delta; either way, a TOST equivalence test against
# - the standardized {-.3, .3} bounds is performed.
# - (The two original branches shared most of their code; the common parts
# - are hoisted and driven by eqVar. seq_along() and TRUE/FALSE replace
# - 1:length() and T/F.)
campaigns <- unique(donationsData$campaign)
# - preallocate one result row per campaign
methodAResults <- data.frame(
  campaign = character(length(campaigns)),
  n_old = numeric(length(campaigns)),
  n_new = numeric(length(campaigns)),
  eq_variance = logical(length(campaigns)),
  ttest = numeric(length(campaigns)),
  ttest_p = numeric(length(campaigns)),
  ttest_df = numeric(length(campaigns)),
  ttest_method = character(length(campaigns)),
  eff_size = numeric(length(campaigns)),
  eff_mag = character(length(campaigns)),
  test_eq = logical(length(campaigns)),
  stringsAsFactors = FALSE
)
for (i in seq_along(campaigns)) {
  # - select this campaign's data
  testData <- donationsData %>%
    filter(campaign %in% campaigns[i]) %>%
    select(expFactor, amountYearly)
  testData$expFactor <- factor(testData$expFactor, levels = c('new', 'old'))
  # - per-group vectors, shared by both branches below
  amountNew <- testData$amountYearly[testData$expFactor == 'new']
  amountOld <- testData$amountYearly[testData$expFactor == 'old']
  # - test for the homogeneity of variance
  testLevene <- lawstat::levene.test(testData$amountYearly,
                                     group = testData$expFactor)
  eqVar <- testLevene$p.value > .05
  # - pooled-variance t-test if variances are homogeneous, Welch otherwise
  ttest <- t.test(testData$amountYearly ~ testData$expFactor,
                  var.equal = eqVar)
  # - effect size: Cohen's d (pooled) or Glass' delta (scaled by the
  # - "old" control-group sd); magnitude labels exist only for Cohen's d
  if (eqVar) {
    cohend <- cohen.d(d = testData$amountYearly, f = testData$expFactor,
                      pooled = TRUE)
    effSize <- cohend$estimate
    effMag <- as.character(cohend$magnitude)
  } else {
    effSize <- (mean(amountNew) - mean(amountOld)) / sd(amountOld)
    effMag <- '-'
  }
  # - TOST equivalence test against the {-.3, .3} bounds
  tost <- TOSTtwo(m1 = mean(amountNew),
                  m2 = mean(amountOld),
                  sd1 = sd(amountNew),
                  sd2 = sd(amountOld),
                  n1 = length(amountNew),
                  n2 = length(amountOld),
                  low_eqbound_d = -.3,
                  high_eqbound_d = .3,
                  alpha = .05,
                  var.equal = eqVar,
                  plot = FALSE)
  # - store results
  methodAResults$campaign[i] <- campaigns[i]
  methodAResults$n_old[i] <- length(amountOld)
  methodAResults$n_new[i] <- length(amountNew)
  methodAResults$eq_variance[i] <- eqVar
  methodAResults$ttest[i] <- ttest$statistic
  methodAResults$ttest_p[i] <- ttest$p.value
  methodAResults$ttest_df[i] <- ttest$parameter
  methodAResults$ttest_method[i] <- if (eqVar) 'Independent' else 'Welch'
  methodAResults$eff_size[i] <- effSize
  methodAResults$eff_mag[i] <- effMag
  methodAResults$test_eq[i] <- (tost$TOST_p1 < .05 & tost$TOST_p2 < .05)
}
knitr::kable(methodAResults, format = "html") %>%
  kable_styling(full_width = FALSE, position = "left")
campaign | n_old | n_new | eq_variance | ttest | ttest_p | ttest_df | ttest_method | eff_size | eff_mag | test_eq |
---|---|---|---|---|---|---|---|---|---|---|
mob05-ba-171218 | 2372 | 2431 | TRUE | 0.8583697 | 0.3907312 | 4801 | Independent | 0.0247731 | negligible | TRUE |
35-ba-171218 | 2732 | 2482 | TRUE | -0.5540745 | 0.5795516 | 5212 | Independent | 0.0154845 | negligible | TRUE |
pad04-ba-171218 | 2014 | 1940 | TRUE | 0.3133339 | 0.7540436 | 3952 | Independent | -0.0098875 | negligible | TRUE |
en03-ba-171218 | 1621 | 1545 | TRUE | 0.6601157 | 0.5092277 | 3164 | Independent | 0.0234704 | negligible | TRUE |
wpde-04-171218 | 2751 | 2767 | TRUE | -0.1485633 | 0.8819036 | 5516 | Independent | 0.0039980 | negligible | TRUE |
38-ba-171223 | 3895 | 3960 | TRUE | 0.5589331 | 0.5762233 | 7853 | Independent | 0.0126134 | negligible | TRUE |
Force Welch t-tests, assuming that this is the recommended procedure when group sample sizes are not equal; this implies a correction for unequal variances.
Results. The columns have the same meaning as in the previous table, except for that the measure of effects size is always Glass’ delta.
# - Method B: force the Welch t-test for every campaign (recommended when
# - group sizes are unequal); the effect size is always Glass' delta.
# - seq_along() and TRUE/FALSE replace 1:length() and T/F; the repeated
# - per-group subsets are hoisted into amountNew/amountOld.
campaigns <- unique(donationsData$campaign)
# - preallocate one result row per campaign
methodBResults <- data.frame(
  campaign = character(length(campaigns)),
  n_old = numeric(length(campaigns)),
  n_new = numeric(length(campaigns)),
  eq_variance = logical(length(campaigns)),
  ttest = numeric(length(campaigns)),
  ttest_p = numeric(length(campaigns)),
  ttest_df = numeric(length(campaigns)),
  ttest_method = character(length(campaigns)),
  eff_size = numeric(length(campaigns)),
  eff_mag = character(length(campaigns)),
  test_eq = logical(length(campaigns)),
  stringsAsFactors = FALSE
)
for (i in seq_along(campaigns)) {
  # - select this campaign's data
  testData <- donationsData %>%
    filter(campaign %in% campaigns[i]) %>%
    select(expFactor, amountYearly)
  testData$expFactor <- factor(testData$expFactor, levels = c('new', 'old'))
  # - per-group vectors
  amountNew <- testData$amountYearly[testData$expFactor == 'new']
  amountOld <- testData$amountYearly[testData$expFactor == 'old']
  # - Welch t-test (correction for unequal variances)
  ttest <- t.test(testData$amountYearly ~ testData$expFactor,
                  var.equal = FALSE)
  # - Glass' delta: mean difference scaled by the "old" control-group sd
  glassdelta <- (mean(amountNew) - mean(amountOld)) / sd(amountOld)
  # - TOST equivalence test against the {-.3, .3} bounds
  tost <- TOSTtwo(m1 = mean(amountNew),
                  m2 = mean(amountOld),
                  sd1 = sd(amountNew),
                  sd2 = sd(amountOld),
                  n1 = length(amountNew),
                  n2 = length(amountOld),
                  low_eqbound_d = -.3,
                  high_eqbound_d = .3,
                  alpha = .05,
                  var.equal = FALSE,
                  plot = FALSE)
  # - store results
  methodBResults$campaign[i] <- campaigns[i]
  methodBResults$n_old[i] <- length(amountOld)
  methodBResults$n_new[i] <- length(amountNew)
  methodBResults$eq_variance[i] <- FALSE
  methodBResults$ttest[i] <- ttest$statistic
  methodBResults$ttest_p[i] <- ttest$p.value
  methodBResults$ttest_df[i] <- ttest$parameter
  methodBResults$ttest_method[i] <- 'Welch'
  methodBResults$eff_size[i] <- glassdelta
  methodBResults$eff_mag[i] <- '-'
  methodBResults$test_eq[i] <- (tost$TOST_p1 < .05 & tost$TOST_p2 < .05)
}
knitr::kable(methodBResults, format = "html") %>%
  kable_styling(full_width = FALSE, position = "left")
campaign | n_old | n_new | eq_variance | ttest | ttest_p | ttest_df | ttest_method | eff_size | eff_mag | test_eq |
---|---|---|---|---|---|---|---|---|---|---|
mob05-ba-171218 | 2372 | 2431 | FALSE | 0.8591243 | 0.3903149 | 4790.416 | Welch | 0.0257209 | - | TRUE |
35-ba-171218 | 2732 | 2482 | FALSE | -0.5584103 | 0.5765883 | 5188.292 | Welch | -0.0143053 | - | TRUE |
pad04-ba-171218 | 2014 | 1940 | FALSE | 0.3108147 | 0.7559614 | 3249.093 | Welch | 0.0131630 | - | TRUE |
en03-ba-171218 | 1621 | 1545 | FALSE | 0.6572774 | 0.5110529 | 3009.390 | Welch | 0.0258540 | - | TRUE |
wpde-04-171218 | 2751 | 2767 | FALSE | -0.1484904 | 0.8819613 | 5352.054 | Welch | -0.0036981 | - | TRUE |
38-ba-171223 | 3895 | 3960 | FALSE | 0.5595943 | 0.5757724 | 7729.371 | Welch | 0.0136321 | - | TRUE |
Fit a power law to each data set to learn about the nature of the sampling distribution; if the power law behavior cannot be excluded we conclude that the sampling distribution is not normal and choose to proceed with non-parametric tests; if the power law can be excluded, we choose to remove outliers and repeat the t-tests.
Test the power law behavior of the yearly amount distributions per campaign and per experimental group. CITATION. Colin S. Gillespie (2015). Fitting Heavy Tailed Distributions: The poweRlaw Package. Journal of Statistical Software, 64(2), 1-16. URL http://www.jstatsoft.org/v64/i02/.
NOTE. Do not run this; it is parallelized and takes a while to complete.
# - fit a discrete power law (poweRlaw::displ) to each campaign x group
# - data set and bootstrap a goodness-of-fit p-value
# - NOTE: parallelized (8 threads) and slow — do not run casually
# - NOTE(review): xmin is not estimated (no estimate_xmin() call), so the
# - fit starts at the minimum observed value — see the remark below
campaigns <- unique(donationsData$campaign)
plList <- vector(mode = "list", length = length(campaigns))
for (i in seq_along(campaigns)) {
  inCampaign <- donationsData$campaign == campaigns[i]
  testSetOld <- as.integer(
    donationsData$amountYearly[which(inCampaign & donationsData$expFactor == 'old')])
  testSetNew <- as.integer(
    donationsData$amountYearly[which(inCampaign & donationsData$expFactor == 'new')])
  mmTestSetOld <- displ$new(testSetOld)
  mmTestSetNew <- displ$new(testSetNew)
  # - '<-' instead of '=' for assignment (style); bootstrap GOF p-values
  plList[[i]]$old <- bootstrap_p(mmTestSetOld, no_of_sims = 1000, threads = 8)
  plList[[i]]$new <- bootstrap_p(mmTestSetNew, no_of_sims = 1000, threads = 8)
}
# - extract the bootstrap p-values per group
oldPValues <- sapply(plList, function(x) {x$old$p})
newPValues <- sapply(plList, function(x) {x$new$p})
Results. With 3 out of 12 data sets exhibiting power law behavior w. x_min
set to the lowest value in the data set, we choose to remove the outliers and repeat the t-tests. NOTE. Re-running this with a previous estimation of x_min
wouldn’t hurt.
Run independent t-tests or Welch, after testing for homogeneity of variances w. Levene’s test, w. correction for unequal variances if necessary, and following the removal of outliers.
NOTE. Only extreme outliers on the upper tail of the distribution (> 3 * IQR
, IQR
== interquartile range) were removed. In effect, extremely high yearly donation amounts were removed.
# - Method C1: per-campaign t-tests after removal of extreme upper-tail
# - outliers. Levene's test decides between the independent (pooled)
# - t-test w. Cohen's d and the Welch t-test w. Glass' delta; a TOST
# - equivalence test against the {-.3, .3} bounds is run in both cases.
# - (Branch duplication is deduped via eqVar; seq_along()/TRUE/FALSE
# - replace 1:length()/T/F; the outlier marking is vectorized.)
campaigns <- unique(donationsData$campaign)
# - preallocate one result row per campaign
methodC1Results <- data.frame(
  campaign = character(length(campaigns)),
  n_old = numeric(length(campaigns)),
  n_new = numeric(length(campaigns)),
  eq_variance = logical(length(campaigns)),
  ttest = numeric(length(campaigns)),
  ttest_p = numeric(length(campaigns)),
  ttest_df = numeric(length(campaigns)),
  ttest_method = character(length(campaigns)),
  eff_size = numeric(length(campaigns)),
  eff_mag = character(length(campaigns)),
  test_eq = logical(length(campaigns)),
  stringsAsFactors = FALSE
)
for (i in seq_along(campaigns)) {
  # - select this campaign's data
  testData <- donationsData %>%
    filter(campaign %in% campaigns[i]) %>%
    select(expFactor, amountYearly)
  testData$expFactor <- factor(testData$expFactor, levels = c('new', 'old'))
  # - remove outliers: one-sided, keep amounts <= 3 * within-group IQR
  # - NOTE(review): the cutoff is 3*IQR itself, not the conventional
  # - Q3 + 3*IQR fence — confirm this is the intended criterion
  iqrGroup <- ifelse(testData$expFactor == 'new',
                     IQR(testData$amountYearly[testData$expFactor == 'new']),
                     IQR(testData$amountYearly[testData$expFactor == 'old']))
  testData <- filter(testData, amountYearly <= 3 * iqrGroup)
  # - per-group vectors, shared by both branches below
  amountNew <- testData$amountYearly[testData$expFactor == 'new']
  amountOld <- testData$amountYearly[testData$expFactor == 'old']
  # - test for the homogeneity of variance
  testLevene <- lawstat::levene.test(testData$amountYearly,
                                     group = testData$expFactor)
  eqVar <- testLevene$p.value > .05
  # - pooled-variance t-test if variances are homogeneous, Welch otherwise
  ttest <- t.test(testData$amountYearly ~ testData$expFactor,
                  var.equal = eqVar)
  # - effect size: Cohen's d (pooled) or Glass' delta (control-group sd)
  if (eqVar) {
    cohend <- cohen.d(d = testData$amountYearly, f = testData$expFactor,
                      pooled = TRUE)
    effSize <- cohend$estimate
    effMag <- as.character(cohend$magnitude)
  } else {
    effSize <- (mean(amountNew) - mean(amountOld)) / sd(amountOld)
    effMag <- '-'
  }
  # - TOST equivalence test against the {-.3, .3} bounds
  tost <- TOSTtwo(m1 = mean(amountNew),
                  m2 = mean(amountOld),
                  sd1 = sd(amountNew),
                  sd2 = sd(amountOld),
                  n1 = length(amountNew),
                  n2 = length(amountOld),
                  low_eqbound_d = -.3,
                  high_eqbound_d = .3,
                  alpha = .05,
                  var.equal = eqVar,
                  plot = FALSE)
  # - store results
  methodC1Results$campaign[i] <- campaigns[i]
  methodC1Results$n_old[i] <- length(amountOld)
  methodC1Results$n_new[i] <- length(amountNew)
  methodC1Results$eq_variance[i] <- eqVar
  methodC1Results$ttest[i] <- ttest$statistic
  methodC1Results$ttest_p[i] <- ttest$p.value
  methodC1Results$ttest_df[i] <- ttest$parameter
  methodC1Results$ttest_method[i] <- if (eqVar) 'Independent' else 'Welch'
  methodC1Results$eff_size[i] <- effSize
  methodC1Results$eff_mag[i] <- effMag
  methodC1Results$test_eq[i] <- (tost$TOST_p1 < .05 & tost$TOST_p2 < .05)
}
knitr::kable(methodC1Results, format = "html") %>%
  kable_styling(full_width = FALSE, position = "left")
campaign | n_old | n_new | eq_variance | ttest | ttest_p | ttest_df | ttest_method | eff_size | eff_mag | test_eq |
---|---|---|---|---|---|---|---|---|---|---|
mob05-ba-171218 | 2225 | 2258 | TRUE | -0.0984794 | 0.9215560 | 4481.000 | Independent | -0.0029417 | negligible | TRUE |
35-ba-171218 | 2648 | 2404 | TRUE | -0.5889559 | 0.5559172 | 5050.000 | Independent | 0.0165837 | negligible | TRUE |
pad04-ba-171218 | 1944 | 1873 | TRUE | -1.0888217 | 0.2763013 | 3815.000 | Independent | 0.0352502 | negligible | TRUE |
en03-ba-171218 | 1570 | 1493 | TRUE | -0.0928635 | 0.9260181 | 3061.000 | Independent | -0.0033569 | negligible | TRUE |
wpde-04-171218 | 2151 | 2160 | FALSE | 0.0460051 | 0.9633083 | 4306.996 | Welch | 0.0013893 | - | TRUE |
38-ba-171223 | 3781 | 3830 | TRUE | -0.2823690 | 0.7776683 | 7609.000 | Independent | -0.0064734 | negligible | TRUE |
Force Welch t-test, after testing for homogeneity of variances w. Levene’s test, w. correction for unequal variances if necessary, and following the removal of outliers.
NOTE. Only extreme outliers on the upper tail of the distribution (> 3 * IQR
, IQR
== interquartile range) were removed. In effect, extremely high yearly donation amounts were removed.
NOTE. The effect size (eff_size
) is always Glass’ delta.
# - Method C2: force the Welch t-test for every campaign, after removal of
# - extreme upper-tail outliers; the effect size is always Glass' delta.
# - seq_along()/TRUE/FALSE replace 1:length()/T/F; per-group subsets and
# - the outlier filter are hoisted/vectorized.
campaigns <- unique(donationsData$campaign)
# - preallocate one result row per campaign
methodC2Results <- data.frame(
  campaign = character(length(campaigns)),
  n_old = numeric(length(campaigns)),
  n_new = numeric(length(campaigns)),
  eq_variance = logical(length(campaigns)),
  ttest = numeric(length(campaigns)),
  ttest_p = numeric(length(campaigns)),
  ttest_df = numeric(length(campaigns)),
  ttest_method = character(length(campaigns)),
  eff_size = numeric(length(campaigns)),
  eff_mag = character(length(campaigns)),
  test_eq = logical(length(campaigns)),
  stringsAsFactors = FALSE
)
for (i in seq_along(campaigns)) {
  # - select this campaign's data
  testData <- donationsData %>%
    filter(campaign %in% campaigns[i]) %>%
    select(expFactor, amountYearly)
  testData$expFactor <- factor(testData$expFactor, levels = c('new', 'old'))
  # - remove outliers: one-sided, keep amounts <= 3 * within-group IQR
  # - NOTE(review): the cutoff is 3*IQR itself, not the conventional
  # - Q3 + 3*IQR fence — confirm this is the intended criterion
  iqrGroup <- ifelse(testData$expFactor == 'new',
                     IQR(testData$amountYearly[testData$expFactor == 'new']),
                     IQR(testData$amountYearly[testData$expFactor == 'old']))
  testData <- filter(testData, amountYearly <= 3 * iqrGroup)
  # - per-group vectors
  amountNew <- testData$amountYearly[testData$expFactor == 'new']
  amountOld <- testData$amountYearly[testData$expFactor == 'old']
  # - Welch t-test (correction for unequal variances)
  ttest <- t.test(testData$amountYearly ~ testData$expFactor,
                  var.equal = FALSE)
  # - Glass' delta: mean difference scaled by the "old" control-group sd
  glassdelta <- (mean(amountNew) - mean(amountOld)) / sd(amountOld)
  # - TOST equivalence test against the {-.3, .3} bounds
  tost <- TOSTtwo(m1 = mean(amountNew),
                  m2 = mean(amountOld),
                  sd1 = sd(amountNew),
                  sd2 = sd(amountOld),
                  n1 = length(amountNew),
                  n2 = length(amountOld),
                  low_eqbound_d = -.3,
                  high_eqbound_d = .3,
                  alpha = .05,
                  var.equal = FALSE,
                  plot = FALSE)
  # - store results
  methodC2Results$campaign[i] <- campaigns[i]
  methodC2Results$n_old[i] <- length(amountOld)
  methodC2Results$n_new[i] <- length(amountNew)
  methodC2Results$eq_variance[i] <- FALSE
  methodC2Results$ttest[i] <- ttest$statistic
  methodC2Results$ttest_p[i] <- ttest$p.value
  methodC2Results$ttest_df[i] <- ttest$parameter
  methodC2Results$ttest_method[i] <- 'Welch'
  methodC2Results$eff_size[i] <- glassdelta
  methodC2Results$eff_mag[i] <- '-'
  methodC2Results$test_eq[i] <- (tost$TOST_p1 < .05 & tost$TOST_p2 < .05)
}
knitr::kable(methodC2Results, format = "html") %>%
  kable_styling(full_width = FALSE, position = "left")
campaign | n_old | n_new | eq_variance | ttest | ttest_p | ttest_df | ttest_method | eff_size | eff_mag | test_eq |
---|---|---|---|---|---|---|---|---|---|---|
mob05-ba-171218 | 2225 | 2258 | FALSE | -0.0984699 | 0.9215636 | 4477.542 | Welch | -0.0029226 | - | TRUE |
35-ba-171218 | 2648 | 2404 | FALSE | -0.5886759 | 0.5561053 | 4993.308 | Welch | -0.0166699 | - | TRUE |
pad04-ba-171218 | 1944 | 1873 | FALSE | -1.0887222 | 0.2763453 | 3808.242 | Welch | -0.0353386 | - | TRUE |
en03-ba-171218 | 1570 | 1493 | FALSE | -0.0927810 | 0.9260837 | 3038.731 | Welch | -0.0034163 | - | TRUE |
wpde-04-171218 | 2151 | 2160 | FALSE | 0.0460051 | 0.9633083 | 4306.996 | Welch | 0.0013893 | - | TRUE |
38-ba-171223 | 3781 | 3830 | FALSE | -0.2823820 | 0.7776583 | 7608.753 | Welch | -0.0064970 | - | TRUE |
Conduct non-parametric, independent 2-group Mann-Whitney U Test; no removal of outliers.
Results. campaign
stands for the campaign code, n_old
and n_new
are the number of observations for the “new” and the “old” page respectively, W
is the Wilcoxon test statistics, and MWUTest_p
the associated probability of the Type I Error.
# - Method D1: non-parametric Mann-Whitney U (Wilcoxon rank-sum) test per
# - campaign, without outlier removal.
# - seq_along() and FALSE replace 1:length() and F.
campaigns <- unique(donationsData$campaign)
# - preallocate one result row per campaign
methodD1Results <- data.frame(
  campaign = character(length(campaigns)),
  n_old = numeric(length(campaigns)),
  n_new = numeric(length(campaigns)),
  W = numeric(length(campaigns)),
  MWUTest_p = numeric(length(campaigns)),
  stringsAsFactors = FALSE
)
for (i in seq_along(campaigns)) {
  # - select this campaign's data
  testData <- donationsData %>%
    filter(campaign %in% campaigns[i]) %>%
    select(expFactor, amountYearly)
  testData$expFactor <- factor(testData$expFactor, levels = c('new', 'old'))
  # - Mann-Whitney U (Wilcoxon rank-sum) test
  mwu <- wilcox.test(testData$amountYearly ~ testData$expFactor)
  # - store results
  methodD1Results$campaign[i] <- campaigns[i]
  methodD1Results$n_old[i] <- sum(testData$expFactor == 'old')
  methodD1Results$n_new[i] <- sum(testData$expFactor == 'new')
  methodD1Results$W[i] <- mwu$statistic
  methodD1Results$MWUTest_p[i] <- mwu$p.value
}
knitr::kable(methodD1Results, format = "html") %>%
  kable_styling(full_width = FALSE, position = "left")
campaign | n_old | n_new | W | MWUTest_p |
---|---|---|---|---|
mob05-ba-171218 | 2372 | 2431 | 2913410 | 0.4998617 |
35-ba-171218 | 2732 | 2482 | 3340974 | 0.3442295 |
pad04-ba-171218 | 2014 | 1940 | 1904823 | 0.1598756 |
en03-ba-171218 | 1621 | 1545 | 1236390 | 0.5227544 |
wpde-04-171218 | 2751 | 2767 | 3811216 | 0.9287713 |
38-ba-171223 | 3895 | 3960 | 7687976 | 0.8034789 |
Conduct non-parametric, independent 2-group Mann-Whitney U Test, following the removal of outliers.
NOTE. Only extreme outliers on the upper tail of the distribution (> 3 * IQR
, IQR
== interquartile range) were removed. In effect, extremely high yearly donation amounts were removed.
# - Method D2: per-campaign Mann-Whitney U test after removing extreme
# - upper-tail outliers (> 3 * within-group IQR).
# - seq_along()/TRUE/FALSE replace 1:length()/T/F; the outlier marking
# - is vectorized.
campaigns <- unique(donationsData$campaign)
# - preallocate one result row per campaign
methodD2Results <- data.frame(
  campaign = character(length(campaigns)),
  n_old = numeric(length(campaigns)),
  n_new = numeric(length(campaigns)),
  W = numeric(length(campaigns)),
  MWUTest_p = numeric(length(campaigns)),
  stringsAsFactors = FALSE
)
for (i in seq_along(campaigns)) {
  # - select this campaign's data
  testData <- donationsData %>%
    filter(campaign %in% campaigns[i]) %>%
    select(expFactor, amountYearly)
  testData$expFactor <- factor(testData$expFactor, levels = c('new', 'old'))
  # - remove outliers: one-sided, keep amounts <= 3 * within-group IQR
  # - NOTE(review): the cutoff is 3*IQR itself, not the conventional
  # - Q3 + 3*IQR fence — confirm this is the intended criterion
  iqrGroup <- ifelse(testData$expFactor == 'new',
                     IQR(testData$amountYearly[testData$expFactor == 'new']),
                     IQR(testData$amountYearly[testData$expFactor == 'old']))
  testData <- filter(testData, amountYearly <= 3 * iqrGroup)
  # - Mann-Whitney U (Wilcoxon rank-sum) test
  mwu <- wilcox.test(testData$amountYearly ~ testData$expFactor)
  # - store results
  methodD2Results$campaign[i] <- campaigns[i]
  methodD2Results$n_old[i] <- sum(testData$expFactor == 'old')
  methodD2Results$n_new[i] <- sum(testData$expFactor == 'new')
  methodD2Results$W[i] <- mwu$statistic
  methodD2Results$MWUTest_p[i] <- mwu$p.value
}
knitr::kable(methodD2Results, format = "html") %>%
  kable_styling(full_width = FALSE, position = "left")
campaign | n_old | n_new | W | MWUTest_p |
---|---|---|---|---|
mob05-ba-171218 | 2225 | 2258 | 2516217 | 0.9159759 |
35-ba-171218 | 2648 | 2404 | 3131223 | 0.2980615 |
pad04-ba-171218 | 1944 | 1873 | 1772213 | 0.1402104 |
en03-ba-171218 | 1570 | 1493 | 1153248 | 0.4243507 |
wpde-04-171218 | 2151 | 2160 | 2326203 | 0.9372047 |
38-ba-171223 | 3781 | 3830 | 7188939 | 0.5748126 |
In this section, we consider the overall data on donation amount for all campaigns. Two attempts at a parametric analysis are presented in this section. The first one is a simple t-test with “old” vs. “new” as the experimental factor and the donation amount as the dependent variable; we will perform both the independent t-test as well as the Welch t-test (to account for unequal sample sizes in “old” vs. “new”). The second one is a between-subjects ANOVA approach with two experimental factors: (a) “old” vs. “new” page, and (b) campaign, and the donation amount as the dependent variable.
Independent t-test
# - overall independent two-sample t-test ("old" vs. "new", all campaigns),
# - with pooled variance (var.equal = TRUE).
# - NOTE: the original comment claimed "for unequal variances", which
# - contradicted var.equal = T — this is the equal-variance Student t-test.
ttest1 <- t.test(donationsData$amountYearly ~ donationsData$expFactor,
                 var.equal = TRUE)
ttest1
Two Sample t-test
data: donationsData$amountYearly by donationsData$expFactor
t = 0.41328, df = 30508, p-value = 0.6794
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.5689491 0.8729871
sample estimates:
mean in group new mean in group old
20.72905 20.57703
Conclusion. No effect in “old” vs. “new” page in the donation amount.
Welch t-test
# - overall Welch two-sample t-test ("old" vs. "new", all campaigns):
# - var.equal = FALSE applies the correction for unequal variances
ttest2 <- t.test(donationsData$amountYearly ~ donationsData$expFactor,
                 var.equal = FALSE)
ttest2
Welch Two Sample t-test
data: donationsData$amountYearly by donationsData$expFactor
t = 0.4133, df = 30503, p-value = 0.6794
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.5689196 0.8729576
sample estimates:
mean in group new mean in group old
20.72905 20.57703
Conclusion. No effect in “old” vs. “new” page in the donation amount.
2-Way Independent ANOVA
# - 2-way between-subjects ANOVA: page version x campaign on the yearly
# - donation amount; Type III SS decomposition for unequal cell sizes
# - NOTE(review): car::Anova(type = "III") yields the usual Type III tests
# - only under sum-to-zero contrasts (options(contrasts = c("contr.sum",
# - "contr.poly"))); with R's default treatment contrasts the main-effect
# - tests are order/baseline dependent — confirm which contrasts were set.
atest <- aov(amountYearly ~ expFactor * campaign,
data = donationsData)
Anova(atest, type = "III")
Anova Table (Type III tests)
Response: amountYearly
Sum Sq Df F value Pr(>F)
(Intercept) 848207 1 839.5827 <2e-16 ***
expFactor 236 1 0.2334 0.6290
campaign 325458 5 64.4299 <2e-16 ***
expFactor:campaign 887 5 0.1755 0.9718
Residuals 30811264 30498
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Conclusion. No main effect in “old” vs. “new” page in the donation amount, and no significant interaction between this factor and the campaign. However, we observe a significant effect of the campaign:
# - mean yearly donation amount per campaign, with standard errors
donationsCampaignPlot <- donationsData %>%
  select(campaign, amountYearly) %>%
  group_by(campaign) %>%
  summarise(mean = mean(amountYearly),
            stderr = sd(amountYearly) / sqrt(n()))
# - bar chart of campaign means +/- one standard error
ggplot(donationsCampaignPlot,
       aes(x = campaign, y = mean, fill = campaign)) +
  geom_bar(stat = "identity", color = "black", width = .2) +
  geom_errorbar(aes(ymin = mean - stderr, ymax = mean + stderr, width = .2)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(x = "", y = "Mean Yearly Donation Amount")
Binary logistic regression was a method of our choice, with the experimental A/B factor (“new”/“old”) as a predictor of opt_in
. The “old” level of the experimental factor was used as a baseline. Conclusion. Except for the mob05-ba-171218
campaign where the experimental factor did not have a statistically significant effect upon opt_in
, the new page seems to always induce a reduction in the probability to opt_in.
Results. campaign
stands for the campaign code, n_old
and n_new
the number of observations for the “new” and the “old” page, respectively, coeff
is the regression coefficient for the A/B experimental factor (NOTE: using old
as a baseline), exp_coeff
is the exponential of the regression coefficient - the delta odds, should be interpreted as the increase in the odds of opting in vs. not opting in with a unit increase in the experimental factors which in our case means switching from the old
to the new
page, coeff_z
is the value of the Wald’s test (similar to t-test in linear regression, tests if the regression coefficient is significantly different from zero), coeff_p
the respective probability of the Type I Error, sig
whether the Wald’s test has reached the conventional alpha < .05
significance criterion.
# - Per-campaign binary logistic regression of opt_in on the A/B factor.
# - "old" is the first factor level and therefore the regression baseline;
# - row 2 of the coefficient table is the expFactor ("new") effect.
# - Results: one row per campaign (n per group, coefficient, odds ratio,
# - Wald z, p-value, and significance at alpha = .05).
campaigns <- unique(donationsData$campaign)
opt_in_BLR_Results <- data.frame(
  campaign = character(length(campaigns)),
  n_old = numeric(length(campaigns)),
  n_new = numeric(length(campaigns)),
  coeff = numeric(length(campaigns)),
  exp_coeff = numeric(length(campaigns)),
  coeff_z = numeric(length(campaigns)),
  coeff_p = numeric(length(campaigns)),
  sig = logical(length(campaigns)),
  stringsAsFactors = FALSE
)
# - seq_along() is safe if campaigns is ever empty (1:length() is not)
for (i in seq_along(campaigns)) {
  # - select testData for this campaign
  testData <- donationsData %>%
    filter(campaign %in% campaigns[i]) %>%
    select(expFactor, amountYearly, opt_in)
  # - "old" first, so it serves as the baseline level
  testData$expFactor <- factor(testData$expFactor, levels = c('old', 'new'))
  # - Binary logistic regression; data= ties the formula to the data frame
  # - instead of detached testData$ vectors
  tmodel <- glm(opt_in ~ expFactor,
                data = testData,
                family = "binomial")
  # - extract the coefficient table once (was recomputed five times per
  # - iteration via repeated summary() calls)
  coeffs <- summary(tmodel)$coefficients
  # - store results
  opt_in_BLR_Results$campaign[i] <- campaigns[i]
  opt_in_BLR_Results$n_old[i] <- sum(testData$expFactor == 'old')
  opt_in_BLR_Results$n_new[i] <- sum(testData$expFactor == 'new')
  opt_in_BLR_Results$coeff[i] <- coeffs[2, 1]
  opt_in_BLR_Results$exp_coeff[i] <- exp(coeffs[2, 1])
  opt_in_BLR_Results$coeff_z[i] <- coeffs[2, 3]
  opt_in_BLR_Results$coeff_p[i] <- coeffs[2, 4]
  opt_in_BLR_Results$sig[i] <- coeffs[2, 4] < .05
}
knitr::kable(opt_in_BLR_Results, format = "html") %>%
  kable_styling(full_width = FALSE, position = "left")
campaign | n_old | n_new | coeff | exp_coeff | coeff_z | coeff_p | sig |
---|---|---|---|---|---|---|---|
mob05-ba-171218 | 2372 | 2431 | -0.0589191 | 0.9427831 | -0.9386565 | 0.3479071 | FALSE |
35-ba-171218 | 2732 | 2482 | -0.6335634 | 0.5306973 | -9.4906179 | 0.0000000 | TRUE |
pad04-ba-171218 | 2014 | 1940 | -0.8305340 | 0.4358165 | -11.0079858 | 0.0000000 | TRUE |
en03-ba-171218 | 1621 | 1545 | -0.5292162 | 0.5890665 | -6.3989787 | 0.0000000 | TRUE |
wpde-04-171218 | 2751 | 2767 | -0.4554023 | 0.6341928 | -7.4850421 | 0.0000000 | TRUE |
38-ba-171223 | 3895 | 3960 | -0.6537384 | 0.5200978 | -12.2507708 | 0.0000000 | TRUE |
Here we perform a Binary Logistic Regression w. opt_in
as a dependent variable across the whole data set, taking (a) “old” vs. “new” page and (b) campaign as experimental factors. The campaign
factor was coded so as to assume that the mob05-ba-171218
campaign is the baseline. The only motivation for this choice of baseline is the fact that this campaign has the lowest average donation amount.
# - Binary logistic regression over the whole data set:
# - opt_in ~ A/B factor + campaign.
# - BUG FIX: the model was previously fit on donationsData directly,
# - which silently ignored the factor releveling done on testData below —
# - the reported baselines were "new" and the alphabetically-first campaign,
# - not "old" and the lowest-mean campaign as the surrounding text claims.
testData <- donationsData
# - order campaign levels by mean yearly donation so the campaign with the
# - lowest average donation becomes the baseline
orderCampaigns <- donationsCampaignPlot$campaign[order(donationsCampaignPlot$mean)]
testData$expFactor <- factor(testData$expFactor, levels = c("old", "new"))
testData$campaign <- factor(testData$campaign, levels = orderCampaigns)
bmodel <- glm(opt_in ~ expFactor + campaign,
              data = testData,
              family = "binomial")
summary(bmodel)
Call:
glm(formula = donationsData$opt_in ~ donationsData$expFactor +
donationsData$campaign, family = "binomial")
Deviance Residuals:
Min 1Q Median 3Q Max
-0.9424 -0.8400 -0.6875 1.4322 1.8137
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.43015 0.03608 -39.641 < 2e-16 ***
donationsData$expFactorold 0.51784 0.02641 19.604 < 2e-16 ***
donationsData$campaign38-ba-171223 0.05197 0.04181 1.243 0.2139
donationsData$campaignen03-ba-171218 0.09433 0.05223 1.806 0.0709 .
donationsData$campaignmob05-ba-171218 0.33083 0.04537 7.292 3.05e-13 ***
donationsData$campaignpad04-ba-171218 0.10823 0.04886 2.215 0.0267 *
donationsData$campaignwpde-04-171218 0.19258 0.04447 4.330 1.49e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 35205 on 30509 degrees of freedom
Residual deviance: 34750 on 30503 degrees of freedom
AIC: 34764
Number of Fisher Scoring iterations: 4
Conclusion. A. The “old” landing page has a higher probability for opt_in
than the “new” landing page. B. The campaigns which have attracted higher donation amounts on the average than the mob05-ba-171218
baseline (i.e. the campaign that has attracted the lowest donation amounts on the average) also have a higher probability for opt_in
(with en03-ba-171218
having a marginally significant effect only, and 38-ba-171223
no effect at all in comparison to the baseline).
# - Membership data set preparation.
# - Keep only the A/B-test banner keywords (ending in "-ctrl" or "var")
# - with status == 1.
membershipData <- filter(membershipData, grepl("-ctrl$|var$", membershipData$banner_keyword))
membershipData <- filter(membershipData, status == 1)
# - Map banner keywords onto the experimental factor via the recoding scheme.
recodingScheme <- read.csv('data/recodingScheme.csv',
                           header = TRUE,
                           stringsAsFactors = FALSE,
                           check.names = FALSE)
oldCodes <- unique(recodingScheme$`keyword of old lp`)
newCodes <- unique(recodingScheme$`keyword of new lp`)
# - vectorized recode (replaces an element-wise sapply(), which was both
# - slower and attached names to the result); keywords in neither list are
# - marked 'Unnasigned' (sic — spelling kept for consistency) and dropped
membershipData$expFactor <- ifelse(membershipData$banner_keyword %in% oldCodes,
                                   "old",
                                   ifelse(membershipData$banner_keyword %in% newCodes,
                                          "new",
                                          "Unnasigned"))
# - filter out 'Unnasigned' codes:
membershipData <- filter(membershipData, !(expFactor == 'Unnasigned'))
# - Normalize membership fees to a yearly amount via a lookup table:
# - interval 1 = monthly (x12), 3 = quarterly (x4), 6 = half-yearly (x2),
# - 0 and 12 are taken as already-yearly amounts (x1); any other interval
# - yields 0, matching the previous mask-based initialization.
feeMultiplier <- c(`0` = 1, `1` = 12, `3` = 4, `6` = 2, `12` = 1)
multiplier <- feeMultiplier[as.character(membershipData$membership_fee_interval)]
multiplier[is.na(multiplier)] <- 0
membershipData$amountYearly <- membershipData$membership_fee * multiplier
membershipData <- select(membershipData,
                         banner_campaign, expFactor, amountYearly, donation_receipt)
The analysis of the Membership data set relies on non-parametric tests only (Mann-Whitney U test), because the per-campaign sample sizes are very small following the data clean-up. Conclusion. No effect of the new page in any of the campaigns.
Results. campaign
stands for the campaign code, n_old
and n_new
are the number of observations for the “old” and the “new” page respectively, W
is the Wilcoxon test statistics, and MWUTest_p
the associated probability of the Type I Error.
# - Per-campaign Mann-Whitney U (Wilcoxon rank-sum) tests of the yearly
# - membership fee, old vs. new landing page. Non-parametric tests are used
# - because per-campaign sample sizes are small after clean-up.
# - NOTE(review): the 'memberhsip' typo in the results object name is kept
# - deliberately — downstream code may reference it.
campaigns <- unique(membershipData$banner_campaign)
methodD1Results_memberhsip <- data.frame(
  campaign = character(length(campaigns)),
  n_old = numeric(length(campaigns)),
  n_new = numeric(length(campaigns)),
  W = numeric(length(campaigns)),
  MWUTest_p = numeric(length(campaigns)),
  stringsAsFactors = FALSE
)
# - seq_along() is safe if campaigns is ever empty (1:length() is not)
for (i in seq_along(campaigns)) {
  # - select testData for this campaign
  testData <- membershipData %>%
    filter(banner_campaign %in% campaigns[i]) %>%
    select(expFactor, amountYearly)
  testData$expFactor <- factor(testData$expFactor, levels = c('new', 'old'))
  # - non-parametric test; data= ties the formula to the data frame
  # - instead of detached testData$ vectors
  mwu <- wilcox.test(amountYearly ~ expFactor, data = testData)
  # - store results
  methodD1Results_memberhsip$campaign[i] <- campaigns[i]
  methodD1Results_memberhsip$n_old[i] <- sum(testData$expFactor == 'old')
  methodD1Results_memberhsip$n_new[i] <- sum(testData$expFactor == 'new')
  methodD1Results_memberhsip$W[i] <- mwu$statistic
  methodD1Results_memberhsip$MWUTest_p[i] <- mwu$p.value
}
knitr::kable(methodD1Results_memberhsip, format = "html") %>%
  kable_styling(full_width = FALSE, position = "left")
campaign | n_old | n_new | W | MWUTest_p |
---|---|---|---|---|
35-ba-171218 | 15 | 7 | 58.5 | 0.6714894 |
wpde-04-171218 | 35 | 25 | 365.0 | 0.2720977 |
mob05-ba-171218 | 17 | 6 | 40.0 | 0.4116710 |
pad04-ba-171218 | 10 | 5 | 23.0 | 0.8136099 |
en03-ba-171218 | 11 | 8 | 42.0 | 0.8950513 |
38-ba-171223 | 28 | 23 | 315.0 | 0.8952855 |
# - Pooled Mann-Whitney U test: yearly membership fee, old vs. new landing
# - page, across all campaigns combined.
testData <- membershipData
# - level order 'new', 'old' matches the per-campaign tests above
testData$expFactor <- factor(testData$expFactor, levels = c("new", "old"))
# - non-parametric (Wilcoxon rank-sum) test; print the result
mwu <- wilcox.test(testData$amountYearly ~ testData$expFactor)
mwu
Conclusion. Non-parametric test implies no differences between the old
and new
page in membership fee.
Campaign and New/Old as experimental factors
testData <- membershipData
# - NOTE(review): this chunk repeats the pooled Mann-Whitney U test already
# - run above — likely a duplicated chunk from rendering; results are
# - identical. Kept byte-for-byte; consider removing one copy.
testData$expFactor <- factor(testData$expFactor, levels = c('new', 'old'))
# - non-parametric test
mwu <- wilcox.test(testData$amountYearly ~ testData$expFactor)
mwu
Conclusion. Non-parametric test implies no differences between the old
and new
page in membership fee.
# - Two-way independent ANOVA on the yearly membership fee:
# - A/B experimental factor crossed with banner campaign.
# - Type III sums of squares account for the unbalanced design.
atest <- aov(
  amountYearly ~ expFactor * banner_campaign,
  data = testData
)
car::Anova(atest, type = "III")
Anova Table (Type III tests)
Response: amountYearly
Sum Sq Df F value Pr(>F)
(Intercept) 20629 1 5.0050 0.02651 *
expFactor 2 1 0.0005 0.98244
banner_campaign 5534 5 0.2686 0.92981
expFactor:banner_campaign 2603 5 0.1263 0.98631
Residuals 733641 178
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Conclusion. No effects reach statistical significance (i.e. no differences between new
and old
, and no significant effect of campaign
; no interaction).