Feedback should be sent to goran.milovanovic_ext@wikimedia.de
.
Libraries setup.
# !diagnostics off
### --- Setup
# - knitr chunk defaults: wide figures for the rendered report
knitr::opts_chunk$set(fig.width = 15, fig.height = 8)
# - data wrangling verbs (filter, select, %>%)
library(dplyr)
# - plotting (loaded for the report; not used in the code shown here)
library(ggplot2)
# - fast CSV I/O (fread)
library(data.table)
# - Levene's test of homogeneity of variance (levene.test)
library(lawstat)
# - effect sizes (cohen.d)
library(effsize)
# - TOST equivalence testing (TOSTtwo)
library(TOSTER)
# - power-law fitting and bootstrap goodness-of-fit (displ, bootstrap_p)
library(poweRlaw)
# - HTML table styling (kable_styling)
library(kableExtra)
Load data sets.
# - Donations: one row per donation in the 2017 campaign.
donationsData <- fread('data/campaign-2017-donations.csv',
                       header = TRUE, data.table = FALSE)
# - Memberships: one row per membership application in the 2017 campaign.
membershipData <- fread('data/campaign-2017-memberships.csv',
                        header = TRUE, data.table = FALSE)
The donations dataset
The membership dataset
Experimental factor: in the donationsData$keyword
, anything ending in “-ctrl” or “var”. Also, in donationsData$status
: filter out Z and X cases.
### --- Donations: keep experimental landing pages only and recode
# - Experimental keywords end in "-ctrl" or "var"; drop all other keywords.
donationsData <- filter(donationsData, grepl("-ctrl$|var$", keyword))
# - Drop invalid donations (status 'X' or 'Z').
donationsData <- filter(donationsData, !(status %in% c('X', 'Z')))
# - Recoding scheme: maps keywords onto the old vs. new landing page.
recodingScheme <- read.csv('data/recodingScheme.csv',
                           header = TRUE,
                           stringsAsFactors = FALSE,
                           check.names = FALSE)
oldCodes <- unique(recodingScheme$`keyword of old lp`)
newCodes <- unique(recodingScheme$`keyword of new lp`)
# - Experimental factor, vectorized (replaces the per-element sapply).
# - NOTE: "Unnasigned" [sic] is a runtime string matched by the filter below;
# - spelling kept as-is for consistency with the rest of the analysis.
donationsData$expFactor <- ifelse(donationsData$keyword %in% oldCodes, "old",
                                  ifelse(donationsData$keyword %in% newCodes,
                                         "new", "Unnasigned"))
# - filter out 'Unnasigned' codes:
donationsData <- filter(donationsData, !(expFactor == 'Unnasigned'))
# - Annualize the amount from donationsData$interval: months between
# - recurring payments; 0 == one-off donation, 12 would mean once per year.
donationsData$amountYearly <- numeric(nrow(donationsData))
oneOff <- donationsData$interval == 0 | donationsData$interval == 12
donationsData$amountYearly[oneOff] <- donationsData$amount[oneOff]
monthly <- donationsData$interval == 1
donationsData$amountYearly[monthly] <- donationsData$amount[monthly] * 12
quarterly <- donationsData$interval == 3
donationsData$amountYearly[quarterly] <- donationsData$amount[quarterly] * 4
halfYearly <- donationsData$interval == 6
donationsData$amountYearly[halfYearly] <- donationsData$amount[halfYearly] * 2
# - Keep only the columns used downstream.
donationsData <- select(donationsData,
                        campaign, expFactor, amountYearly, opt_in)
The analytical workflow is as follows:
x_min
is the lowest observation at which the power-law behavior is assumed; tricky, we should re-run this w. previous estimation of x_min
). Run independent t-tests or Welch, after testing for homogeneity of variances w. Levene’s test, w. correction for unequal variances if necessary. In other words, we first test for the homogeneity of variances, and then choose to conduct an Independent
t-test in case of homogeneity or Welch
in case of heterogeneity. No matter the nature of the t-test, we perform TOST equivalence testing against {.3, -.3} bounds considered as minimal effect size. The measure of effect size, however, is chosen in respect to the type of the t-test conducted (Cohen’s d or Glass’ delta).
Results. The following table summarizes the t-test results: campaign
is the campaign’s code, n_old
and n_new
the number of observations for the “old” and the “new” page, respectively, eq_variance
is the boolean signifying the outcome of the Levene’s test of equality of variances (i.e. the test of homogeneity of variance), ttest
stands for the t statistic (the t-test outcome), ttest_p
is the Type I Error probability (i.e. alpha), ttest_df
stands for the respective number of degrees of freedom, ttest_method
would have a value of Independent
for equal variances and Welch
when the homogeneity of variance assumption is violated, eff_size
is the measure of effect size (N.B. Cohen’s d for Independent
and Glass’ delta for Welch
), eff_magnitude
is the categorization of the effect size (N.B. not available for Glass’ delta since it is uncertain whether the same categories apply as in the case of Cohen’s d), test_eq
is the result of the TOST equivalence test.
# - Per-campaign results container for Method A (Levene-gated t-tests).
campaigns <- unique(donationsData$campaign)
# - hoist the row count instead of recomputing length(campaigns) per column
nCampaigns <- length(campaigns)
methodAResults <- data.frame(
  campaign = character(nCampaigns),
  n_old = numeric(nCampaigns),
  n_new = numeric(nCampaigns),
  eq_variance = logical(nCampaigns),
  ttest = numeric(nCampaigns),
  ttest_p = numeric(nCampaigns),
  ttest_df = numeric(nCampaigns),
  ttest_method = character(nCampaigns),
  eff_size = numeric(nCampaigns),
  eff_mag = character(nCampaigns),
  test_eq = logical(nCampaigns),
  stringsAsFactors = FALSE
)
# - Method A, per campaign: Levene's test gates the choice between an
# - Independent (equal variances) and a Welch (unequal variances) t-test;
# - effect size is Cohen's d (Independent) or Glass' delta (Welch); TOST
# - equivalence test against the {-.3, .3} bounds in both cases.
for (i in seq_along(campaigns)) {
  # - select testData
  testData <- donationsData %>%
    filter(campaign %in% campaigns[i]) %>%
    select(expFactor, amountYearly)
  testData$expFactor <- factor(testData$expFactor, levels = c('new', 'old'))
  # - per-group amounts, reused throughout
  amountNew <- testData$amountYearly[testData$expFactor == 'new']
  amountOld <- testData$amountYearly[testData$expFactor == 'old']
  # - test for the homogeneity of variance
  testLevene <- levene.test(testData$amountYearly, group = testData$expFactor)
  eqVariance <- testLevene$p.value > .05
  # - t-test: Independent when variances are homogeneous, Welch otherwise
  ttest <- t.test(testData$amountYearly ~ testData$expFactor,
                  var.equal = eqVariance)
  if (eqVariance) {
    # - effect size: Cohen's d w. pooled SD
    cohend <- cohen.d(d = testData$amountYearly, f = testData$expFactor,
                      pooled = TRUE)
    effSize <- cohend$estimate
    effMag <- as.character(cohend$magnitude)
    ttestMethod <- 'Independent'
  } else {
    # - effect size: Glass' delta (control-group SD in the denominator);
    # - no magnitude categorization is available for Glass' delta
    effSize <- (mean(amountNew) - mean(amountOld)) / sd(amountOld)
    effMag <- '-'
    ttestMethod <- 'Welch'
  }
  # - TOST equivalence test (bounds identical in both branches)
  tost <- TOSTtwo(m1 = mean(amountNew),
                  m2 = mean(amountOld),
                  sd1 = sd(amountNew),
                  sd2 = sd(amountOld),
                  n1 = length(amountNew),
                  n2 = length(amountOld),
                  low_eqbound_d = -.3,
                  high_eqbound_d = .3,
                  alpha = .05,
                  var.equal = eqVariance,
                  plot = FALSE)
  # - store results (single storage site replaces the duplicated branches)
  methodAResults$campaign[i] <- campaigns[i]
  methodAResults$n_old[i] <- length(amountOld)
  methodAResults$n_new[i] <- length(amountNew)
  methodAResults$eq_variance[i] <- eqVariance
  methodAResults$ttest[i] <- ttest$statistic
  methodAResults$ttest_p[i] <- ttest$p.value
  methodAResults$ttest_df[i] <- ttest$parameter
  methodAResults$ttest_method[i] <- ttestMethod
  methodAResults$eff_size[i] <- effSize
  methodAResults$eff_mag[i] <- effMag
  methodAResults$test_eq[i] <- (tost$TOST_p1 < .05 & tost$TOST_p2 < .05)
}
# - render the results table
knitr::kable(methodAResults, format = "html") %>%
  kable_styling(full_width = FALSE, position = "left")
campaign | n_old | n_new | eq_variance | ttest | ttest_p | ttest_df | ttest_method | eff_size | eff_mag | test_eq |
---|---|---|---|---|---|---|---|---|---|---|
mob05-ba-171218 | 2372 | 2431 | TRUE | 0.8583697 | 0.3907312 | 4801 | Independent | 0.0247731 | negligible | TRUE |
35-ba-171218 | 2732 | 2482 | TRUE | -0.5540745 | 0.5795516 | 5212 | Independent | 0.0154845 | negligible | TRUE |
pad04-ba-171218 | 2014 | 1940 | TRUE | 0.3133339 | 0.7540436 | 3952 | Independent | -0.0098875 | negligible | TRUE |
en03-ba-171218 | 1621 | 1545 | TRUE | 0.6601157 | 0.5092277 | 3164 | Independent | 0.0234704 | negligible | TRUE |
wpde-04-171218 | 2751 | 2767 | TRUE | -0.1485633 | 0.8819036 | 5516 | Independent | 0.0039980 | negligible | TRUE |
38-ba-171223 | 3895 | 3960 | TRUE | 0.5589331 | 0.5762233 | 7853 | Independent | 0.0126134 | negligible | TRUE |
Force Welch t-tests, assuming that is the recommended procedure when group sample sizes are not equal; this implies a correction for unequal variances.
Results. The columns have the same meaning as in the previous table, except for that the measure of effects size is always Glass’ delta.
# - Per-campaign results container for Method B (forced Welch t-tests).
campaigns <- unique(donationsData$campaign)
# - hoist the row count instead of recomputing length(campaigns) per column
nCampaigns <- length(campaigns)
methodBResults <- data.frame(
  campaign = character(nCampaigns),
  n_old = numeric(nCampaigns),
  n_new = numeric(nCampaigns),
  eq_variance = logical(nCampaigns),
  ttest = numeric(nCampaigns),
  ttest_p = numeric(nCampaigns),
  ttest_df = numeric(nCampaigns),
  ttest_method = character(nCampaigns),
  eff_size = numeric(nCampaigns),
  eff_mag = character(nCampaigns),
  test_eq = logical(nCampaigns),
  stringsAsFactors = FALSE
)
# - Method B, per campaign: always run a Welch t-test (recommended when
# - group sizes are unequal); effect size is always Glass' delta; TOST
# - equivalence test against the {-.3, .3} bounds.
for (i in seq_along(campaigns)) {
  # - select testData
  testData <- donationsData %>%
    filter(campaign %in% campaigns[i]) %>%
    select(expFactor, amountYearly)
  testData$expFactor <- factor(testData$expFactor, levels = c('new', 'old'))
  # - per-group amounts, reused throughout
  amountNew <- testData$amountYearly[testData$expFactor == 'new']
  amountOld <- testData$amountYearly[testData$expFactor == 'old']
  # - Welch t-test (no equal-variance assumption)
  ttest <- t.test(testData$amountYearly ~ testData$expFactor,
                  var.equal = FALSE)
  # - effect size: Glass' delta (control-group SD in the denominator)
  glassdelta <- (mean(amountNew) - mean(amountOld)) / sd(amountOld)
  # - TOST equivalence test
  tost <- TOSTtwo(m1 = mean(amountNew),
                  m2 = mean(amountOld),
                  sd1 = sd(amountNew),
                  sd2 = sd(amountOld),
                  n1 = length(amountNew),
                  n2 = length(amountOld),
                  low_eqbound_d = -.3,
                  high_eqbound_d = .3,
                  alpha = .05,
                  var.equal = FALSE,
                  plot = FALSE)
  # - store results
  methodBResults$campaign[i] <- campaigns[i]
  methodBResults$n_old[i] <- length(amountOld)
  methodBResults$n_new[i] <- length(amountNew)
  methodBResults$eq_variance[i] <- FALSE
  methodBResults$ttest[i] <- ttest$statistic
  methodBResults$ttest_p[i] <- ttest$p.value
  methodBResults$ttest_df[i] <- ttest$parameter
  methodBResults$ttest_method[i] <- 'Welch'
  methodBResults$eff_size[i] <- glassdelta
  methodBResults$eff_mag[i] <- '-'
  methodBResults$test_eq[i] <- (tost$TOST_p1 < .05 & tost$TOST_p2 < .05)
}
# - render the results table
knitr::kable(methodBResults, format = "html") %>%
  kable_styling(full_width = FALSE, position = "left")
campaign | n_old | n_new | eq_variance | ttest | ttest_p | ttest_df | ttest_method | eff_size | eff_mag | test_eq |
---|---|---|---|---|---|---|---|---|---|---|
mob05-ba-171218 | 2372 | 2431 | FALSE | 0.8591243 | 0.3903149 | 4790.416 | Welch | 0.0257209 | - | TRUE |
35-ba-171218 | 2732 | 2482 | FALSE | -0.5584103 | 0.5765883 | 5188.292 | Welch | -0.0143053 | - | TRUE |
pad04-ba-171218 | 2014 | 1940 | FALSE | 0.3108147 | 0.7559614 | 3249.093 | Welch | 0.0131630 | - | TRUE |
en03-ba-171218 | 1621 | 1545 | FALSE | 0.6572774 | 0.5110529 | 3009.390 | Welch | 0.0258540 | - | TRUE |
wpde-04-171218 | 2751 | 2767 | FALSE | -0.1484904 | 0.8819613 | 5352.054 | Welch | -0.0036981 | - | TRUE |
38-ba-171223 | 3895 | 3960 | FALSE | 0.5595943 | 0.5757724 | 7729.371 | Welch | 0.0136321 | - | TRUE |
Fit a power law to each data set to learn about the nature of the sampling distribution; if the power law behavior cannot be excluded we conclude that the sampling distribution is not normal and choose to proceed with non-parametric tests; if the power law can be excluded, we choose to remove outliers and repeat the t-tests.
Test the power law behavior of the yearly amount distributions per campaign and per experimental group. CITATION. Colin S. Gillespie (2015). Fitting Heavy Tailed Distributions: The poweRlaw Package. Journal of Statistical Software, 64(2), 1-16. URL http://www.jstatsoft.org/v64/i02/.
NOTE. Do not run this; it is parallelized and takes a long time to complete.
# - Fit a discrete power law to the yearly amounts per campaign and per
# - experimental group, then bootstrap the goodness-of-fit p-value.
# - NOTE: x_min defaults to the minimum of the data (see the note above
# - about re-running with an estimated x_min).
campaigns <- unique(donationsData$campaign)
# - test power laws
plList <- vector(mode = "list", length = length(campaigns))
for (i in seq_along(campaigns)) {
  inCampaign <- donationsData$campaign == campaigns[i]
  testSetOld <- as.integer(donationsData$amountYearly[which(inCampaign & donationsData$expFactor == 'old')])
  testSetNew <- as.integer(donationsData$amountYearly[which(inCampaign & donationsData$expFactor == 'new')])
  # - discrete power-law objects
  mmTestSetOld <- displ$new(testSetOld)
  mmTestSetNew <- displ$new(testSetNew)
  # - bootstrap goodness of fit (parallelized; slow)
  plList[[i]]$old <- bootstrap_p(mmTestSetOld, no_of_sims = 1000, threads = 8)
  plList[[i]]$new <- bootstrap_p(mmTestSetNew, no_of_sims = 1000, threads = 8)
}
# - bootstrap p-values per group (p > .05: power law cannot be rejected);
# - vapply pins the return type, unlike sapply
oldPValues <- vapply(plList, function(x) {x$old$p}, numeric(1))
newPValues <- vapply(plList, function(x) {x$new$p}, numeric(1))
Results. With 3 out of 12 data sets exhibiting power law behavior w. x_min
set to the lowest value in the data set, we choose to remove the outliers and repeat the t-tests. NOTE. Re-running this with a previous estimation of x_min
wouldn’t hurt.
Run independent t-tests or Welch, after testing for homogeneity of variances w. Levene’s test, w. correction for unequal variances if necessary, and following the removal of outliers.
NOTE. Only extreme outliers on the upper tail of the distribution (> 3 * IQR
, IQR
== interquartile range) were removed. In effect, extremely high yearly donation amounts were removed.
# - Per-campaign results container for Method C1 (Levene-gated t-tests
# - after outlier removal).
campaigns <- unique(donationsData$campaign)
# - hoist the row count instead of recomputing length(campaigns) per column
nCampaigns <- length(campaigns)
methodC1Results <- data.frame(
  campaign = character(nCampaigns),
  n_old = numeric(nCampaigns),
  n_new = numeric(nCampaigns),
  eq_variance = logical(nCampaigns),
  ttest = numeric(nCampaigns),
  ttest_p = numeric(nCampaigns),
  ttest_df = numeric(nCampaigns),
  ttest_method = character(nCampaigns),
  eff_size = numeric(nCampaigns),
  eff_mag = character(nCampaigns),
  test_eq = logical(nCampaigns),
  stringsAsFactors = FALSE
)
# - Method C1, per campaign: remove extreme upper-tail outliers
# - (> 3 * IQR, computed per group), then run the same Levene-gated
# - Independent/Welch t-test + effect size + TOST as Method A.
for (i in seq_along(campaigns)) {
  # - select testData
  testData <- donationsData %>%
    filter(campaign %in% campaigns[i]) %>%
    select(expFactor, amountYearly)
  testData$expFactor <- factor(testData$expFactor, levels = c('new', 'old'))
  # - remove outliers: one-sided, > 3*IQR per group (vectorized flag,
  # - equivalent to the nested subassignment it replaces)
  isNew <- testData$expFactor == 'new'
  isOld <- testData$expFactor == 'old'
  cutNew <- 3 * IQR(testData$amountYearly[isNew])
  cutOld <- 3 * IQR(testData$amountYearly[isOld])
  testData$outlier <- (isNew & testData$amountYearly > cutNew) |
    (isOld & testData$amountYearly > cutOld)
  testData <- filter(testData, !outlier)
  # - per-group amounts, post outlier removal
  amountNew <- testData$amountYearly[testData$expFactor == 'new']
  amountOld <- testData$amountYearly[testData$expFactor == 'old']
  # - test for the homogeneity of variance
  testLevene <- levene.test(testData$amountYearly, group = testData$expFactor)
  eqVariance <- testLevene$p.value > .05
  # - t-test: Independent when variances are homogeneous, Welch otherwise
  ttest <- t.test(testData$amountYearly ~ testData$expFactor,
                  var.equal = eqVariance)
  if (eqVariance) {
    # - effect size: Cohen's d w. pooled SD
    cohend <- cohen.d(d = testData$amountYearly, f = testData$expFactor,
                      pooled = TRUE)
    effSize <- cohend$estimate
    effMag <- as.character(cohend$magnitude)
    ttestMethod <- 'Independent'
  } else {
    # - effect size: Glass' delta; no magnitude categorization available
    effSize <- (mean(amountNew) - mean(amountOld)) / sd(amountOld)
    effMag <- '-'
    ttestMethod <- 'Welch'
  }
  # - TOST equivalence test
  tost <- TOSTtwo(m1 = mean(amountNew),
                  m2 = mean(amountOld),
                  sd1 = sd(amountNew),
                  sd2 = sd(amountOld),
                  n1 = length(amountNew),
                  n2 = length(amountOld),
                  low_eqbound_d = -.3,
                  high_eqbound_d = .3,
                  alpha = .05,
                  var.equal = eqVariance,
                  plot = FALSE)
  # - store results (single storage site replaces the duplicated branches)
  methodC1Results$campaign[i] <- campaigns[i]
  methodC1Results$n_old[i] <- length(amountOld)
  methodC1Results$n_new[i] <- length(amountNew)
  methodC1Results$eq_variance[i] <- eqVariance
  methodC1Results$ttest[i] <- ttest$statistic
  methodC1Results$ttest_p[i] <- ttest$p.value
  methodC1Results$ttest_df[i] <- ttest$parameter
  methodC1Results$ttest_method[i] <- ttestMethod
  methodC1Results$eff_size[i] <- effSize
  methodC1Results$eff_mag[i] <- effMag
  methodC1Results$test_eq[i] <- (tost$TOST_p1 < .05 & tost$TOST_p2 < .05)
}
# - render the results table
knitr::kable(methodC1Results, format = "html") %>%
  kable_styling(full_width = FALSE, position = "left")
campaign | n_old | n_new | eq_variance | ttest | ttest_p | ttest_df | ttest_method | eff_size | eff_mag | test_eq |
---|---|---|---|---|---|---|---|---|---|---|
mob05-ba-171218 | 2225 | 2258 | TRUE | -0.0984794 | 0.9215560 | 4481.000 | Independent | -0.0029417 | negligible | TRUE |
35-ba-171218 | 2648 | 2404 | TRUE | -0.5889559 | 0.5559172 | 5050.000 | Independent | 0.0165837 | negligible | TRUE |
pad04-ba-171218 | 1944 | 1873 | TRUE | -1.0888217 | 0.2763013 | 3815.000 | Independent | 0.0352502 | negligible | TRUE |
en03-ba-171218 | 1570 | 1493 | TRUE | -0.0928635 | 0.9260181 | 3061.000 | Independent | -0.0033569 | negligible | TRUE |
wpde-04-171218 | 2151 | 2160 | FALSE | 0.0460051 | 0.9633083 | 4306.996 | Welch | 0.0013893 | - | TRUE |
38-ba-171223 | 3781 | 3830 | TRUE | -0.2823690 | 0.7776683 | 7609.000 | Independent | -0.0064734 | negligible | TRUE |
Force Welch t-tests (implying a correction for unequal variances) following the removal of outliers.
NOTE. Only extreme outliers on the upper tail of the distribution (> 3 * IQR
, IQR
== interquartile range) were removed. In effect, extremely high yearly donation amounts were removed.
NOTE. The effect size (eff_size
) is always Glass’ delta.
# - Per-campaign results container for Method C2 (forced Welch t-tests
# - after outlier removal).
campaigns <- unique(donationsData$campaign)
# - hoist the row count instead of recomputing length(campaigns) per column
nCampaigns <- length(campaigns)
methodC2Results <- data.frame(
  campaign = character(nCampaigns),
  n_old = numeric(nCampaigns),
  n_new = numeric(nCampaigns),
  eq_variance = logical(nCampaigns),
  ttest = numeric(nCampaigns),
  ttest_p = numeric(nCampaigns),
  ttest_df = numeric(nCampaigns),
  ttest_method = character(nCampaigns),
  eff_size = numeric(nCampaigns),
  eff_mag = character(nCampaigns),
  test_eq = logical(nCampaigns),
  stringsAsFactors = FALSE
)
# - Method C2, per campaign: remove extreme upper-tail outliers
# - (> 3 * IQR per group), then force a Welch t-test; effect size is
# - always Glass' delta; TOST equivalence test against {-.3, .3}.
for (i in seq_along(campaigns)) {
  # - select testData
  testData <- donationsData %>%
    filter(campaign %in% campaigns[i]) %>%
    select(expFactor, amountYearly)
  testData$expFactor <- factor(testData$expFactor, levels = c('new', 'old'))
  # - remove outliers: one-sided, > 3*IQR per group (vectorized flag,
  # - equivalent to the nested subassignment it replaces)
  isNew <- testData$expFactor == 'new'
  isOld <- testData$expFactor == 'old'
  cutNew <- 3 * IQR(testData$amountYearly[isNew])
  cutOld <- 3 * IQR(testData$amountYearly[isOld])
  testData$outlier <- (isNew & testData$amountYearly > cutNew) |
    (isOld & testData$amountYearly > cutOld)
  testData <- filter(testData, !outlier)
  # - per-group amounts, post outlier removal
  amountNew <- testData$amountYearly[testData$expFactor == 'new']
  amountOld <- testData$amountYearly[testData$expFactor == 'old']
  # - Welch t-test (no equal-variance assumption)
  ttest <- t.test(testData$amountYearly ~ testData$expFactor,
                  var.equal = FALSE)
  # - effect size: Glass' delta (control-group SD in the denominator)
  glassdelta <- (mean(amountNew) - mean(amountOld)) / sd(amountOld)
  # - TOST equivalence test
  tost <- TOSTtwo(m1 = mean(amountNew),
                  m2 = mean(amountOld),
                  sd1 = sd(amountNew),
                  sd2 = sd(amountOld),
                  n1 = length(amountNew),
                  n2 = length(amountOld),
                  low_eqbound_d = -.3,
                  high_eqbound_d = .3,
                  alpha = .05,
                  var.equal = FALSE,
                  plot = FALSE)
  # - store results
  methodC2Results$campaign[i] <- campaigns[i]
  methodC2Results$n_old[i] <- length(amountOld)
  methodC2Results$n_new[i] <- length(amountNew)
  methodC2Results$eq_variance[i] <- FALSE
  methodC2Results$ttest[i] <- ttest$statistic
  methodC2Results$ttest_p[i] <- ttest$p.value
  methodC2Results$ttest_df[i] <- ttest$parameter
  methodC2Results$ttest_method[i] <- 'Welch'
  methodC2Results$eff_size[i] <- glassdelta
  methodC2Results$eff_mag[i] <- '-'
  methodC2Results$test_eq[i] <- (tost$TOST_p1 < .05 & tost$TOST_p2 < .05)
}
# - render the results table
knitr::kable(methodC2Results, format = "html") %>%
  kable_styling(full_width = FALSE, position = "left")
campaign | n_old | n_new | eq_variance | ttest | ttest_p | ttest_df | ttest_method | eff_size | eff_mag | test_eq |
---|---|---|---|---|---|---|---|---|---|---|
mob05-ba-171218 | 2225 | 2258 | FALSE | -0.0984699 | 0.9215636 | 4477.542 | Welch | -0.0029226 | - | TRUE |
35-ba-171218 | 2648 | 2404 | FALSE | -0.5886759 | 0.5561053 | 4993.308 | Welch | -0.0166699 | - | TRUE |
pad04-ba-171218 | 1944 | 1873 | FALSE | -1.0887222 | 0.2763453 | 3808.242 | Welch | -0.0353386 | - | TRUE |
en03-ba-171218 | 1570 | 1493 | FALSE | -0.0927810 | 0.9260837 | 3038.731 | Welch | -0.0034163 | - | TRUE |
wpde-04-171218 | 2151 | 2160 | FALSE | 0.0460051 | 0.9633083 | 4306.996 | Welch | 0.0013893 | - | TRUE |
38-ba-171223 | 3781 | 3830 | FALSE | -0.2823820 | 0.7776583 | 7608.753 | Welch | -0.0064970 | - | TRUE |
Conduct non-parametric, independent 2-group Mann-Whitney U Test; no removal of outliers.
Results. campaign
stands for the campaign code, n_old
and n_new
are the number of observations for the “old” and the “new” page respectively, W
is the Wilcoxon test statistics, and MWUTest_p
the associated probability of the Type I Error.
# - Per-campaign results container for the Mann-Whitney U tests
# - (no outlier removal).
campaigns <- unique(donationsData$campaign)
# - hoist the row count instead of recomputing length(campaigns) per column
nCampaigns <- length(campaigns)
methodD1Results <- data.frame(
  campaign = character(nCampaigns),
  n_old = numeric(nCampaigns),
  n_new = numeric(nCampaigns),
  W = numeric(nCampaigns),
  MWUTest_p = numeric(nCampaigns),
  stringsAsFactors = FALSE
)
# - Mann-Whitney U (Wilcoxon rank-sum) test per campaign, no outlier removal.
for (i in seq_along(campaigns)) {
  # - select testData
  testData <- donationsData %>%
    filter(campaign %in% campaigns[i]) %>%
    select(expFactor, amountYearly)
  testData$expFactor <- factor(testData$expFactor, levels = c('new', 'old'))
  # - non-parametric test
  mwu <- wilcox.test(testData$amountYearly ~ testData$expFactor)
  # - store results
  methodD1Results$campaign[i] <- campaigns[i]
  methodD1Results$n_old[i] <- sum(testData$expFactor == 'old')
  methodD1Results$n_new[i] <- sum(testData$expFactor == 'new')
  methodD1Results$W[i] <- mwu$statistic
  methodD1Results$MWUTest_p[i] <- mwu$p.value
}
# - render the results table
knitr::kable(methodD1Results, format = "html") %>%
  kable_styling(full_width = FALSE, position = "left")
campaign | n_old | n_new | W | MWUTest_p |
---|---|---|---|---|
mob05-ba-171218 | 2372 | 2431 | 2913410 | 0.4998617 |
35-ba-171218 | 2732 | 2482 | 3340974 | 0.3442295 |
pad04-ba-171218 | 2014 | 1940 | 1904823 | 0.1598756 |
en03-ba-171218 | 1621 | 1545 | 1236390 | 0.5227544 |
wpde-04-171218 | 2751 | 2767 | 3811216 | 0.9287713 |
38-ba-171223 | 3895 | 3960 | 7687976 | 0.8034789 |
Conduct non-parametric, independent 2-group Mann-Whitney U Test, following the removal of outliers.
NOTE. Only extreme outliers on the upper tail of the distribution (> 3 * IQR
, IQR
== interquartile range) were removed. In effect, extremely high yearly donation amounts were removed.
# - Per-campaign results container for the Mann-Whitney U tests
# - following outlier removal.
campaigns <- unique(donationsData$campaign)
# - hoist the row count instead of recomputing length(campaigns) per column
nCampaigns <- length(campaigns)
methodD2Results <- data.frame(
  campaign = character(nCampaigns),
  n_old = numeric(nCampaigns),
  n_new = numeric(nCampaigns),
  W = numeric(nCampaigns),
  MWUTest_p = numeric(nCampaigns),
  stringsAsFactors = FALSE
)
# - Mann-Whitney U test per campaign after removing extreme upper-tail
# - outliers (> 3 * IQR, computed per group).
for (i in seq_along(campaigns)) {
  # - select testData
  testData <- donationsData %>%
    filter(campaign %in% campaigns[i]) %>%
    select(expFactor, amountYearly)
  testData$expFactor <- factor(testData$expFactor, levels = c('new', 'old'))
  # - remove outliers: one-sided, > 3*IQR per group (vectorized flag,
  # - equivalent to the nested subassignment it replaces)
  isNew <- testData$expFactor == 'new'
  isOld <- testData$expFactor == 'old'
  cutNew <- 3 * IQR(testData$amountYearly[isNew])
  cutOld <- 3 * IQR(testData$amountYearly[isOld])
  testData$outlier <- (isNew & testData$amountYearly > cutNew) |
    (isOld & testData$amountYearly > cutOld)
  testData <- filter(testData, !outlier)
  # - non-parametric test
  mwu <- wilcox.test(testData$amountYearly ~ testData$expFactor)
  # - store results
  methodD2Results$campaign[i] <- campaigns[i]
  methodD2Results$n_old[i] <- sum(testData$expFactor == 'old')
  methodD2Results$n_new[i] <- sum(testData$expFactor == 'new')
  methodD2Results$W[i] <- mwu$statistic
  methodD2Results$MWUTest_p[i] <- mwu$p.value
}
# - render the results table
knitr::kable(methodD2Results, format = "html") %>%
  kable_styling(full_width = FALSE, position = "left")
campaign | n_old | n_new | W | MWUTest_p |
---|---|---|---|---|
mob05-ba-171218 | 2225 | 2258 | 2516217 | 0.9159759 |
35-ba-171218 | 2648 | 2404 | 3131223 | 0.2980615 |
pad04-ba-171218 | 1944 | 1873 | 1772213 | 0.1402104 |
en03-ba-171218 | 1570 | 1493 | 1153248 | 0.4243507 |
wpde-04-171218 | 2151 | 2160 | 2326203 | 0.9372047 |
38-ba-171223 | 3781 | 3830 | 7188939 | 0.5748126 |
Binary logistic regression was a method of our choice, with the experimental A/B factor (“new”/“old”) as a predictor of opt_in
. The “old” level of the experimental factor was used as a baseline. Conclusion. Except for the mob05-ba-171218
campaign where the experimental factor did not have a statistically significant effect upon opt_in
, the new page seems to always induce a reduction in the probability to opt_in.
Results. campaign
stands for the campaign code, n_old
and n_new
the number of observations for the “old” and the “new” page, respectively, coeff
is the regression coefficient for the A/B experimental factor (NOTE: using old
as a baseline), exp_coeff
is the exponential of the regression coefficient - the delta odds, should be interpreted as the increase in the odds of opting in vs. not opting in with a unit increase in the experimental factors which in our case means switching from the old
to the new
page, coeff_z
is the value of the Wald’s test (similar to t-test in linear regression, tests if the regression coefficient is significantly different from zero), coeff_p
the respective probability of the Type I Error, sig
whether the Wald’s test has reached the conventional alpha < .05
significance criterion.
# - Per-campaign results container for the binary logistic regression
# - of opt_in on the A/B experimental factor.
campaigns <- unique(donationsData$campaign)
# - hoist the row count instead of recomputing length(campaigns) per column
nCampaigns <- length(campaigns)
opt_in_BLR_Results <- data.frame(
  campaign = character(nCampaigns),
  n_old = numeric(nCampaigns),
  n_new = numeric(nCampaigns),
  coeff = numeric(nCampaigns),
  exp_coeff = numeric(nCampaigns),
  coeff_z = numeric(nCampaigns),
  coeff_p = numeric(nCampaigns),
  sig = logical(nCampaigns),
  stringsAsFactors = FALSE
)
# - Binary logistic regression of opt_in on the A/B factor, per campaign.
# - 'old' is the baseline level, so the expFactor coefficient is the change
# - in the log-odds of opting in when switching to the new page.
for (i in seq_along(campaigns)) {
  # - select testData
  testData <- donationsData %>%
    filter(campaign %in% campaigns[i]) %>%
    select(expFactor, amountYearly, opt_in)
  testData$expFactor <- factor(testData$expFactor, levels = c('old', 'new'))
  # - Binary logistic regression
  tmodel <- glm(testData$opt_in ~ testData$expFactor,
                family = "binomial")
  # - hoist the coefficient matrix (summary() was recomputed five times);
  # - row 2 is the expFactor ('new') term: estimate, SE, z, p
  modelCoeffs <- summary(tmodel)$coefficients
  # - store results
  opt_in_BLR_Results$campaign[i] <- campaigns[i]
  opt_in_BLR_Results$n_old[i] <- sum(testData$expFactor == 'old')
  opt_in_BLR_Results$n_new[i] <- sum(testData$expFactor == 'new')
  opt_in_BLR_Results$coeff[i] <- modelCoeffs[2, 1]
  # - exponentiated coefficient: multiplicative change in the odds
  opt_in_BLR_Results$exp_coeff[i] <- exp(modelCoeffs[2, 1])
  opt_in_BLR_Results$coeff_z[i] <- modelCoeffs[2, 3]
  opt_in_BLR_Results$coeff_p[i] <- modelCoeffs[2, 4]
  opt_in_BLR_Results$sig[i] <- modelCoeffs[2, 4] < .05
}
# - render the results table
knitr::kable(opt_in_BLR_Results, format = "html") %>%
  kable_styling(full_width = FALSE, position = "left")
campaign | n_old | n_new | coeff | exp_coeff | coeff_z | coeff_p | sig |
---|---|---|---|---|---|---|---|
mob05-ba-171218 | 2372 | 2431 | -0.0589191 | 0.9427831 | -0.9386565 | 0.3479071 | FALSE |
35-ba-171218 | 2732 | 2482 | -0.6335634 | 0.5306973 | -9.4906179 | 0.0000000 | TRUE |
pad04-ba-171218 | 2014 | 1940 | -0.8305340 | 0.4358165 | -11.0079858 | 0.0000000 | TRUE |
en03-ba-171218 | 1621 | 1545 | -0.5292162 | 0.5890665 | -6.3989787 | 0.0000000 | TRUE |
wpde-04-171218 | 2751 | 2767 | -0.4554023 | 0.6341928 | -7.4850421 | 0.0000000 | TRUE |
38-ba-171223 | 3895 | 3960 | -0.6537384 | 0.5200978 | -12.2507708 | 0.0000000 | TRUE |
### --- Memberships: keep experimental landing pages only and recode
# - Experimental keywords end in "-ctrl" or "var"; drop all other keywords.
membershipData <- filter(membershipData, grepl("-ctrl$|var$", banner_keyword))
# - Keep confirmed memberships only (status == 1).
membershipData <- filter(membershipData, status == 1)
# - Recoding scheme: maps keywords onto the old vs. new landing page.
recodingScheme <- read.csv('data/recodingScheme.csv',
                           header = TRUE,
                           stringsAsFactors = FALSE,
                           check.names = FALSE)
oldCodes <- unique(recodingScheme$`keyword of old lp`)
newCodes <- unique(recodingScheme$`keyword of new lp`)
# - Experimental factor, vectorized (replaces the per-element sapply).
# - NOTE: "Unnasigned" [sic] is a runtime string matched by the filter below;
# - spelling kept as-is for consistency with the rest of the analysis.
membershipData$expFactor <- ifelse(membershipData$banner_keyword %in% oldCodes, "old",
                                   ifelse(membershipData$banner_keyword %in% newCodes,
                                          "new", "Unnasigned"))
# - filter out 'Unnasigned' codes:
membershipData <- filter(membershipData, !(expFactor == 'Unnasigned'))
# - Annualize the membership fee from the fee interval in months
# - (0 == one-off, 12 == once per year).
membershipData$amountYearly <- numeric(nrow(membershipData))
oneOff <- membershipData$membership_fee_interval == 0 |
  membershipData$membership_fee_interval == 12
membershipData$amountYearly[oneOff] <- membershipData$membership_fee[oneOff]
monthly <- membershipData$membership_fee_interval == 1
membershipData$amountYearly[monthly] <- membershipData$membership_fee[monthly] * 12
quarterly <- membershipData$membership_fee_interval == 3
membershipData$amountYearly[quarterly] <- membershipData$membership_fee[quarterly] * 4
halfYearly <- membershipData$membership_fee_interval == 6
membershipData$amountYearly[halfYearly] <- membershipData$membership_fee[halfYearly] * 2
# - Keep only the columns used downstream.
membershipData <- select(membershipData,
                         banner_campaign, expFactor, amountYearly, donation_receipt)
The analysis of the Membership data set relies on non-parametric tests only (Mann-Whitney U test), because the sample sizes per campaign are very small following the data clean-up. Conclusion. No effect of the new page in any of the campaigns.
Results. campaign
stands for the campaign code, n_old
and n_new
are the number of observations for the “old” and the “new” page respectively, W
is the Wilcoxon test statistics, and MWUTest_p
the associated probability of the Type I Error.
# - Per-campaign results container for the membership Mann-Whitney U tests.
# - NOTE: "memberhsip" [sic] — existing variable name kept as-is.
campaigns <- unique(membershipData$banner_campaign)
# - hoist the row count instead of recomputing length(campaigns) per column
nCampaigns <- length(campaigns)
methodD1Results_memberhsip <- data.frame(
  campaign = character(nCampaigns),
  n_old = numeric(nCampaigns),
  n_new = numeric(nCampaigns),
  W = numeric(nCampaigns),
  MWUTest_p = numeric(nCampaigns),
  stringsAsFactors = FALSE
)
# - Mann-Whitney U test of the annualized membership fee per campaign.
# - NOTE: "memberhsip" [sic] — existing variable name kept as-is.
for (i in seq_along(campaigns)) {
  # - select testData
  testData <- membershipData %>%
    filter(banner_campaign %in% campaigns[i]) %>%
    select(expFactor, amountYearly)
  testData$expFactor <- factor(testData$expFactor, levels = c('new', 'old'))
  # - non-parametric test
  mwu <- wilcox.test(testData$amountYearly ~ testData$expFactor)
  # - store results
  methodD1Results_memberhsip$campaign[i] <- campaigns[i]
  methodD1Results_memberhsip$n_old[i] <- sum(testData$expFactor == 'old')
  methodD1Results_memberhsip$n_new[i] <- sum(testData$expFactor == 'new')
  methodD1Results_memberhsip$W[i] <- mwu$statistic
  methodD1Results_memberhsip$MWUTest_p[i] <- mwu$p.value
}
# - render the results table
knitr::kable(methodD1Results_memberhsip, format = "html") %>%
  kable_styling(full_width = FALSE, position = "left")
campaign | n_old | n_new | W | MWUTest_p |
---|---|---|---|---|
35-ba-171218 | 15 | 7 | 58.5 | 0.6714894 |
wpde-04-171218 | 35 | 25 | 365.0 | 0.2720977 |
mob05-ba-171218 | 17 | 6 | 40.0 | 0.4116710 |
pad04-ba-171218 | 10 | 5 | 23.0 | 0.8136099 |
en03-ba-171218 | 11 | 8 | 42.0 | 0.8950513 |
38-ba-171223 | 28 | 23 | 315.0 | 0.8952855 |