Feedback should be send to goran.milovanovic_ext@wikimedia.de.

The campaign was run from 2021/03/08 to 2021/03/21.

CURRENT UPDATE: Dataset as of 2021/03/21; Report updated w. user edits data on 2021/03/29.

Data Acquisiton

NOTE: the Data Acquisition code chunk is not fully reproducible from this Report. The data are collected by running the script 2021_Actionweek_PRODUCTION.R on stat1007.eqiad.wmnet Analytics Client, collecting the data as .tsv and .csv files, copying manually, and processing locally. A daily crontab job was run from 2021/03/08 to 2021/03/21 to collect the data for daily reporting. User edits are then collected until 2021/05/02. The data used in this report are aggregates of the daily datasets, sanitized and anonymized.

DATA: (1) The event.wmdebannerinteractions schema is used for banner clicks/closing (see: Phab); (2) the wmf.webrequest table is a source of banner impressions data; (3) pageviews, user registrations, and user edits are collected by following the standard procedure.

Daily Update

### ----------------------------------------------------------------
### --- WMDE 2021_Actionweek
### --- https://phabricator.wikimedia.org/T274563
### ----------------------------------------------------------------

### ----------------------------------------------------------------
### --- Campaign Description and Parameters
### ----------------------------------------------------------------

### --- Tracking Part 1: banner campaign
### --- Please provide daily reports during the banner campaign (08th of March to 21th of March).

# - Tracking Part 2: Landingpage links to Wikipedia
# - Please provide a tracking report for the Links from the Landingpage of the campaign 
# - until 8 weeks after the end of the campaign.

### --- Timeline for the whole project
# - Start of the banner campaign: 08. March 2020
# - End of the banner campaign: 21. March 2020
# - Tracking test: first week of March
# - Last User signing up: 21.March 2020
# - Preliminary report for tracking part 1: sometime till end of March
# - Track editing behavior eight weeks after end of campaign: 02. May 2020
# - final report for tracking part 1 and 2: sometime till end of May

### --- Tracking Part 1: banner campaign
# - Campaign Tag used for Links
# -------- ?campaign=WMDE_aw21
# -------- this is only for pageviews, this tag is picked-up at our WMDE landing page
# - the Banners can carry one of the two following tags:
# -------- ?banner=WMDE_aw2021_var
# -------- ?banner=WMDE_aw2021_ctrl

### --- Timeline for the whole project
# - Start of the banner campaign: 1. October 2020
# - End of the banner campaign: 11. October 2020
# - Tracking test: End of September
# - Last User signing up: 11. October
# - Preliminary report for tracking part 1: sometime until end of October
# - Last User finishing mailing campaign: November 11th
# - Track editing behavior six weeks after end of campaign: December 23
# - final report for tracking part 1 and 2: beginning of January

### --- Daily Tracking
# - daily reporting during campaign
# - The following information should be included in the daily reporting:
# --- impressions per banner
# --- clicks per banner/ page views per landing page
# --- closing rate of banners
# --- If possible with the distinction whether the person was logged in or not.

### --- libraries
library(tidyverse)
library(data.table)
library(lubridate)

### --- dir structure
campaignPath <- '/home/goransm/Analytics/NewEditors/Campaigns/2021_Actionweek/'
dataDir <- paste0(campaignPath, "_data/")
analyticsDir <- paste0(campaignPath, "_analytics/")
### --- campaign specifics
campaignName <- 'Actionweek_2021'

### --- determine cetDay
cetDay <- Sys.time()
cetDay
attr(cetDay, "tzone") <- "Europe/Berlin"
# - one day behind for crontab
# - (i.e. waiting for wmf.webrequest to complete is data acquisition)
cetDay <- ymd(
  strsplit(as.character(cetDay), 
           split = " ", 
           fixed = T)[[1]][1]
) - 1

### ----------------------------------------------------------
### --- Banner Interactions:
### --- via event.WMDEBannerInteractions
### --- https://meta.wikimedia.org/wiki/Schema:WMDEBannerInteractions
### ----------------------------------------------------------

# - select dt, event.bannerName, event.bannerAction, event.bannerImpressions, event.userID 
# - from event.wmdebannerinteractions where year=2020 and month=5 and (day=11 or day=12 or day=13);

# - function: wmde_banner_actions
wmde_banner_actions <- function(uri_query_filter,
                                cetDay,
                                queryFile,
                                fileName,
                                analyticsDir,
                                campaignName) {
  
  # - NOTE:
  # - expected format for cetDay is: YYYY-MM-DD
  
  # - WHERE condition: create datetime_condition
  cet_condition <- seq(
    from = as.POSIXct(paste0(cetDay," 0:00"), tz = "Europe/Berlin"),
    to = as.POSIXct(paste0(cetDay," 23:00"), tz = "Europe/Berlin"),
    by = "hour"
  ) 
  attr(cet_condition, "tzone") <- "UTC"
  cet_condition <- as.character(cet_condition)
  cet_condition <- unlist(str_extract_all(cet_condition, "^([[:digit:]]|\\s|-)*"))
  cet_years <- sapply(
    strsplit(cet_condition, split = " ", fixed = T), function(x) {
      strsplit(x, split = "-")[[1]][1]
    })
  cet_months <- sapply(
    strsplit(cet_condition, split = " ", fixed = T), function(x) {
      strsplit(x, split = "-")[[1]][2]
    })
  cet_months <- gsub("^0", "", cet_months)
  cet_days <- sapply(
    strsplit(cet_condition, split = " ", fixed = T), function(x) {
      strsplit(x, split = "-")[[1]][3]
    })
  cet_days <- gsub("^0", "", cet_days)
  cet_hours <- sapply(strsplit(cet_condition, split = " ", fixed = T), 
                      function(x) {
                        x[2]
                      })
  cet_hours <- gsub("^0", "", cet_hours)
  datetimeCondition <- paste0(
    "year = ", cet_years, " AND ",
    "month = ", cet_months, " AND ",
    "day = ", cet_days, " AND ", 
    "hour = ", cet_hours
  )
  datetimeCondition <- paste("(", 
                             datetimeCondition, 
                             ")",
                             collapse = " OR ", 
                             sep = "")
  
  
  # - WHERE condition: create eventBannerName_condition
  if (length(uri_query_filter) > 1) {
    eventBannerName_condition <- paste0("(",
                                        paste(
                                          paste0("event.bannerName LIKE '%", uri_query_filter, "%'"),
                                          collapse = " OR ", sep = " "),
                                        ")"
    )
  } else {
    eventBannerName_condition = paste0("event.bannerName LIKE '%", uri_query_filter, "%'")
  }
  
  # - compose HiveQL query
  hiveQuery <- paste0( 
    "select dt, event.bannerName, event.bannerAction, event.bannerImpressions, event.userID from event.wmdebannerinteractions 
    WHERE (",
    eventBannerName_condition, " AND ",
    "(", datetimeCondition, ")",
    ");"
  )
  
  # - write hql
  write(hiveQuery, paste0(dataDir, queryFile))
  # - execute hql script:
  kerberosPrefix <- 
    'sudo -u analytics-privatedata kerberos-run-command analytics-privatedata '
  # - Kerberos init
  system(command = paste0(kerberosPrefix, ' hdfs dfs -ls'), 
         wait = T)
  # - Run query
  query <- system(command = paste(kerberosPrefix, 
                                  '/usr/local/bin/beeline --incremental=true --silent -f "',
                                  paste0(dataDir, queryFile),
                                  '" > ', dataDir, fileName,
                                  sep = ""),
                  wait = TRUE)
  
  # - Wrangle Banner Interactions
  # - load
  bannerData <- tryCatch({
    as.data.frame(fread(paste0(dataDir, fileName)))
  },
  error = function(condition) {
    return(FALSE)
  })
  
  # - process
  if (class(bannerData) == 'logical') {
    return(FALSE) 
  } else { 
    # - bannerSeen
    bannerSeen <- bannerData %>% 
      dplyr::select(bannername, userid)
    # - bannerSeen <- bannerSeen[!duplicated(bannerSeen), ]
    bannerSeen <- bannerSeen %>% 
      dplyr::select(bannername) %>% 
      dplyr::group_by(bannername) %>% 
      dplyr::summarise(seen_by = n())
    # - bannerClosed
    bannerClosed <- bannerData %>% 
      dplyr::filter(banneraction == "banner-closed") %>% 
      dplyr::select(bannername, bannerimpressions) %>% 
      dplyr::group_by(bannername) %>%
      dplyr::summarise(closed_by = n(), 
                       mean_close_imp = round(mean(bannerimpressions), 2))
    # - bannerClicked
    bannerClicked <- bannerData %>% 
      dplyr::filter(banneraction == "banner-clicked") %>% 
      dplyr::select(bannername, bannerimpressions) %>% 
      dplyr::group_by(bannername) %>%
      dplyr::summarise(clicked_by = n(), 
                       mean_click_imp = round(mean(bannerimpressions), 2))
    # - whoClicked
    whoClicked <- bannerData %>% 
      dplyr::filter(banneraction == "banner-clicked")
    whoClicked <- data.frame(userid = unique(whoClicked$userid))
    # - store:
    write.csv(whoClicked, 
              paste0(analyticsDir, 
                     "whoClicked_",
                     strsplit(
                       strsplit(fileName, split = "_", fixed = T)[[1]][2],
                       split = ".", 
                       fixed = T)[[1]][1],
                     ".csv"
              )
    )
    # - join
    bannerData <- bannerSeen %>% 
      dplyr::left_join(bannerClosed, 'bannername') %>% 
      dplyr::left_join(bannerClicked, 'bannername')
    bannerData$close_rate <- round(bannerData$closed_by/bannerData$seen_by, 2)
    bannerData$click_rate <- round(bannerData$clicked_by/bannerData$seen_by, 2)
    # - date, campaign
    bannerData$day <- cetDay
    bannerData$campaign <- campaignName
    
    # - store:
    write.csv(bannerData, 
              paste0(analyticsDir, 
                     "bannerInteractionsAggregated_",
                     strsplit(
                       strsplit(fileName, split = "_", fixed = T)[[1]][2],
                       split = ".", 
                       fixed = T)[[1]][1],
                     ".csv"
              )
    )
  }
  
}

# - set params for wmde_banner_actions()
queryFile <- paste0(campaignName, "_bannerInteractions.hql")
fileName <- paste0("bannerInteractions_", cetDay, ".tsv")
uri_query_filter <- '_wp20_aw_'
banner_status <- wmde_banner_actions(uri_query_filter = uri_query_filter,
                                     cetDay = cetDay,
                                     queryFile = queryFile,
                                     fileName = fileName,
                                     analyticsDir = analyticsDir, 
                                     campaignName = campaignName)

### ----------------------------------------------------------
### --- Pageviews
### ----------------------------------------------------------

# - function: wmde_collect_pageviews
wmde_collect_pageviews <- function(uri_host,
                                   uri_path,
                                   cetDay,
                                   queryFile,
                                   fileName,
                                   dataDir) {
  
  # - NOTE:
  # - expected format for cetDay is: YYYY-MM-DD
  
  # - to dataDir
  setwd(dataDir)
  
  # - libraries
  library(stringr)
  
  # - WHERE condition: create datetime_condition
  cet_condition <- seq(
    from = as.POSIXct(paste0(cetDay," 0:00"), tz = "Europe/Berlin"),
    to = as.POSIXct(paste0(cetDay," 23:00"), tz = "Europe/Berlin"),
    by = "hour"
  ) 
  attr(cet_condition, "tzone") <- "UTC"
  cet_condition <- as.character(cet_condition)
  cet_condition <- unlist(str_extract_all(cet_condition, "^([[:digit:]]|\\s|-)*"))
  cet_years <- sapply(
    strsplit(cet_condition, split = " ", fixed = T), function(x) {
      strsplit(x, split = "-")[[1]][1]
    })
  cet_months <- sapply(
    strsplit(cet_condition, split = " ", fixed = T), function(x) {
      strsplit(x, split = "-")[[1]][2]
    })
  cet_months <- gsub("^0", "", cet_months)
  cet_days <- sapply(
    strsplit(cet_condition, split = " ", fixed = T), function(x) {
      strsplit(x, split = "-")[[1]][3]
    })
  cet_days <- gsub("^0", "", cet_days)
  cet_hours <- sapply(strsplit(cet_condition, split = " ", fixed = T), 
                      function(x) {
                        x[2]
                      })
  cet_hours <- gsub("^0", "", cet_hours)
  datetimeCondition <- paste0(
    "year = ", cet_years, " AND ",
    "month = ", cet_months, " AND ",
    "day = ", cet_days, " AND ", 
    "hour = ", cet_hours
  )
  datetimeCondition <- paste("(", 
                             datetimeCondition, 
                             ")",
                             collapse = " OR ", 
                             sep = "")
  
  # - WHERE condition: create uri_host_condition
  if (length(uri_host) > 1) {
    uri_host_condition <- paste0("(",
                                 paste(
                                   paste0("uri_host = '", uri_host, "'"),
                                   collapse = " OR ", sep = " "),
                                 ")"
    )
  } else {
    uri_host_condition = paste0("uri_host = '", uri_host, "'")
  }
  
  # - WHERE condition: create uri_path_condition
  if (length(uri_path) > 1) {
    uri_path_condition <- paste0("(",
                                 paste(
                                   paste0("uri_path = '", uri_path, "'"),
                                   collapse = " OR ", sep = " "),
                                 ")"
    )
  } else {
    uri_path_condition = paste0("uri_path = '", uri_path, "'")
  }
  
  # - compose HiveQL query
  hiveQuery <- paste0( 
    "USE wmf;
    SELECT uri_host, uri_path, uri_query, referer FROM webrequest
    WHERE (",
    uri_host_condition, " AND ",
    uri_path_condition, " AND ",
    "(", datetimeCondition, ")",
    ");"
  )
  
  # - write hql
  write(hiveQuery, queryFile)
  # - execute hql script:
  kerberosPrefix <- 
    'sudo -u analytics-privatedata kerberos-run-command analytics-privatedata '
  # - Kerberos init
  system(command = paste0(kerberosPrefix, ' hdfs dfs -ls'), 
         wait = T)
  # - Run query
  query <- system(command = paste(kerberosPrefix, 
                                  '/usr/local/bin/beeline --incremental=true --silent -f "',
                                  paste0(dataDir, queryFile),
                                  '" > ', dataDir, fileName,
                                  sep = ""),
                  wait = TRUE)
}

# - set params to wmde_collect_pageviews
uri_host <- c('de.wikipedia.org', 'de.m.wikipedia.org')
uri_path  <- c('/wiki/Wikipedia:Edit-a-thon/Wirtschaftsuniversit%C3%A4t_Wien_zum_Internationalen_Frauentag_2021',
               '/wiki/Wikipedia:GLAM',
               '/wiki/Wikipedia:Wikimedia_Deutschland/LerneWikipedia',
               '/wiki/Wikipedia:WikiProjekt_Frauen/Frauen_in_Rot',
               '/wiki/Wikipedia:Redaktionen',
               '/wiki/Wikipedia:Stuttgart#Workshop_Frauen_in_der_Politik',
               '/wiki/Wikipedia:WomenEdit',
               '/wiki/Wikipedia:FemNetz2021',
               '/wiki/Wikipedia:60_Minuten',
               '/wiki/Wikipedia%3AWho_writes_his_tory%3F',
               '/wiki/Wikipedia:Stuttgart',
               '/wiki/Benutzer:Ykskwadrat',
               '/wiki/Benutzer:Matthias_S%C3%BC%C3%9Fen',
               '/wiki/Benutzer:Cirdan',
               '/wiki/Benutzer:Mellebga',
               '/wiki/Benutzer:DerHexer',
               '/wiki/Benutzer:Nexo20')

queryFile <- 'Actionweek2021_Pageviews.hql'
fileName <- paste0("pageviews_", cetDay, ".tsv")

# - collect Pageviews data
wmde_collect_pageviews(uri_host,
                       uri_path,
                       cetDay,
                       queryFile,
                       fileName,
                       dataDir)

### --- Wrangle Pageviews
# - function: wmde_process_pageviews
wmde_process_pageviews <- function(fileName,
                                   dataDir, 
                                   uri_query_filter,
                                   page_filter,
                                   cetDay = cetDay,
                                   campaignName = campaignName) {
  
  # - to dataDir
  setwd(dataDir)
  
  # - libraries
  library(stringr)
  library(dplyr)
  library(tidyr)
  library(data.table)
  
  # - load
  pageviewsData <- readLines(fileName)
  wStart <- which(grepl("uri_host", pageviewsData))
  pageviewsData <- pageviewsData[(wStart + 1):(length(pageviewsData) - 1)]
  pageviewsData <- data.frame(dat = pageviewsData, 
                              stringsAsFactors = F)
  pageviewsData <- separate(pageviewsData,
                            dat,
                            into = c('uri_host', 'uri_path', 'uri_query', 'referer'),
                            sep = "\t")
  # - apply page_filter
  pageviewsData$page <- paste0(pageviewsData$uri_host, 
                               pageviewsData$uri_path,
                               pageviewsData$uri_query)
  wFilter <- sapply(pageviewsData$page, function(x) {
    sapply(page_filter, function(y) {
      grepl(y, x)
    })
  })
  wFilter <- colSums(wFilter)
  wFilter <- which(wFilter > 0)
  pageviewsData <- pageviewsData[wFilter, ]
  # - apply uri_query_filter
  # - NOTE: looking in both: uri_query, referer 
  w_uri_query <- which(grepl(uri_query_filter, pageviewsData$page))
  
  if (length(w_uri_query) > 0) {
    
    # - filter for w_uri_query
    pageviewsData <- pageviewsData[w_uri_query, ] 
    
    # - aggregate:
    pageviewsData$uri_path <- paste0(pageviewsData$uri_host, pageviewsData$uri_path)
    pageviewsData$uri_host <- NULL
    pageviewsData$page <- NULL
    pageviewsData$referer <- NULL
    pageviewsData <- pageviewsData %>% 
      dplyr::select(uri_query, uri_path) %>% 
      dplyr::group_by(uri_query, uri_path) %>% 
      dplyr::summarise(pageviews = n())
    colnames(pageviewsData) <- c('Tag', 'Page', 'Pageviews')
    
    # - add cetDay, campaignName
    pageviewsData$date <- cetDay
    pageviewsData$campaign <- campaignName
    
    # - store:
    write.csv(pageviewsData, 
              paste0(analyticsDir, 
                     "pageviewsAggregated_",
                     strsplit(
                       strsplit(fileName, split = "_", fixed = T)[[1]][2],
                       split = ".", 
                       fixed = T)[[1]][1],
                     ".csv"
              )
    )
    
  }
  
}

# - set params to wmde_process_pageviews
# - for the WMDE 2020_EmailCampaignWikipediaChallenge
uri_query_filter <- 'WMDE_aw21'
page_filter <- c('de.wikipedia.org/wiki/Wikipedia:Edit-a-thon/Wirtschaftsuniversit%C3%A4t_Wien_zum_Internationalen_Frauentag_2021',
                 'de.wikipedia.org/wiki/Wikipedia:GLAM',
                 'de.wikipedia.org/wiki/Wikipedia:Wikimedia_Deutschland/LerneWikipedia',
                 'de.wikipedia.org/wiki/Wikipedia:WikiProjekt_Frauen/Frauen_in_Rot',
                 'de.wikipedia.org/wiki/Wikipedia:Redaktionen',
                 'de.wikipedia.org/wiki/Wikipedia:Stuttgart#Workshop_Frauen_in_der_Politik',
                 'de.wikipedia.org/wiki/Wikipedia:WomenEdit',
                 'de.wikipedia.org/wiki/Wikipedia:FemNetz2021',
                 'de.wikipedia.org/wiki/Wikipedia:60_Minuten',
                 'de.wikipedia.org/wiki/Wikipedia%3AWho_writes_his_tory%3F',
                 'de.wikipedia.org/wiki/Wikipedia:Stuttgart',
                 'de.wikipedia.org/wiki/Benutzer:Ykskwadrat',
                 'de.wikipedia.org/wiki/Benutzer:Matthias_S%C3%BC%C3%9Fen',
                 'de.wikipedia.org/wiki/Benutzer:Cirdan',
                 'de.wikipedia.org/wiki/Benutzer:Mellebga',
                 'de.wikipedia.org/wiki/Benutzer:DerHexer',
                 'de.wikipedia.org/wiki/Benutzer:Nexo20')

# - wrangle pageviews
wmde_process_pageviews(fileName = fileName,
                       dataDir = dataDir,
                       uri_query_filter = uri_query_filter, 
                       page_filter = page_filter,
                       cetDay = cetDay,
                       campaignName = campaignName)

### ----------------------------------------------------------
### --- User Registrations
### --- HiveQL query: event.ServerSideAccountCreation table
### ----------------------------------------------------------
hiveQL <-
  "SELECT year, month, day, hour,  event.campaign, event.userId, event.userName, webhost, wiki
      FROM event.serversideaccountcreation
      WHERE
        YEAR = 2021
        AND month = 3
        AND event.campaign LIKE '%WMDE_aw21%' 
        AND wiki = 'dewiki';"
# - write hql
write(hiveQL, paste0(dataDir, 'user_registrations.hql'))
### --- output filename
filename <- paste0(analyticsDir, 'user_registrations.tsv')
### --- execute hql script:
hiveArgs <- 'sudo -u analytics-privatedata kerberos-run-command analytics-privatedata /usr/local/bin/beeline --silent --incremental=true --verbose=false -f'
hiveInput <- paste(paste0(dataDir, 'user_registrations.hql'),
                   " > ",
                   filename,
                   sep = "")
# - command:
hiveCommand <- paste(hiveArgs, hiveInput)
system(command = hiveCommand, wait = TRUE)

### ----------------------------------------------------------
### --- User Edits
### --- User Edits via revision_actor_temp
### ----------------------------------------------------------

library(bit64)

startTimestamp <- '20210308000000'
# - get user ids
userRegistrations <- fread(paste0(analyticsDir,
                                  'user_registrations.tsv'))
dim(userRegistrations)
# - clean up from test accounts
userRegistrations <- filter(userRegistrations, 
                            !grepl("^test|^Test", 
                                   userRegistrations$username))
dim(userRegistrations)
head(userRegistrations)
rev_user <- userRegistrations$userid
# - iterate over rev_user
for (i in 1:length(rev_user)) {
  # - SQL query
  sqlQuery <- paste("\"SELECT actor.actor_id, 
                        actor.actor_user, 
                        actor.actor_name, 
                        revision_actor_temp.revactor_timestamp 
                    FROM actor 
                    LEFT JOIN revision_actor_temp ON (actor.actor_id = revision_actor_temp.revactor_actor) 
                    WHERE (revision_actor_temp.revactor_timestamp >= 20210308000000 AND actor.actor_user = ", rev_user[i], ");\"");
  ### --- output filename
  filename <- paste(dataDir,'userEdits', "_", i, ".tsv", sep = "")
  ### --- execute sql script:
  sqlLogInPre <- paste0('/usr/local/bin/analytics-mysql dewiki -e ')
  sqlInput <- paste(sqlQuery,
                    " > ",
                    filename,
                    sep = "")
  # - command:
  sqlCommand <- paste(sqlLogInPre, sqlInput)
  system(command = sqlCommand, wait = TRUE)
  # - report
  print(paste0("DONE: user ", i, "."))
}
### --- END run SQL scripts
# - load user edits:
lF <- list.files(dataDir)
lF <- lF[grepl("^userEdits_", lF)]
userEdits <- lapply(paste0(dataDir, lF), fread)
edited <- sapply(userEdits, function(x){dim(x)[1] > 0})
userEdits <- userEdits[edited]
userEdits <- rbindlist(userEdits)
# - store user edits:
write.csv(userEdits, paste0(analyticsDir, 'userEdits.csv'))

0. Campaign Banners

This section presents all data and statistics on the campaign banners. The following chunk loads and then re-structures the banners dataset a bit:

lF <- list.files('_analytics')
lF <- lF[grepl("^bannerInteractions", lF)]
dataSet <- lapply(paste0("_analytics/", lF), fread)
dataSet <- rbindlist(dataSet)
dataSet$V1 <- NULL
dataSet$campaign <- NULL
# - NAs to zero
dataSet[which(is.na(dataSet), arr.ind = T)] <- 0
# - Design
dataSet$banner <- sapply(dataSet$bannername, function(x) {
  if (grepl("b1", x)) {return("b1")} else {return("b2")}
})
dataSet$device <- gsub("WMDE_wp20_aw_b1_|WMDE_wp20_aw_b2_", "", dataSet$bannername)

1. Campaign Pageviews

This section presents all data and statistics on the campaign pages.

The following chunk loads and then re-structures the dataset a bit.

lF <- list.files('_analytics')
lF <- lF[grepl("^pageviewsAggregated", lF)]
dataSet <- lapply(paste0("_analytics/", lF), fread)
dataSet <- rbindlist(dataSet)
dataSet$V1 <- NULL
dataSet$campaign <- NULL
# - expand grid to account for missing observations per day
dS <- expand.grid(unique(dataSet$Tag), 
                  unique(dataSet$Page), 
                  unique(dataSet$date), 
                  stringsAsFactors = F)
colnames(dS) <- c('Tag', 'Page', 'date')
dS <- dS %>% 
  left_join(dataSet, 
            by = c("Tag", "Page", "date"))
dataSet <- dS; rm(dS)
dataSet$Pageviews[is.na(dataSet$Pageviews)] <- 0
dataSet$Tag <- gsub("&.+$", "", dataSet$Tag)
dataSet$Tag <- gsub("\\?campaign=", "", dataSet$Tag)
dataSet$date <- sapply(dataSet$date, function(x) {
  d <- strsplit(x, split = "-")[[1]]
  if (nchar(d[3]) == 1) {
    d[3] <- paste0("0", d[3])
  }
  return(paste(d, collapse = "-"))
})
dataSet <- dataSet %>% 
  filter(!(grepl("^%", dataSet$Tag)))

1.1 Pageviews Overview

Chart 1.1.1 Daily Pageviews, aggregated across the campaign channels.

pFrame <- dataSet %>% 
  select(date, Page, Pageviews) %>% 
  group_by(date, Page) %>% 
  summarise(Pageviews = sum(Pageviews))
pFrame <- arrange(pFrame, date)
ggplot(pFrame, aes(x = date,
                   y = Pageviews,
                   label = Pageviews,
                    )) + 
  geom_path(size = .5, group = 1) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  scale_y_continuous(labels = comma) +
  facet_wrap(~Page, ncol = 2) + 
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Pageviews") + 
  theme_minimal() + 
  # geom_text_repel(size = 3.5, show.legend = FALSE) + 
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 10)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")

Table 1.1.1 Pageviews totals

tFrame <- pFrame %>% 
  select(Page, Pageviews) %>% 
  group_by(Page) %>% 
  summarise(totalPageviews = sum(Pageviews)) %>% 
  arrange(desc(totalPageviews))
datatable(tFrame)

1.2 Pageviews: Campaign Channels

Chart 1.2.1 Pageviews, by channels

pFrame <- dataSet %>% 
  select(date, Tag, Pageviews) %>% 
  group_by(date, Tag) %>% 
  summarise(Pageviews = sum(Pageviews))
pFrame <- arrange(pFrame, date)
ggplot(pFrame, aes(x = date,
                   y = Pageviews,
                   label = Pageviews,
                    )) + 
  geom_path(size = .5, group = 1) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  scale_y_continuous(labels = comma) +
  facet_wrap(~Tag, ncol = 2) + 
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Pageviews") + 
  theme_minimal() + 
  # geom_text_repel(size = 3.5, show.legend = FALSE) + 
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 6)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")

Chart 1.2.2 Total Pageviews, by channels

pFrame <- pFrame %>% 
  select(Tag, Pageviews) %>% 
  group_by(Tag) %>% 
  summarise(Pageviews = sum(Pageviews)) %>% 
  arrange(desc(Pageviews))
pFrame$Tag <- factor(pFrame$Tag,
                     levels = pFrame$Tag,
                     ordered = T)
ggplot(pFrame, aes(x = Tag,
                   y = Pageviews,
                   label = Pageviews,
                    )) + 
  geom_bar(stat = "identity", color = "blue", fill = "white") + 
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Pageviews") + 
  theme_minimal() + 
  geom_text_repel(size = 3.5, show.legend = FALSE) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 8)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")

Table 2.2.1 Total pageviews, by channels and by pages.

tFrame <- dataSet %>% 
  select(Tag, Page, Pageviews) %>% 
  group_by(Tag, Page) %>% 
  summarise(totalPageviews = sum(Pageviews)) %>% 
  arrange(desc(totalPageviews))
datatable(tFrame)

2. User Registrations

This section presents all data and statistics on the user registrations.

2.1 User Registrations Overview

lF <- list.files("_analytics")
lF <- lF[grepl("^user_registrations", lF)]
dataSet <- fread(paste0("_analytics/", lF))
dataSet <- filter(dataSet, 
                  !grepl("^test|^Test", dataSet$username))
dataSet$date <- paste(dataSet$year, 
                      ifelse(nchar(dataSet$month) == 1, paste0("0", dataSet$month), dataSet$month),
                      ifelse(nchar(dataSet$day) == 1, paste0("0", dataSet$day), dataSet$day),
                      sep = "-")
regUsers <- select(dataSet,
                   userid,
                   username, 
                   campaign)
# - total number of registered users
#  - total_registered <- dim(regUsers)[1]
# - expand grid to account for missing observations per day
dateSpan <- seq(from = as.Date("2021-03-08"), 
                to = as.Date("2021-03-21"), 
                by = "day")
dateSpan <- as.character(dateSpan)
dS <- expand.grid(dateSpan, 
                  unique(dataSet$campaign), 
                  stringsAsFactors = F)
colnames(dS) <- c('date', 'campaign')
dS <- dS %>% 
  left_join(select(dataSet, date, campaign, userid),
            by = c("date", "campaign"))
dS$userid <- ifelse(is.na(dS$userid), 0, 1)
total_registered <- sum(dS$userid)
pFrame <- dS %>% 
  select(date, userid) %>% 
  group_by(date) %>% 
  summarise(registrations = sum(userid)) %>% 
  arrange(date)
ggplot(pFrame, aes(x = date,
                   y = registrations,
                   label = registrations,
                    )) + 
  geom_path(size = .25, group = 1) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Registrations") + 
  theme_minimal() + 
  geom_text_repel(size = 3.5, show.legend = FALSE) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 7)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")

The total number of users registered in this campaign is 17.

2.2 User Registrations per Campaign Channel

pFrame <- dS %>% 
  group_by(date, campaign) %>% 
  summarise(registrations = sum(userid)) %>% 
  arrange(date)
ggplot(pFrame, aes(x = date,
                   y = registrations,
                   label = registrations,
                    )) + 
  geom_path(size = .25, group = 1) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  facet_wrap(~campaign, ncol = 2) + 
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Registrations") + 
  theme_minimal() + 
  # geom_text_repel(size = 3.5, show.legend = FALSE) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 6)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")

2.3 Total User Registrations per Campaign Channel

pFrame <- dS %>% 
  select(campaign, userid) %>% 
  group_by( campaign) %>% 
  summarise(registrations = sum(userid)) %>% 
  arrange(desc(registrations))
pFrame$campaign <- factor(pFrame$campaign, 
                          levels = pFrame$campaign, 
                          ordered = T)
ggplot(pFrame, aes(x = campaign,
                   y = registrations,
                   label = registrations,
                    )) + 
  geom_bar(stat = "identity", color = "blue", fill = "white") + 
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Registrations") + 
  theme_minimal() + 
  geom_text_repel(size = 3.5, show.legend = FALSE) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 8)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")

3. User Edits

This section presents all data and statistics on the user edits.

3.1 User Edits Overview

The following chunk loads the dataset of user interactions with campaign channels and then re-structures the dataset a bit.

lF <- list.files("_analytics/")
lF <- lF[grepl("^userEdits", lF)]
editSet <- fread(paste0("_analytics/", lF))
editSet <- select(editSet,
                  actor_name, 
                  revactor_timestamp)                 
colnames(editSet) <- c('username', 'rev_timestamp')
dataSet <- left_join(editSet,
                     select(regUsers, 
                            username,
                            campaign),
                     by = "username")
dataSet <- dataSet[complete.cases(dataSet), ]
dataSet$year <- substr(dataSet$rev_timestamp, 1, 4)
dataSet$month <- substr(dataSet$rev_timestamp, 5, 6)
dataSet$day <- substr(dataSet$rev_timestamp, 7, 8)
dataSet$rev_timestamp <- NULL
dataSet$date <- paste(dataSet$year,
                       dataSet$month,
                       dataSet$day,
                       sep = "-")
dataSet <- select(dataSet,
                  date,
                  username,
                  campaign)
dateSpan <- seq(from = as.Date("2021-03-08"), 
                to = as.Date(max(dataSet$date)), 
                by = "day")
dateSpan <- as.character(dateSpan)
dS <- expand.grid(dateSpan, 
                  unique(dataSet$campaign),
                  stringsAsFactors = F)
colnames(dS) <- c('date', 'campaign')
dS <- dS %>% 
  left_join(select(dataSet, date, campaign, username),
            by = c("date", "campaign"))
dS$username <- ifelse(is.na(dS$username), 0, 1)
pFrame <- dS %>%
  select(date, username) %>% 
  group_by(date) %>% 
  summarise(edits = sum(username)) %>% 
  arrange(date)
ggplot(pFrame, aes(x = date,
                   y = edits,
                   label = edits,
                    )) + 
  geom_path(size = .25, group = 1) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Edits") + 
  theme_minimal() + 
  geom_text_repel(size = 3.5, show.legend = FALSE) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 7)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")

3.2 User Edits by Campaign Channels

pFrame <- dS %>% 
  group_by(date, campaign) %>% 
  summarise(edits = sum(username)) %>% 
  arrange(date)
ggplot(pFrame, aes(x = date,
                   y = edits,
                   label = edits,
                    )) + 
  geom_path(size = .25, group = 1) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  facet_wrap(~campaign, ncol = 2) + 
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Edits") + 
  theme_minimal() + 
  # geom_text_repel(size = 3.5, show.legend = FALSE) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 5.5)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")

3.3 Total User Edits by Campaign Channels

pFrame <- dS %>% 
  select(campaign, username) %>% 
  group_by( campaign) %>% 
  summarise(edits = sum(username)) %>% 
  arrange(desc(edits))
pFrame$campaign <- factor(pFrame$campaign, 
                          levels = pFrame$campaign, 
                          ordered = T)
ggplot(pFrame, aes(x = campaign,
                   y = edits,
                   label = edits,
                    )) + 
  geom_bar(stat = "identity", color = "blue", fill = "white") + 
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Edits") + 
  theme_minimal() + 
  geom_text_repel(size = 3.5, show.legend = FALSE) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 8)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")

3.4 Edit Classes

3.4.1 Edit Classes: all users

userClass <- dataSet %>% 
  select(username) %>% 
  group_by(username) %>% 
  summarise(edits = n())
editBoundaries <- list(
  c(0, 1), 
  c(2, 4),
  c(5, 9),
  c(10, 20),
  c(21, 50),
  c(51, 100), 
  c(101, 1000000)
)
userClass$editClass <- sapply(userClass$edits, function(x) {
  wEC <- sapply(editBoundaries, function(y) {
    x >= y[1] & x <= y[2]
  })
  if (sum(wEC) == 0) {
    return("> 100")
  } else {
    return(paste0("(",
                  editBoundaries[[which(wEC)]][1],
                  " - ",
                  editBoundaries[[which(wEC)]][2], 
                  ")"
                  )
    )
  }
})
editClass <- as.data.frame(table(userClass$editClass), 
                           stringsAsFactors = F)
colnames(editClass) <- c('Edit Class', 'Num.Users')
editClass$order <- as.numeric(sapply(editClass$`Edit Class`, function(x) {
  lower <- str_extract(x, '[[:digit:]]+')
}))
editClass <- arrange(editClass, order)
editClass$order <- NULL
editClass$`Edit Class`[editClass$`Edit Class` == '(0 - 1)'] <- '(1)'
editClass$`Edit Class`[editClass$`Edit Class` == '(101 - 1e+06)'] <- '(> 100)'
datatable(editClass)

3.4.2 Edit Classes per Campaign Channels

edClasses <- lapply(unique(dataSet$campaign), function(x) {
  userClass <- dataSet %>% 
  filter(campaign == x) %>% 
  select(username) %>% 
  group_by(username) %>% 
  summarise(edits = n())
editBoundaries <- list(
  c(0, 1), 
  c(2, 4),
  c(5, 9),
  c(10, 20),
  c(21, 50),
  c(51, 100), 
  c(101, 1000000)
)
userClass$editClass <- sapply(userClass$edits, function(x) {
  wEC <- sapply(editBoundaries, function(y) {
    x >= y[1] & x <= y[2]
  })
  if (sum(wEC) == 0) {
    return("> 100")
  } else {
    return(paste0("(",
                  editBoundaries[[which(wEC)]][1],
                  " - ",
                  editBoundaries[[which(wEC)]][2], 
                  ")"
                  )
    )
  }
})
userClass$editClass[userClass$editClass == '(0 - 1)'] <- '(1)'
userClass$editClass[userClass$editClass == '(101 - 1e+06)'] <- '(> 100)'
editClass <- as.data.frame(table(userClass$editClass), 
                           stringsAsFactors = F)
colnames(editClass) <- c('Edit Class', 'Num.Users')
editClass$order <- as.numeric(sapply(editClass$`Edit Class`, function(x) {
  lower <- str_extract(x, '[[:digit:]]+')
}))
editClass <- arrange(editClass, order)
editClass$order <- NULL
editClass$campaign <- x
return(editClass)
})
edClasses <- rbindlist(edClasses)
edClasses$campaign <- sapply(edClasses$campaign, function(x) {
  d <- strsplit(x, "_")[[1]]
  if (nchar(tail(d, 1)) == 1) {
    d[length(d)] <- paste0("0", d[length(d)])
  }
  d <- paste(d, collapse = "_")
  return(d)
  })
edClasses <- arrange(edClasses, campaign)
edClasses$`Edit Class`[edClasses$`Edit Class` == '(0 - 1)'] <- '(1)'
edClasses$`Edit Class` <- factor(edClasses$`Edit Class`, 
                                 levels = c('(1)',
                                            '(2 - 4)', 
                                            '(5 - 9)',
                                            '(10 - 20)',
                                            '(21 - 50)', 
                                            '(51 - 100)',
                                            '(> 100)'),
                                 ordered = T
                                 )
ggplot(edClasses, 
       aes(x = `Edit Class`, 
           y = `Num.Users`, 
           label = `Num.Users`)) + 
  geom_bar(stat = "identity", color = "black", fill = "white") + 
  facet_wrap(~campaign) + 
  ggtitle('2021 Actionweek') +
  xlab("Edit Class") + ylab("Num.users") + 
  theme_minimal() + 
  geom_text_repel(size = 3.5, show.legend = FALSE) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 8)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")

---
title: '2021 Actionweek (Preliminary Report)'
author: "Goran S. Milovanovic, Data Scientist, WMDE"
date: "March 29, 2021"
output:
  html_notebook:
    code_folding: hide
    theme: simplex
    toc: yes
    toc_float: yes
    toc_depth: 5
  html_document:
    toc: yes
    toc_depth: 5
---

**Feedback** should be send to `goran.milovanovic_ext@wikimedia.de`. 

The campaign was run from 2021/03/08 to 2021/03/21.

**CURRENT UPDATE:** Dataset as of 2021/03/21; Report updated w. user edits data on 2021/03/29. 

```{r, echo = F, message = F, warning = F}
# !diagnostics off
### --- Setup
library(kableExtra)
library(rmarkdown)
library(knitr)
library(tidyverse)
library(data.table)
library(reshape2)
library(DT)
library(ggrepel)
library(scales)
library(RColorBrewer)
```

## Data Acquisiton

**NOTE:** the Data Acquisition code chunk is not fully reproducible from this Report. The data are collected by running the script `2021_Actionweek_PRODUCTION.R` on `stat1007.eqiad.wmnet` Analytics Client, collecting the data as `.tsv` and `.csv` files, copying manually, and processing locally. A daily crontab job was run from `2021/03/08` to `2021/03/21` to collect the data for daily reporting. User edits are then collected until 2021/05/02. The data used in this report are aggregates of the daily datasets, sanitized and anonymized.   

**DATA:** (1) The `event.wmdebannerinteractions` schema is used for banner clicks/closing (see: [Phab](https://phabricator.wikimedia.org/T262534#6501793)); (2) the `wmf.webrequest` table is a source of banner impressions data; (3) pageviews, user registrations, and user edits are collected by following the standard procedure. 

### Daily Update

```{r, echo = T, eval = F}
### ----------------------------------------------------------------
### --- WMDE 2021_Actionweek
### --- https://phabricator.wikimedia.org/T274563
### ----------------------------------------------------------------

### ----------------------------------------------------------------
### --- Campaign Description and Parameters
### ----------------------------------------------------------------

### --- Tracking Part 1: banner campaign
### --- Please provide daily reports during the banner campaign (08th of March to 21th of March).

# - Tracking Part 2: Landingpage links to Wikipedia
# - Please provide a tracking report for the Links from the Landingpage of the campaign 
# - until 8 weeks after the end of the campaign.

### --- Timeline for the whole project
# - Start of the banner campaign: 08. March 2020
# - End of the banner campaign: 21. March 2020
# - Tracking test: first week of March
# - Last User signing up: 21.March 2020
# - Preliminary report for tracking part 1: sometime till end of March
# - Track editing behavior eight weeks after end of campaign: 02. May 2020
# - final report for tracking part 1 and 2: sometime till end of May

### --- Tracking Part 1: banner campaign
# - Campaign Tag used for Links
# -------- ?campaign=WMDE_aw21
# -------- this is only for pageviews, this tag is picked-up at our WMDE landing page
# - the Banners can carry one of the two following tags:
# -------- ?banner=WMDE_aw2021_var
# -------- ?banner=WMDE_aw2021_ctrl

### --- Timeline for the whole project
# - Start of the banner campaign: 1. October 2020
# - End of the banner campaign: 11. October 2020
# - Tracking test: End of September
# - Last User signing up: 11. October
# - Preliminary report for tracking part 1: sometime until end of October
# - Last User finishing mailing campaign: November 11th
# - Track editing behavior six weeks after end of campaign: December 23
# - final report for tracking part 1 and 2: beginning of January

### --- Daily Tracking
# - daily reporting during campaign
# - The following information should be included in the daily reporting:
# --- impressions per banner
# --- clicks per banner/ page views per landing page
# --- closing rate of banners
# --- If possible with the distinction whether the person was logged in or not.

### --- libraries
library(tidyverse)
library(data.table)
library(lubridate)

### --- dir structure
campaignPath <- '/home/goransm/Analytics/NewEditors/Campaigns/2021_Actionweek/'
dataDir <- paste0(campaignPath, "_data/")
analyticsDir <- paste0(campaignPath, "_analytics/")
### --- campaign specifics
campaignName <- 'Actionweek_2021'

### --- determine cetDay
cetDay <- Sys.time()
cetDay
attr(cetDay, "tzone") <- "Europe/Berlin"
# - one day behind for crontab
# - (i.e. waiting for wmf.webrequest to complete is data acquisition)
cetDay <- ymd(
  strsplit(as.character(cetDay), 
           split = " ", 
           fixed = T)[[1]][1]
) - 1

### ----------------------------------------------------------
### --- Banner Interactions:
### --- via event.WMDEBannerInteractions
### --- https://meta.wikimedia.org/wiki/Schema:WMDEBannerInteractions
### ----------------------------------------------------------

# - select dt, event.bannerName, event.bannerAction, event.bannerImpressions, event.userID 
# - from event.wmdebannerinteractions where year=2020 and month=5 and (day=11 or day=12 or day=13);

# - function: wmde_banner_actions
wmde_banner_actions <- function(uri_query_filter,
                                cetDay,
                                queryFile,
                                fileName,
                                analyticsDir,
                                campaignName) {
  
  # - NOTE:
  # - expected format for cetDay is: YYYY-MM-DD
  
  # - WHERE condition: create datetime_condition
  cet_condition <- seq(
    from = as.POSIXct(paste0(cetDay," 0:00"), tz = "Europe/Berlin"),
    to = as.POSIXct(paste0(cetDay," 23:00"), tz = "Europe/Berlin"),
    by = "hour"
  ) 
  attr(cet_condition, "tzone") <- "UTC"
  cet_condition <- as.character(cet_condition)
  cet_condition <- unlist(str_extract_all(cet_condition, "^([[:digit:]]|\\s|-)*"))
  cet_years <- sapply(
    strsplit(cet_condition, split = " ", fixed = T), function(x) {
      strsplit(x, split = "-")[[1]][1]
    })
  cet_months <- sapply(
    strsplit(cet_condition, split = " ", fixed = T), function(x) {
      strsplit(x, split = "-")[[1]][2]
    })
  cet_months <- gsub("^0", "", cet_months)
  cet_days <- sapply(
    strsplit(cet_condition, split = " ", fixed = T), function(x) {
      strsplit(x, split = "-")[[1]][3]
    })
  cet_days <- gsub("^0", "", cet_days)
  cet_hours <- sapply(strsplit(cet_condition, split = " ", fixed = T), 
                      function(x) {
                        x[2]
                      })
  cet_hours <- gsub("^0", "", cet_hours)
  datetimeCondition <- paste0(
    "year = ", cet_years, " AND ",
    "month = ", cet_months, " AND ",
    "day = ", cet_days, " AND ", 
    "hour = ", cet_hours
  )
  datetimeCondition <- paste("(", 
                             datetimeCondition, 
                             ")",
                             collapse = " OR ", 
                             sep = "")
  
  
  # - WHERE condition: create eventBannerName_condition
  if (length(uri_query_filter) > 1) {
    eventBannerName_condition <- paste0("(",
                                        paste(
                                          paste0("event.bannerName LIKE '%", uri_query_filter, "%'"),
                                          collapse = " OR ", sep = " "),
                                        ")"
    )
  } else {
    eventBannerName_condition = paste0("event.bannerName LIKE '%", uri_query_filter, "%'")
  }
  
  # - compose HiveQL query
  hiveQuery <- paste0( 
    "select dt, event.bannerName, event.bannerAction, event.bannerImpressions, event.userID from event.wmdebannerinteractions 
    WHERE (",
    eventBannerName_condition, " AND ",
    "(", datetimeCondition, ")",
    ");"
  )
  
  # - write hql
  write(hiveQuery, paste0(dataDir, queryFile))
  # - execute hql script:
  kerberosPrefix <- 
    'sudo -u analytics-privatedata kerberos-run-command analytics-privatedata '
  # - Kerberos init
  system(command = paste0(kerberosPrefix, ' hdfs dfs -ls'), 
         wait = T)
  # - Run query
  query <- system(command = paste(kerberosPrefix, 
                                  '/usr/local/bin/beeline --incremental=true --silent -f "',
                                  paste0(dataDir, queryFile),
                                  '" > ', dataDir, fileName,
                                  sep = ""),
                  wait = TRUE)
  
  # - Wrangle Banner Interactions
  # - load
  bannerData <- tryCatch({
    as.data.frame(fread(paste0(dataDir, fileName)))
  },
  error = function(condition) {
    return(FALSE)
  })
  
  # - process
  if (class(bannerData) == 'logical') {
    return(FALSE) 
  } else { 
    # - bannerSeen
    bannerSeen <- bannerData %>% 
      dplyr::select(bannername, userid)
    # - bannerSeen <- bannerSeen[!duplicated(bannerSeen), ]
    bannerSeen <- bannerSeen %>% 
      dplyr::select(bannername) %>% 
      dplyr::group_by(bannername) %>% 
      dplyr::summarise(seen_by = n())
    # - bannerClosed
    bannerClosed <- bannerData %>% 
      dplyr::filter(banneraction == "banner-closed") %>% 
      dplyr::select(bannername, bannerimpressions) %>% 
      dplyr::group_by(bannername) %>%
      dplyr::summarise(closed_by = n(), 
                       mean_close_imp = round(mean(bannerimpressions), 2))
    # - bannerClicked
    bannerClicked <- bannerData %>% 
      dplyr::filter(banneraction == "banner-clicked") %>% 
      dplyr::select(bannername, bannerimpressions) %>% 
      dplyr::group_by(bannername) %>%
      dplyr::summarise(clicked_by = n(), 
                       mean_click_imp = round(mean(bannerimpressions), 2))
    # - whoClicked
    whoClicked <- bannerData %>% 
      dplyr::filter(banneraction == "banner-clicked")
    whoClicked <- data.frame(userid = unique(whoClicked$userid))
    # - store:
    write.csv(whoClicked, 
              paste0(analyticsDir, 
                     "whoClicked_",
                     strsplit(
                       strsplit(fileName, split = "_", fixed = T)[[1]][2],
                       split = ".", 
                       fixed = T)[[1]][1],
                     ".csv"
              )
    )
    # - join
    bannerData <- bannerSeen %>% 
      dplyr::left_join(bannerClosed, 'bannername') %>% 
      dplyr::left_join(bannerClicked, 'bannername')
    bannerData$close_rate <- round(bannerData$closed_by/bannerData$seen_by, 2)
    bannerData$click_rate <- round(bannerData$clicked_by/bannerData$seen_by, 2)
    # - date, campaign
    bannerData$day <- cetDay
    bannerData$campaign <- campaignName
    
    # - store:
    write.csv(bannerData, 
              paste0(analyticsDir, 
                     "bannerInteractionsAggregated_",
                     strsplit(
                       strsplit(fileName, split = "_", fixed = T)[[1]][2],
                       split = ".", 
                       fixed = T)[[1]][1],
                     ".csv"
              )
    )
  }
  
}

# - set params for wmde_banner_actions()
queryFile <- paste0(campaignName, "_bannerInteractions.hql")
fileName <- paste0("bannerInteractions_", cetDay, ".tsv")
uri_query_filter <- '_wp20_aw_'
banner_status <- wmde_banner_actions(uri_query_filter = uri_query_filter,
                                     cetDay = cetDay,
                                     queryFile = queryFile,
                                     fileName = fileName,
                                     analyticsDir = analyticsDir, 
                                     campaignName = campaignName)

### ----------------------------------------------------------
### --- Pageviews
### ----------------------------------------------------------

# - function: wmde_collect_pageviews
wmde_collect_pageviews <- function(uri_host,
                                   uri_path,
                                   cetDay,
                                   queryFile,
                                   fileName,
                                   dataDir) {
  
  # - NOTE:
  # - expected format for cetDay is: YYYY-MM-DD
  
  # - to dataDir
  setwd(dataDir)
  
  # - libraries
  library(stringr)
  
  # - WHERE condition: create datetime_condition
  cet_condition <- seq(
    from = as.POSIXct(paste0(cetDay," 0:00"), tz = "Europe/Berlin"),
    to = as.POSIXct(paste0(cetDay," 23:00"), tz = "Europe/Berlin"),
    by = "hour"
  ) 
  attr(cet_condition, "tzone") <- "UTC"
  cet_condition <- as.character(cet_condition)
  cet_condition <- unlist(str_extract_all(cet_condition, "^([[:digit:]]|\\s|-)*"))
  cet_years <- sapply(
    strsplit(cet_condition, split = " ", fixed = T), function(x) {
      strsplit(x, split = "-")[[1]][1]
    })
  cet_months <- sapply(
    strsplit(cet_condition, split = " ", fixed = T), function(x) {
      strsplit(x, split = "-")[[1]][2]
    })
  cet_months <- gsub("^0", "", cet_months)
  cet_days <- sapply(
    strsplit(cet_condition, split = " ", fixed = T), function(x) {
      strsplit(x, split = "-")[[1]][3]
    })
  cet_days <- gsub("^0", "", cet_days)
  cet_hours <- sapply(strsplit(cet_condition, split = " ", fixed = T), 
                      function(x) {
                        x[2]
                      })
  cet_hours <- gsub("^0", "", cet_hours)
  datetimeCondition <- paste0(
    "year = ", cet_years, " AND ",
    "month = ", cet_months, " AND ",
    "day = ", cet_days, " AND ", 
    "hour = ", cet_hours
  )
  datetimeCondition <- paste("(", 
                             datetimeCondition, 
                             ")",
                             collapse = " OR ", 
                             sep = "")
  
  # - WHERE condition: create uri_host_condition
  if (length(uri_host) > 1) {
    uri_host_condition <- paste0("(",
                                 paste(
                                   paste0("uri_host = '", uri_host, "'"),
                                   collapse = " OR ", sep = " "),
                                 ")"
    )
  } else {
    uri_host_condition = paste0("uri_host = '", uri_host, "'")
  }
  
  # - WHERE condition: create uri_path_condition
  if (length(uri_path) > 1) {
    uri_path_condition <- paste0("(",
                                 paste(
                                   paste0("uri_path = '", uri_path, "'"),
                                   collapse = " OR ", sep = " "),
                                 ")"
    )
  } else {
    uri_path_condition = paste0("uri_path = '", uri_path, "'")
  }
  
  # - compose HiveQL query
  hiveQuery <- paste0( 
    "USE wmf;
    SELECT uri_host, uri_path, uri_query, referer FROM webrequest
    WHERE (",
    uri_host_condition, " AND ",
    uri_path_condition, " AND ",
    "(", datetimeCondition, ")",
    ");"
  )
  
  # - write hql
  write(hiveQuery, queryFile)
  # - execute hql script:
  kerberosPrefix <- 
    'sudo -u analytics-privatedata kerberos-run-command analytics-privatedata '
  # - Kerberos init
  system(command = paste0(kerberosPrefix, ' hdfs dfs -ls'), 
         wait = T)
  # - Run query
  query <- system(command = paste(kerberosPrefix, 
                                  '/usr/local/bin/beeline --incremental=true --silent -f "',
                                  paste0(dataDir, queryFile),
                                  '" > ', dataDir, fileName,
                                  sep = ""),
                  wait = TRUE)
}

# - set params to wmde_collect_pageviews
uri_host <- c('de.wikipedia.org', 'de.m.wikipedia.org')
uri_path  <- c('/wiki/Wikipedia:Edit-a-thon/Wirtschaftsuniversit%C3%A4t_Wien_zum_Internationalen_Frauentag_2021',
               '/wiki/Wikipedia:GLAM',
               '/wiki/Wikipedia:Wikimedia_Deutschland/LerneWikipedia',
               '/wiki/Wikipedia:WikiProjekt_Frauen/Frauen_in_Rot',
               '/wiki/Wikipedia:Redaktionen',
               '/wiki/Wikipedia:Stuttgart#Workshop_Frauen_in_der_Politik',
               '/wiki/Wikipedia:WomenEdit',
               '/wiki/Wikipedia:FemNetz2021',
               '/wiki/Wikipedia:60_Minuten',
               '/wiki/Wikipedia%3AWho_writes_his_tory%3F',
               '/wiki/Wikipedia:Stuttgart',
               '/wiki/Benutzer:Ykskwadrat',
               '/wiki/Benutzer:Matthias_S%C3%BC%C3%9Fen',
               '/wiki/Benutzer:Cirdan',
               '/wiki/Benutzer:Mellebga',
               '/wiki/Benutzer:DerHexer',
               '/wiki/Benutzer:Nexo20')

queryFile <- 'Actionweek2021_Pageviews.hql'
fileName <- paste0("pageviews_", cetDay, ".tsv")

# - collect Pageviews data
wmde_collect_pageviews(uri_host,
                       uri_path,
                       cetDay,
                       queryFile,
                       fileName,
                       dataDir)

### --- Wrangle Pageviews
# - function: wmde_process_pageviews
wmde_process_pageviews <- function(fileName,
                                   dataDir, 
                                   uri_query_filter,
                                   page_filter,
                                   cetDay = cetDay,
                                   campaignName = campaignName) {
  
  # - to dataDir
  setwd(dataDir)
  
  # - libraries
  library(stringr)
  library(dplyr)
  library(tidyr)
  library(data.table)
  
  # - load
  pageviewsData <- readLines(fileName)
  wStart <- which(grepl("uri_host", pageviewsData))
  pageviewsData <- pageviewsData[(wStart + 1):(length(pageviewsData) - 1)]
  pageviewsData <- data.frame(dat = pageviewsData, 
                              stringsAsFactors = F)
  pageviewsData <- separate(pageviewsData,
                            dat,
                            into = c('uri_host', 'uri_path', 'uri_query', 'referer'),
                            sep = "\t")
  # - apply page_filter
  pageviewsData$page <- paste0(pageviewsData$uri_host, 
                               pageviewsData$uri_path,
                               pageviewsData$uri_query)
  wFilter <- sapply(pageviewsData$page, function(x) {
    sapply(page_filter, function(y) {
      grepl(y, x)
    })
  })
  wFilter <- colSums(wFilter)
  wFilter <- which(wFilter > 0)
  pageviewsData <- pageviewsData[wFilter, ]
  # - apply uri_query_filter
  # - NOTE: looking in both: uri_query, referer 
  w_uri_query <- which(grepl(uri_query_filter, pageviewsData$page))
  
  if (length(w_uri_query) > 0) {
    
    # - filter for w_uri_query
    pageviewsData <- pageviewsData[w_uri_query, ] 
    
    # - aggregate:
    pageviewsData$uri_path <- paste0(pageviewsData$uri_host, pageviewsData$uri_path)
    pageviewsData$uri_host <- NULL
    pageviewsData$page <- NULL
    pageviewsData$referer <- NULL
    pageviewsData <- pageviewsData %>% 
      dplyr::select(uri_query, uri_path) %>% 
      dplyr::group_by(uri_query, uri_path) %>% 
      dplyr::summarise(pageviews = n())
    colnames(pageviewsData) <- c('Tag', 'Page', 'Pageviews')
    
    # - add cetDay, campaignName
    pageviewsData$date <- cetDay
    pageviewsData$campaign <- campaignName
    
    # - store:
    write.csv(pageviewsData, 
              paste0(analyticsDir, 
                     "pageviewsAggregated_",
                     strsplit(
                       strsplit(fileName, split = "_", fixed = T)[[1]][2],
                       split = ".", 
                       fixed = T)[[1]][1],
                     ".csv"
              )
    )
    
  }
  
}

# - set params to wmde_process_pageviews
# - for the WMDE 2020_EmailCampaignWikipediaChallenge
uri_query_filter <- 'WMDE_aw21'
page_filter <- c('de.wikipedia.org/wiki/Wikipedia:Edit-a-thon/Wirtschaftsuniversit%C3%A4t_Wien_zum_Internationalen_Frauentag_2021',
                 'de.wikipedia.org/wiki/Wikipedia:GLAM',
                 'de.wikipedia.org/wiki/Wikipedia:Wikimedia_Deutschland/LerneWikipedia',
                 'de.wikipedia.org/wiki/Wikipedia:WikiProjekt_Frauen/Frauen_in_Rot',
                 'de.wikipedia.org/wiki/Wikipedia:Redaktionen',
                 'de.wikipedia.org/wiki/Wikipedia:Stuttgart#Workshop_Frauen_in_der_Politik',
                 'de.wikipedia.org/wiki/Wikipedia:WomenEdit',
                 'de.wikipedia.org/wiki/Wikipedia:FemNetz2021',
                 'de.wikipedia.org/wiki/Wikipedia:60_Minuten',
                 'de.wikipedia.org/wiki/Wikipedia%3AWho_writes_his_tory%3F',
                 'de.wikipedia.org/wiki/Wikipedia:Stuttgart',
                 'de.wikipedia.org/wiki/Benutzer:Ykskwadrat',
                 'de.wikipedia.org/wiki/Benutzer:Matthias_S%C3%BC%C3%9Fen',
                 'de.wikipedia.org/wiki/Benutzer:Cirdan',
                 'de.wikipedia.org/wiki/Benutzer:Mellebga',
                 'de.wikipedia.org/wiki/Benutzer:DerHexer',
                 'de.wikipedia.org/wiki/Benutzer:Nexo20')

# - wrangle pageviews
wmde_process_pageviews(fileName = fileName,
                       dataDir = dataDir,
                       uri_query_filter = uri_query_filter, 
                       page_filter = page_filter,
                       cetDay = cetDay,
                       campaignName = campaignName)

### ----------------------------------------------------------
### --- User Registrations
### --- HiveQL query: event.ServerSideAccountCreation table
### ----------------------------------------------------------
hiveQL <-
  "SELECT year, month, day, hour,  event.campaign, event.userId, event.userName, webhost, wiki
      FROM event.serversideaccountcreation
      WHERE
        YEAR = 2021
        AND month = 3
        AND event.campaign LIKE '%WMDE_aw21%' 
        AND wiki = 'dewiki';"
# - write hql
write(hiveQL, paste0(dataDir, 'user_registrations.hql'))
### --- output filename
filename <- paste0(analyticsDir, 'user_registrations.tsv')
### --- execute hql script:
hiveArgs <- 'sudo -u analytics-privatedata kerberos-run-command analytics-privatedata /usr/local/bin/beeline --silent --incremental=true --verbose=false -f'
hiveInput <- paste(paste0(dataDir, 'user_registrations.hql'),
                   " > ",
                   filename,
                   sep = "")
# - command:
hiveCommand <- paste(hiveArgs, hiveInput)
system(command = hiveCommand, wait = TRUE)

### ----------------------------------------------------------
### --- User Edits
### --- User Edits via revision_actor_temp
### ----------------------------------------------------------

library(bit64)

startTimestamp <- '20210308000000'
# - get user ids
userRegistrations <- fread(paste0(analyticsDir,
                                  'user_registrations.tsv'))
dim(userRegistrations)
# - clean up from test accounts
userRegistrations <- filter(userRegistrations, 
                            !grepl("^test|^Test", 
                                   userRegistrations$username))
dim(userRegistrations)
head(userRegistrations)
rev_user <- userRegistrations$userid
# - iterate over rev_user
for (i in 1:length(rev_user)) {
  # - SQL query
  sqlQuery <- paste("\"SELECT actor.actor_id, 
                        actor.actor_user, 
                        actor.actor_name, 
                        revision_actor_temp.revactor_timestamp 
                    FROM actor 
                    LEFT JOIN revision_actor_temp ON (actor.actor_id = revision_actor_temp.revactor_actor) 
                    WHERE (revision_actor_temp.revactor_timestamp >= 20210308000000 AND actor.actor_user = ", rev_user[i], ");\"");
  ### --- output filename
  filename <- paste(dataDir,'userEdits', "_", i, ".tsv", sep = "")
  ### --- execute sql script:
  sqlLogInPre <- paste0('/usr/local/bin/analytics-mysql dewiki -e ')
  sqlInput <- paste(sqlQuery,
                    " > ",
                    filename,
                    sep = "")
  # - command:
  sqlCommand <- paste(sqlLogInPre, sqlInput)
  system(command = sqlCommand, wait = TRUE)
  # - report
  print(paste0("DONE: user ", i, "."))
}
### --- END run SQL scripts
# - load user edits:
lF <- list.files(dataDir)
lF <- lF[grepl("^userEdits_", lF)]
userEdits <- lapply(paste0(dataDir, lF), fread)
edited <- sapply(userEdits, function(x){dim(x)[1] > 0})
userEdits <- userEdits[edited]
userEdits <- rbindlist(userEdits)
# - store user edits:
write.csv(userEdits, paste0(analyticsDir, 'userEdits.csv'))
```

## 0. Campaign Banners

This section presents all data and statistics on the campaign banners. The following chunk loads and then re-structures the banners dataset a bit:

```{r echo = T, eval = T, warning = 'hide', message = FALSE}
lF <- list.files('_analytics')
lF <- lF[grepl("^bannerInteractions", lF)]
dataSet <- lapply(paste0("_analytics/", lF), fread)
dataSet <- rbindlist(dataSet)
dataSet$V1 <- NULL
dataSet$campaign <- NULL
# - NAs to zero
dataSet[which(is.na(dataSet), arr.ind = T)] <- 0
# - Design
dataSet$banner <- sapply(dataSet$bannername, function(x) {
  if (grepl("b1", x)) {return("b1")} else {return("b2")}
})
dataSet$device <- gsub("WMDE_wp20_aw_b1_|WMDE_wp20_aw_b2_", "", dataSet$bannername)
```

### 0.1 Banner Actions Overview

**Chart 0.1.1** Daily Banner Impressions, Clicks, and Closures, aggregated across the campaign banners.
**NOTE.** Log(y) scales were used to help readability; data labels are exact values.

```{r echo = T, eval = T, warning = 'hide', message = FALSE, fig.width = 8, fig.height = 5}
pFrame <- dataSet %>% 
  select(day, seen_by, clicked_by, closed_by) %>% 
  group_by(day) %>% 
  summarise(seen_by = sum(seen_by), 
            closed_by = sum(closed_by), 
            clicked_by = sum(clicked_by))
pFrame$close_rate = round(pFrame$closed_by/pFrame$seen_by, 2)
pFrame$click_rate = round(pFrame$clicked_by/pFrame$seen_by, 2)
pFrame <- arrange(pFrame, day)
# - Visualize w. {ggplot2}: seen_by, clicked_by, closed_by, daily:
pF <- pFrame %>% 
  select(day, seen_by, closed_by, clicked_by) %>% 
  pivot_longer(c('seen_by', 'closed_by', 'clicked_by'), 
               names_to = "Action", 
               values_to = "Value")
ggplot(pF, aes(x = day,
               y = log(Value),
               group = Action,
               color = Action,
               fill = Action,
               label = Value,
                    )) + 
  geom_path(size = .5) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("log(Actions)") + 
  theme_minimal() + 
  geom_text_repel(size = 3.5, show.legend = FALSE) + 
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 8)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "top")
```

**Chart 0.1.2** Total, Mean per day, and Median per day Banner Impressions, Clicks, and Closures, aggregated across the campaign banners.

```{r echo = T, eval = T, warning = 'hide', message = FALSE, fig.width = 8, fig.height = 5}
pFrameTotal <- dataSet %>% 
  select(day, seen_by, clicked_by, closed_by) %>% 
  group_by(day) %>% 
  summarise(seen_by = sum(seen_by), 
            closed_by = sum(closed_by), 
            clicked_by = sum(clicked_by)
            ) 
pFrameTotal <- pFrameTotal %>% 
  summarise(seen_by__total_in_campaign = sum(seen_by), 
            closed_by__total_in_campaign = sum(closed_by), 
            clicked_by__total_in_campaign = sum(clicked_by),
            seen_by__mean_per_day = mean(seen_by), 
            closed_by__mean_per_day = mean(closed_by), 
            clicked_by__mean_per_day = mean(clicked_by),
            seen_by__median_per_day = median(seen_by), 
            closed_by__median_per_day = median(closed_by), 
            clicked_by__median_per_day = median(clicked_by)
            ) %>% 
  t()
pFrameTotal[, 1] <- round(pFrameTotal[, 1], 2)
pFrameTotal <- as.data.frame(pFrameTotal) %>% 
  rownames_to_column('Measure')
colnames(pFrameTotal)[2] <- 'Value'
pFrameTotal <-pFrameTotal %>% 
  separate('Measure', 
           sep = "__", 
           into = c('Action', 'Statistic'))
# - Visualize w. {ggplot2}: seen_by, clicked_by, closed_by, daily:
ggplot(pFrameTotal, aes(x = Action,
                        y = log(Value),
                        color = Statistic,
                        group = Statistic,
                        label = Value,
                    )) +
  geom_path(size = .5) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +  
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Action") + ylab("log(Value)") + 
  theme_minimal() + 
  geom_text_repel(size = 3.5, show.legend = FALSE) + 
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 8)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")
```

**Chart 0.1.3** Daily Banner Click and Close rates, aggregated across the campaign banners.

```{r echo = T, eval = T, warning = 'hide', message = FALSE, fig.width = 8, fig.height = 5}
# - Visualize w. {ggplot2}: close_rate, click_rate, daily:
pF <- pFrame %>% 
  select(day, click_rate, close_rate) %>% 
  pivot_longer(c('click_rate', 'close_rate'), 
               names_to = "Action", 
               values_to = "Value")
ggplot(pF, aes(x = day,
               y = Value,
               group = Action,
               color = Action,
               fill = Action,
               label = Value,
                    )) + 
  geom_path(size = .5) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Actions") + 
  theme_minimal() + 
  geom_text_repel(size = 3.5, show.legend = FALSE) + 
  scale_y_continuous(labels = comma) + 
  ylim(c(0, .25)) + 
  theme(axis.text.x = element_text(angle = 90, size = 8)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "top")
```

**Table 0.1.4** Mean and Median Click and Close rates (across days)
**Note.** The data are produce by computing means/medians across the daily click/close rates.

```{r echo = T, eval = T, warning = 'hide', message = FALSE}
tFrame <- data.frame(click_rate_mean = round(mean(pFrame$click_rate), 5), 
                     click_rate_median = round(median(pFrame$click_rate), 5),
                     close_rate_mean = round(mean(pFrame$close_rate), 5),
                     close_rate_median = round(median(pFrame$close_rate), 5)
)
datatable(tFrame)
```

**Table 0.1.5** De facto Click and Close rates in percents
**Note.** The data are produce by computing means/medians across the overall sums of clicks and closures in the campaign (i.e. by aggregating across the campaign days).

```{r echo = T, eval = T, warning = 'hide', message = FALSE}
tFrame <- data.frame(click_rate = round(sum(pFrame$clicked_by)/sum(pFrame$seen_by)*100, 5), 
                     close_rate = round(sum(pFrame$closed_by)/sum(pFrame$seen_by)*100, 5))
datatable(tFrame)
```

**Chart 0.1.6** Daily Banner Click and Close rates, by **devices**, aggregated across the campaign banners.

```{r echo = T, eval = T, warning = 'hide', message = FALSE, fig.width = 8, fig.height = 10}
pFrame <- dataSet %>% 
  select(day, seen_by, clicked_by, closed_by, device) %>% 
  group_by(day, device) %>% 
  summarise(seen_by = sum(seen_by), 
            closed_by = sum(closed_by), 
            clicked_by = sum(clicked_by))
pFrame$close_rate = round(pFrame$closed_by/pFrame$seen_by, 2)
pFrame$click_rate = round(pFrame$clicked_by/pFrame$seen_by, 2)
pFrame <- arrange(pFrame, day)
# - Visualize w. {ggplot2}: seen_by, clicked_by, closed_by, daily:
pF <- pFrame %>% 
  select(day, device, seen_by, closed_by, clicked_by) %>% 
  pivot_longer(c('seen_by', 'closed_by', 'clicked_by'), 
               names_to = "Action", 
               values_to = "Value")
ggplot(pF, aes(x = day,
               y = Value,
               group = Action,
               color = Action,
               fill = Action
               )) + 
  geom_path(size = .5) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  scale_y_continuous(labels = comma) +
  facet_wrap(~device, nrow = 3, scales = "free") + 
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Actions") + 
  theme_minimal() + 
  theme(axis.text.x = element_text(angle = 90, size = 8)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "top")
```

**Chart 0.1.6B** Mean, median, and total Impressions, clicks and closing clicks **per banner** , **per device**.
**NOTE.** Log(y) scales were used to help readability; data labels are exact values.

```{r echo = T, eval = T, message = FALSE, fig.width = 8, fig.height = 30}
pFrame <- dataSet %>% 
  dplyr::select(seen_by, clicked_by, closed_by, device, banner)
pFrame <- pFrame %>% 
  dplyr::group_by(device, banner) %>% 
  summarise(mean0seen_by = mean(seen_by),
            mean0clicked_by = mean(clicked_by),
            mean0closed_by = mean(closed_by),
            median0seen_by = median(seen_by),
            median0clicked_by = median(clicked_by),
            median0closed_by = median(closed_by),
            total0seen_by = sum(seen_by),
            total0clicked_by = sum(clicked_by),
            total0closed_by = sum(closed_by)
            )
pFrame <- pFrame %>% 
  pivot_longer(cols = c('mean0seen_by', 'mean0clicked_by', 'mean0closed_by',
                        'median0seen_by', 'median0clicked_by', 'median0closed_by',
                        'total0seen_by', 'total0clicked_by', 'total0closed_by'), 
               names_to = 'measure', 
               values_to = 'value')
pFrame <- pFrame %>% 
  separate(measure,
           into = c('measure', 'action'),
           sep = "0")
pFrame$value <- round(pFrame$value, 1)
ggplot(pFrame, aes(x = measure,
               y = log(value),
               group = action,
               color = action,
               fill = action,
               label = value,
                    )) + 
  geom_path(size = .5) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  geom_text_repel(size = 3.5, show.legend = FALSE) + 
  scale_y_continuous(labels = comma) +
  facet_wrap(device~banner, ncol = 2, scales = "free") + 
  ggtitle('2021 Actionweek') +
  xlab("Measure") + ylab("log(Value)") + 
  theme_minimal() + 
  theme(axis.text.x = element_text(angle = 90, size = 8)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "top")
```


**Chart 0.1.7** Daily Banner Click and Close rates, by **b1/b2**, aggregated across the campaign banners.

```{r echo = T, eval = T, warning = 'hide', message = FALSE, fig.width = 8, fig.height = 12}
pFrame <- dataSet %>% 
  select(day, seen_by, clicked_by, closed_by, banner) %>% 
  group_by(day, banner) %>% 
  summarise(seen_by = sum(seen_by), 
            closed_by = sum(closed_by), 
            clicked_by = sum(clicked_by))
pFrame$close_rate = round(pFrame$closed_by/pFrame$seen_by, 2)
pFrame$click_rate = round(pFrame$clicked_by/pFrame$seen_by, 2)
pFrame <- arrange(pFrame, day)
# - Visualize w. {ggplot2}: seen_by, clicked_by, closed_by, daily:
pF <- pFrame %>% 
  select(day, banner, seen_by, closed_by, clicked_by) %>% 
  pivot_longer(c('seen_by', 'closed_by', 'clicked_by'), 
               names_to = "Action", 
               values_to = "Value")
ggplot(pF, aes(x = day,
               y = Value,
               group = Action,
               color = Action,
               fill = Action,
               label = Value,
                    )) + 
  geom_path(size = .5) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  geom_text_repel(size = 3.5, show.legend = FALSE) + 
  scale_y_continuous(labels = comma) +
  facet_wrap(~banner, nrow = 2, scales = "free") + 
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Actions") + 
  theme_minimal() + 
  theme(axis.text.x = element_text(angle = 90, size = 8)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "top")
```

**Chart 0.1.7B** Mean, median, and total Impressions, clicks and closing clicks **per banner**.
**NOTE.** Log(y) scales were used to help readability; data labels are exact values.

```{r echo = T, eval = T, warning = 'hide', message = FALSE, fig.width = 8, fig.height = 6.5}
pFrame <- dataSet %>% 
  dplyr::select(seen_by, clicked_by, closed_by, banner)
pFrame <- pFrame %>% 
  dplyr::group_by(banner) %>% 
  summarise(mean0seen_by = mean(seen_by),
            mean0clicked_by = mean(clicked_by),
            mean0closed_by = mean(closed_by),
            median0seen_by = median(seen_by),
            median0clicked_by = median(clicked_by),
            median0closed_by = median(closed_by),
            total0seen_by = sum(seen_by),
            total0clicked_by = sum(clicked_by),
            total0closed_by = sum(closed_by)
            )
pFrame <- pFrame %>% 
  pivot_longer(cols = c('mean0seen_by', 'mean0clicked_by', 'mean0closed_by',
                        'median0seen_by', 'median0clicked_by', 'median0closed_by',
                        'total0seen_by', 'total0clicked_by', 'total0closed_by'), 
               names_to = 'measure', 
               values_to = 'value')
pFrame <- pFrame %>% 
  separate(measure,
           into = c('measure', 'action'),
           sep = "0")
pFrame$value <- round(pFrame$value, 1)
ggplot(pFrame, aes(x = measure,
               y = log(value),
               group = action,
               color = action,
               fill = action,
               label = value,
                    )) + 
  geom_path(size = .5) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  geom_text_repel(size = 3.5, show.legend = FALSE) + 
  scale_y_continuous(labels = comma) +
  facet_wrap(~banner, nrow = 3, scales = "free") + 
  ggtitle('2021 Actionweek') +
  xlab("Measure") + ylab("log(Value)") + 
  theme_minimal() + 
  theme(axis.text.x = element_text(angle = 90, size = 8)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "top")
```


**Table 0.1.8** Daily Banner Impressions: seen by, closed by, clicked by - full dataset.

```{r echo = T, eval = T, warning = 'hide', message = FALSE}
### --- Full Dataset (Table Report)
dS <- dataSet
dS$bannername <- gsub("WMDE_wp20_aw_", "", dS$bannername)
dS <- dS[,
         c('bannername', 'day', 'device', 'banner', 
           'seen_by', 'closed_by', 'clicked_by')]
datatable(dS, 
          options = list(pageLength = 30)
          )
```

## 1. Campaign Pageviews

This section presents all data and statistics on the campaign pages.

The following chunk loads and then re-structures the dataset a bit.

```{r echo = T, eval = T, warning = 'hide', message = FALSE}
lF <- list.files('_analytics')
lF <- lF[grepl("^pageviewsAggregated", lF)]
dataSet <- lapply(paste0("_analytics/", lF), fread)
dataSet <- rbindlist(dataSet)
dataSet$V1 <- NULL
dataSet$campaign <- NULL
# - expand grid to account for missing observations per day
dS <- expand.grid(unique(dataSet$Tag), 
                  unique(dataSet$Page), 
                  unique(dataSet$date), 
                  stringsAsFactors = F)
colnames(dS) <- c('Tag', 'Page', 'date')
dS <- dS %>% 
  left_join(dataSet, 
            by = c("Tag", "Page", "date"))
dataSet <- dS; rm(dS)
dataSet$Pageviews[is.na(dataSet$Pageviews)] <- 0
dataSet$Tag <- gsub("&.+$", "", dataSet$Tag)
dataSet$Tag <- gsub("\\?campaign=", "", dataSet$Tag)
dataSet$date <- sapply(dataSet$date, function(x) {
  d <- strsplit(x, split = "-")[[1]]
  if (nchar(d[3]) == 1) {
    d[3] <- paste0("0", d[3])
  }
  return(paste(d, collapse = "-"))
})
dataSet <- dataSet %>% 
  filter(!(grepl("^%", dataSet$Tag)))
```

### 1.1 Pageviews Overview

**Chart 1.1.1** Daily Pageviews, aggregated across the campaign channels.

```{r echo = T, eval = T, warning = 'hide', message = FALSE, fig.width = 10, fig.height = 6}
pFrame <- dataSet %>% 
  select(date, Page, Pageviews) %>% 
  group_by(date, Page) %>% 
  summarise(Pageviews = sum(Pageviews))
pFrame <- arrange(pFrame, date)
ggplot(pFrame, aes(x = date,
                   y = Pageviews,
                   label = Pageviews,
                    )) + 
  geom_path(size = .5, group = 1) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  scale_y_continuous(labels = comma) +
  facet_wrap(~Page, ncol = 2) + 
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Pageviews") + 
  theme_minimal() + 
  # geom_text_repel(size = 3.5, show.legend = FALSE) + 
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 10)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")
```

**Table 1.1.1** Pageviews totals

```{r echo = T, eval = T, warning = 'hide', message = FALSE}
tFrame <- pFrame %>% 
  select(Page, Pageviews) %>% 
  group_by(Page) %>% 
  summarise(totalPageviews = sum(Pageviews)) %>% 
  arrange(desc(totalPageviews))
datatable(tFrame)
```

### 1.2 Pageviews: Campaign Channels

**Chart 1.2.1** Pageviews, by **channels**

```{r echo = T, eval = T, warning = 'hide', message = FALSE, fig.width = 10, fig.height = 6}
pFrame <- dataSet %>% 
  select(date, Tag, Pageviews) %>% 
  group_by(date, Tag) %>% 
  summarise(Pageviews = sum(Pageviews))
pFrame <- arrange(pFrame, date)
ggplot(pFrame, aes(x = date,
                   y = Pageviews,
                   label = Pageviews,
                    )) + 
  geom_path(size = .5, group = 1) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  scale_y_continuous(labels = comma) +
  facet_wrap(~Tag, ncol = 2) + 
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Pageviews") + 
  theme_minimal() + 
  # geom_text_repel(size = 3.5, show.legend = FALSE) + 
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 6)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")
```

**Chart 1.2.2** Total Pageviews, by **channels**

```{r echo = T, eval = T, warning = 'hide', message = FALSE, fig.width = 8, fig.height = 5}
pFrame <- pFrame %>% 
  select(Tag, Pageviews) %>% 
  group_by(Tag) %>% 
  summarise(Pageviews = sum(Pageviews)) %>% 
  arrange(desc(Pageviews))
pFrame$Tag <- factor(pFrame$Tag,
                     levels = pFrame$Tag,
                     ordered = T)
ggplot(pFrame, aes(x = Tag,
                   y = Pageviews,
                   label = Pageviews,
                    )) + 
  geom_bar(stat = "identity", color = "blue", fill = "white") + 
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Pageviews") + 
  theme_minimal() + 
  geom_text_repel(size = 3.5, show.legend = FALSE) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 8)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")
```

**Table 2.2.1** Total pageviews, by **channels** and by **pages**.

```{r echo = T, eval = T, warning = 'hide', message = FALSE}
tFrame <- dataSet %>% 
  select(Tag, Page, Pageviews) %>% 
  group_by(Tag, Page) %>% 
  summarise(totalPageviews = sum(Pageviews)) %>% 
  arrange(desc(totalPageviews))
datatable(tFrame)
```

## 2. User Registrations

This section presents all data and statistics on the user registrations.

### 2.1 User Registrations Overview

```{r echo = T, eval = T, warning = 'hide', message = FALSE}
lF <- list.files("_analytics")
lF <- lF[grepl("^user_registrations", lF)]
dataSet <- fread(paste0("_analytics/", lF))
dataSet <- filter(dataSet, 
                  !grepl("^test|^Test", dataSet$username))
dataSet$date <- paste(dataSet$year, 
                      ifelse(nchar(dataSet$month) == 1, paste0("0", dataSet$month), dataSet$month),
                      ifelse(nchar(dataSet$day) == 1, paste0("0", dataSet$day), dataSet$day),
                      sep = "-")
regUsers <- select(dataSet,
                   userid,
                   username, 
                   campaign)
# - total number of registered users
#  - total_registered <- dim(regUsers)[1]
# - expand grid to account for missing observations per day
dateSpan <- seq(from = as.Date("2021-03-08"), 
                to = as.Date("2021-03-21"), 
                by = "day")
dateSpan <- as.character(dateSpan)
dS <- expand.grid(dateSpan, 
                  unique(dataSet$campaign), 
                  stringsAsFactors = F)
colnames(dS) <- c('date', 'campaign')
dS <- dS %>% 
  left_join(select(dataSet, date, campaign, userid),
            by = c("date", "campaign"))
dS$userid <- ifelse(is.na(dS$userid), 0, 1)
total_registered <- sum(dS$userid)
pFrame <- dS %>% 
  select(date, userid) %>% 
  group_by(date) %>% 
  summarise(registrations = sum(userid)) %>% 
  arrange(date)
ggplot(pFrame, aes(x = date,
                   y = registrations,
                   label = registrations,
                    )) + 
  geom_path(size = .25, group = 1) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Registrations") + 
  theme_minimal() + 
  geom_text_repel(size = 3.5, show.legend = FALSE) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 7)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")
```
The total number of users registered in this campaign is `r {total_registered}`.


### 2.2 User Registrations per Campaign Channel

```{r echo = T, eval = T, warning = 'hide', message = FALSE, fig.width = 10, fig.height = 8}
pFrame <- dS %>% 
  group_by(date, campaign) %>% 
  summarise(registrations = sum(userid)) %>% 
  arrange(date)
ggplot(pFrame, aes(x = date,
                   y = registrations,
                   label = registrations,
                    )) + 
  geom_path(size = .25, group = 1) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  facet_wrap(~campaign, ncol = 2) + 
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Registrations") + 
  theme_minimal() + 
  # geom_text_repel(size = 3.5, show.legend = FALSE) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 6)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")
```

### 2.3 Total User Registrations per Campaign Channel

```{r echo = T, eval = T, warning = 'hide', message = FALSE, fig.width = 8, fig.height = 5}
pFrame <- dS %>% 
  select(campaign, userid) %>% 
  group_by( campaign) %>% 
  summarise(registrations = sum(userid)) %>% 
  arrange(desc(registrations))
pFrame$campaign <- factor(pFrame$campaign, 
                          levels = pFrame$campaign, 
                          ordered = T)
ggplot(pFrame, aes(x = campaign,
                   y = registrations,
                   label = registrations,
                    )) + 
  geom_bar(stat = "identity", color = "blue", fill = "white") + 
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Registrations") + 
  theme_minimal() + 
  geom_text_repel(size = 3.5, show.legend = FALSE) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 8)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")
```

## 3. User Edits

This section presents all data and statistics on the user edits.

### 3.1 User Edits Overview

The following chunk loads the dataset of user interactions with campaign channels and then re-structures the dataset a bit.

```{r echo = T, eval = T, warning = 'hide', message = FALSE}
lF <- list.files("_analytics/")
lF <- lF[grepl("^userEdits", lF)]
editSet <- fread(paste0("_analytics/", lF))
editSet <- select(editSet,
                  actor_name, 
                  revactor_timestamp)                 
colnames(editSet) <- c('username', 'rev_timestamp')
dataSet <- left_join(editSet,
                     select(regUsers, 
                            username,
                            campaign),
                     by = "username")
dataSet <- dataSet[complete.cases(dataSet), ]
dataSet$year <- substr(dataSet$rev_timestamp, 1, 4)
dataSet$month <- substr(dataSet$rev_timestamp, 5, 6)
dataSet$day <- substr(dataSet$rev_timestamp, 7, 8)
dataSet$rev_timestamp <- NULL
dataSet$date <- paste(dataSet$year,
                       dataSet$month,
                       dataSet$day,
                       sep = "-")
dataSet <- select(dataSet,
                  date,
                  username,
                  campaign)
dateSpan <- seq(from = as.Date("2021-03-08"), 
                to = as.Date(max(dataSet$date)), 
                by = "day")
dateSpan <- as.character(dateSpan)
dS <- expand.grid(dateSpan, 
                  unique(dataSet$campaign),
                  stringsAsFactors = F)
colnames(dS) <- c('date', 'campaign')
dS <- dS %>% 
  left_join(select(dataSet, date, campaign, username),
            by = c("date", "campaign"))
dS$username <- ifelse(is.na(dS$username), 0, 1)
pFrame <- dS %>%
  select(date, username) %>% 
  group_by(date) %>% 
  summarise(edits = sum(username)) %>% 
  arrange(date)
ggplot(pFrame, aes(x = date,
                   y = edits,
                   label = edits,
                    )) + 
  geom_path(size = .25, group = 1) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Edits") + 
  theme_minimal() + 
  geom_text_repel(size = 3.5, show.legend = FALSE) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 7)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")
```

### 3.2 User Edits by Campaign Channels

```{r echo = T, eval = T, warning = 'hide', message = FALSE, fig.width = 10, fig.height = 8}
pFrame <- dS %>% 
  group_by(date, campaign) %>% 
  summarise(edits = sum(username)) %>% 
  arrange(date)
ggplot(pFrame, aes(x = date,
                   y = edits,
                   label = edits,
                    )) + 
  geom_path(size = .25, group = 1) + 
  geom_point(size = 1.5) +
  geom_point(size = 1, fill = "white", color = "white") +
  facet_wrap(~campaign, ncol = 2) + 
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Edits") + 
  theme_minimal() + 
  # geom_text_repel(size = 3.5, show.legend = FALSE) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 5.5)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")
```

### 3.3 Total User Edits by Campaign Channels

```{r echo = T, eval = T, warning = 'hide', message = FALSE, fig.width = 8, fig.height = 5}
pFrame <- dS %>% 
  select(campaign, username) %>% 
  group_by( campaign) %>% 
  summarise(edits = sum(username)) %>% 
  arrange(desc(edits))
pFrame$campaign <- factor(pFrame$campaign, 
                          levels = pFrame$campaign, 
                          ordered = T)
ggplot(pFrame, aes(x = campaign,
                   y = edits,
                   label = edits,
                    )) + 
  geom_bar(stat = "identity", color = "blue", fill = "white") + 
  scale_y_continuous(labels = comma) +
  ggtitle('2021 Actionweek') +
  xlab("Date") + ylab("Edits") + 
  theme_minimal() + 
  geom_text_repel(size = 3.5, show.legend = FALSE) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 8)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")
```

### 3.4 Edit Classes

#### 3.4.1 Edit Classes: all users

```{r echo = T, eval = T, warning = 'hide', message = FALSE, fig.width = 8, fig.height = 6.5}
userClass <- dataSet %>% 
  select(username) %>% 
  group_by(username) %>% 
  summarise(edits = n())
editBoundaries <- list(
  c(0, 1), 
  c(2, 4),
  c(5, 9),
  c(10, 20),
  c(21, 50),
  c(51, 100), 
  c(101, 1000000)
)
userClass$editClass <- sapply(userClass$edits, function(x) {
  wEC <- sapply(editBoundaries, function(y) {
    x >= y[1] & x <= y[2]
  })
  if (sum(wEC) == 0) {
    return("> 100")
  } else {
    return(paste0("(",
                  editBoundaries[[which(wEC)]][1],
                  " - ",
                  editBoundaries[[which(wEC)]][2], 
                  ")"
                  )
    )
  }
})
editClass <- as.data.frame(table(userClass$editClass), 
                           stringsAsFactors = F)
colnames(editClass) <- c('Edit Class', 'Num.Users')
editClass$order <- as.numeric(sapply(editClass$`Edit Class`, function(x) {
  lower <- str_extract(x, '[[:digit:]]+')
}))
editClass <- arrange(editClass, order)
editClass$order <- NULL
editClass$`Edit Class`[editClass$`Edit Class` == '(0 - 1)'] <- '(1)'
editClass$`Edit Class`[editClass$`Edit Class` == '(101 - 1e+06)'] <- '(> 100)'
datatable(editClass)
```

#### 3.4.2 Edit Classes per Campaign Channels

```{r echo = T, eval = T, warning = 'hide', message = FALSE, fig.width = 8, fig.height = 6.5}
edClasses <- lapply(unique(dataSet$campaign), function(x) {
  userClass <- dataSet %>% 
  filter(campaign == x) %>% 
  select(username) %>% 
  group_by(username) %>% 
  summarise(edits = n())
editBoundaries <- list(
  c(0, 1), 
  c(2, 4),
  c(5, 9),
  c(10, 20),
  c(21, 50),
  c(51, 100), 
  c(101, 1000000)
)
userClass$editClass <- sapply(userClass$edits, function(x) {
  wEC <- sapply(editBoundaries, function(y) {
    x >= y[1] & x <= y[2]
  })
  if (sum(wEC) == 0) {
    return("> 100")
  } else {
    return(paste0("(",
                  editBoundaries[[which(wEC)]][1],
                  " - ",
                  editBoundaries[[which(wEC)]][2], 
                  ")"
                  )
    )
  }
})
userClass$editClass[userClass$editClass == '(0 - 1)'] <- '(1)'
userClass$editClass[userClass$editClass == '(101 - 1e+06)'] <- '(> 100)'
editClass <- as.data.frame(table(userClass$editClass), 
                           stringsAsFactors = F)
colnames(editClass) <- c('Edit Class', 'Num.Users')
editClass$order <- as.numeric(sapply(editClass$`Edit Class`, function(x) {
  lower <- str_extract(x, '[[:digit:]]+')
}))
editClass <- arrange(editClass, order)
editClass$order <- NULL
editClass$campaign <- x
return(editClass)
})
edClasses <- rbindlist(edClasses)
edClasses$campaign <- sapply(edClasses$campaign, function(x) {
  d <- strsplit(x, "_")[[1]]
  if (nchar(tail(d, 1)) == 1) {
    d[length(d)] <- paste0("0", d[length(d)])
  }
  d <- paste(d, collapse = "_")
  return(d)
  })
edClasses <- arrange(edClasses, campaign)
edClasses$`Edit Class`[edClasses$`Edit Class` == '(0 - 1)'] <- '(1)'
edClasses$`Edit Class` <- factor(edClasses$`Edit Class`, 
                                 levels = c('(1)',
                                            '(2 - 4)', 
                                            '(5 - 9)',
                                            '(10 - 20)',
                                            '(21 - 50)', 
                                            '(51 - 100)',
                                            '(> 100)'),
                                 ordered = T
                                 )
ggplot(edClasses, 
       aes(x = `Edit Class`, 
           y = `Num.Users`, 
           label = `Num.Users`)) + 
  geom_bar(stat = "identity", color = "black", fill = "white") + 
  facet_wrap(~campaign) + 
  ggtitle('2021 Actionweek') +
  xlab("Edit Class") + ylab("Num.users") + 
  theme_minimal() + 
  geom_text_repel(size = 3.5, show.legend = FALSE) +
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, size = 8)) +
  theme(plot.title = element_text(size = 10)) +
  theme(legend.title = element_blank()) + 
  theme(legend.position = "right")
```

