Sys.setenv("http_proxy" = "http://webproxy.eqiad.wmnet:8080") library(WikidataR) library(WMUtils) top_editors <- global_query("SELECT rev_user_text, COUNT(*) AS edits, MAX(rev_timestamp) AS recent_edit FROM revision INNER JOIN tag_summary ON ts_rev_id = rev_id WHERE rev_timestamp >= '20141125203901' AND rev_user > 0 AND ts_tags RLIKE('visual') GROUP BY rev_user_text;") top_editors$recent_edit <- as.numeric(top_editors$recent_edit) top_editors <- top_editors[, j = { if(length(recent_edit[recent_edit > 20150126000000]) == 0){ NULL } else { home_wiki <- project[edits == max(edits)] if(length(home_wiki) > 1){ home_wiki <- home_wiki[1] } output <- data.table(edits = sum(edits), home_wiki = home_wiki) output } }, by = "rev_user_text"] setnames(top_editors,1,"user") top_editors <- top_editors[order(top_editors$edits, decreasing = TRUE),] write.table(top_editors, "top_ve_editors.tsv", row.names = FALSE, sep = "\t", quote = TRUE) feedback_data <- do.call("rbind",lapply(get_item("Q12060906")$sitelinks,function(x){ split_title <- unlist(strsplit(x$title,":")) page_title <- split_title[nchar(split_title) == max(nchar(split_title))] if(length(page_title) > 1){ page_title <- page_title[2] } page_title <- gsub(x = page_title, pattern = " ", replacement = "_") contributors <- mysql_query(paste0("SELECT rev_user_text AS user, COUNT(*) AS edits FROM revision WHERE rev_page = ( SELECT page_id FROM page WHERE page_namespace = 4 AND page_title = '", page_title, "') AND rev_user_text NOT LIKE('%(WMF)') AND rev_user_text IN(SELECT DISTINCT(rc_user_text) FROM recentchanges WHERE rc_bot=0) GROUP BY rev_user_text HAVING COUNT(*) >= 3 ORDER BY COUNT(*) DESC;"), x$site) contributors$project <- x$site return(contributors) })) de_data <- mysql_query(paste0("SELECT rev_user_text AS user, COUNT(*) AS edits FROM revision WHERE rev_page = ( SELECT page_id FROM page WHERE page_namespace = 4 AND page_title = 'Technik/Text/Edit/VisualEditor/Beta2013-07') AND rev_user_text NOT LIKE('%(WMF)') AND rev_user_text IN(SELECT DISTINCT(rc_user_text) FROM recentchanges WHERE rc_bot=0) GROUP BY rev_user_text HAVING COUNT(*) >= 3 ORDER BY COUNT(*) DESC;"), "dewiki") feedback_data <- feedback_data[!feedback_data$user == "MediaWiki message delivery"] feedback_data <- feedback_data[!feedback_data$user %in% legacy_data$username,] feedback_data <- feedback_data[, j = { if(nrow(.SD) <= 50){ .SD } else { .SD[1:50,] } }, by = "project"] write.table(feedback_data, "ve_feedback_providers.tsv", row.names = FALSE, sep = "\t", quote = TRUE) de_data <- de_data[!de_data$user %in% feedback_data$user,] de_data <- de_data[!de_data$user == "MediaWiki message delivery"] write.table(de_data, "de_ve_feedback_providers.tsv", row.names = FALSE, sep = "\t", quote = TRUE) switched_users <- global_query("SELECT rev_user_text AS username, COUNT(*) AS edits FROM revision INNER JOIN tag_summary ON rev_id = ts_rev_id WHERE ts_tags LIKE('%isualeditor-switch%') GROUP BY rev_user_text") switched_users <- switched_users[,j={ home_wiki <- project[edits == max(edits)] if(length(home_wiki) > 1){ home_wiki <- home_wiki[1] } output <- data.table(edits = sum(edits), home_wiki = home_wiki) output }, by = "username"] switched_users <- switched_users[!switched_users$username %in% feedback_data$user & !switched_users$username %in% top_editors$user,] switched_users <- switched_users[order(switched_users$edits, decreasing = TRUE),][1:200] write.table(switched_users, "ve_switched_users.tsv", row.names = FALSE, sep = "\t", quote = TRUE) most_used <- global_query("SELECT rev_user_text AS username, COUNT(*) AS ve_edits FROM revision INNER JOIN tag_summary ON rev_id = ts_rev_id WHERE ts_tags RLIKE('visual') AND rev_timestamp BETWEEN '20121201000000' AND '20130630235959' AND rev_user_text IN (SELECT DISTINCT(rc_user_text) FROM recentchanges) GROUP BY rev_user_text ORDER BY COUNT(*) DESC;") most_used <- most_used[order(most_used$ve_edits, decreasing = TRUE),] most_used <- most_used[!most_used$username %in% feedback_data$user & !most_used$username %in% top_editors$user & !most_used$username %in% switched_users$username,][1:200,] write.table(most_used, "ve_most_used_before_deploy.tsv", row.names = FALSE, sep = "\t", quote = TRUE) stoppedusing <- global_query("SELECT rc_user_text AS username, COUNT(*) AS edits FROM recentchanges LEFT JOIN tag_summary ON rc_id = ts_rc_id AND ts_tags NOT RLIKE('visual') WHERE rc_bot = 0 AND rc_user > 0 AND rc_namespace = 0 AND rc_user_text IN (SELECT rev_user_text FROM revision INNER JOIN tag_summary ON rev_id = ts_rev_id WHERE rev_timestamp BETWEEN '20140901000000' AND '20141231000000' AND rev_user > 0 AND ts_tags RLIKE('visual') GROUP BY rev_user_text HAVING COUNT(*) >= 10 ) AND rc_user_text NOT IN (SELECT rev_user_text FROM revision INNER JOIN tag_summary ON rev_id = ts_rev_id WHERE rev_timestamp > '20141231000000' AND rev_user > 0 AND ts_tags RLIKE('visual')) GROUP BY rc_user_text HAVING COUNT(*) >= 10 ORDER BY COUNT(*) DESC") stoppedusing <- stoppedusing[!stoppedusing$username %in% feedback_data$user & !stoppedusing$username %in% top_editors$user & !stoppedusing$username %in% switched_users$username & !stoppedusing$username %in% most_used$username,] write.table(stoppedusing, "stopped_using.tsv", row.names = FALSE, sep = "\t", quote = TRUE) el_data <- mysql_query("SELECT `event_user.id` AS userid, wiki, COUNT(*) AS aborted_edits FROM Edit_11448630 WHERE event_editor = 'visualeditor' AND event_action = 'abort' AND `event_action.abort.type` = 'nochange' AND `event_user.id` > 0 GROUP BY `event_user.id`, wiki","log") wikis <- mysql_query(query = "SELECT wiki FROM wiki_info", db = "staging")$wiki el_data <- el_data[el_data$wiki %in% wikis,] el_results <- el_data[,j={ ids <- paste0("(",paste0(.SD$userid, collapse = ","),")") usernames <- as.data.table(mysql_query(paste0("SELECT user_id AS userid,user_name FROM user WHERE user_id IN",ids),wiki[1])) results <- merge(.SD, usernames, by = "userid", all.x=TRUE) results[,"userid" :=NULL,] }, by = "wiki"] de_data <- de_data[!de_data$user %in% feedback_data$user & !de_data$user %in% top_editors$user & !de_data$user %in% switched_users$username & !de_data$user %in% most_used$username,] write.table(de_data, "de_ve_feedback_providers.tsv", row.names = FALSE, sep = "\t", quote = TRUE) el_results <- el_results[!el_results$user_name %in% feedback_data$user & !el_results$user_name %in% top_editors$user & !el_results$user_name %in% switched_users$username & !el_results$user_name %in% most_used$username & !el_results$user_name %in% de_data$user,] el_results <- el_results[order(el_results$aborted_edits, decreasing = TRUE),] el_results <- el_results[1:200,] el_results <- el_results[aborted_edits >=3,] write.table(el_results, "visualeditor_aborted.tsv", row.names = FALSE, sep = "\t", quote = TRUE) mediawiki_contributors <- mysql_query("SELECT th_user_text AS username, COUNT(*) AS edits FROM thread_history WHERE th_thread IN (SELECT DISTINCT(thread_id) FROM thread WHERE thread_article_id = '74615' OR (thread_article_title = 'VisualEditor/Feedback' AND thread_article_namespace = '0')) AND th_user > 0 AND th_user_text NOT LIKE('%WMF%') GROUP BY th_user_text ORDER BY COUNT(*) DESC;","mediawikiwiki") mediawiki_contributors <- mediawiki_contributors[!mediawiki_contributors$username %in% feedback_data$user & !mediawiki_contributors$username %in% top_editors$user & !mediawiki_contributors$username %in% switched_users$username & !mediawiki_contributors$username %in% most_used$username & !mediawiki_contributors$username %in% de_data$user & !mediawiki_contributors$username %in% el_results$user_name, ] write.table(mediawiki_contributors, "mediawiki_feedback.tsv", row.names = FALSE, sep = "\t", quote = TRUE)