{ "cells": [ { "cell_type": "markdown", "id": "5c70634b-6401-4264-b01b-6bf06da926da", "metadata": { "tags": [] }, "source": [ "# Quarterly Reporting - Q1 / 2023\n", "\n", "### Notes\n", "\n", "
\n", "Open for notes\n", " \n", "- Documentation for wmfdata-python can be found at: https://github.com/wikimedia/wmfdata-python\n", "- A Kerberos connection needs to be made via `kinit` in a Jupyter Terminal for data access\n", " - The password is your Kerberos password\n", "- This notebook is ran quarterly one week before the end of the quarter\n", " - A new repo is made in `wmde-analytics/reporting/quarterly` for the given period\n", " - Stakeholders are notified via email at the end of the run\n", " - Analytics is prompted via email to add analysis of the generated metrics\n", " \n", "
\n", "\n", "### Metrics\n", "\n", "- Aggregates, one year retrospective aggregates and percentage change for the following:\n", " - Total items\n", " - Admins (interface-admin)\n", " - Bureaucrats\n", " - Property creators\n", " - Active editors (5 edits per month)\n", " - Items per active editor" ] }, { "cell_type": "code", "execution_count": 70, "id": "5a624ea0-0a19-44ab-b45e-db4d618d6b52", "metadata": {}, "outputs": [], "source": [ "from datetime import datetime\n", "from dateutil.relativedelta import relativedelta\n", "from math import ceil, floor\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import wmfdata as wmf" ] }, { "cell_type": "code", "execution_count": 68, "id": "b1af87b4-68a5-4ac7-859b-9e618f2ccdc7", "metadata": {}, "outputs": [], "source": [ "def percentage_change(first: str, second: str):\n", " change = round((second - first) / first, 4) * 100\n", " if change == 0:\n", " print(\"No change between the periods.\")\n", " elif change > 0:\n", " print(\"{:.2f} percent increase between the periods.\".format(change))\n", " elif change < 0:\n", " print(\"{:.2f} percent decrease between the periods.\".format(change))\n", " \n", " if int(repr(change)[-1]) > 4:\n", " return ceil(change * 10 ** 2) / 10 ** 2\n", " \n", " else:\n", " return floor(change * 10 ** 2) / 10 ** 2" ] }, { "cell_type": "code", "execution_count": 7, "id": "2d419b77-932c-42b8-ab9f-212974871d16", "metadata": {}, "outputs": [], "source": [ "def get_first_date_of_quarter(t: str):\n", " \"\"\"\n", " Returns a datetime.datetime string of the first date of the quarter.\n", " \"\"\"\n", " month = int(str(t).split(\"-\")[1])\n", " assert 1 <= month <= 12 , \"An invalid date was passed.\"\n", " \n", " t = t.replace(day=1)\n", " \n", " if 1 <= month < 4:\n", " t = t.replace(month = 1)\n", " \n", " elif 4 <= month < 7:\n", " t = t.replace(month = 4)\n", " \n", " elif 7 <= month < 10:\n", " t = t.replace(month = 7)\n", " \n", " elif 10 <= month < 12:\n", " t = t.replace(month = 10)\n", " \n", " return t" ] }, { "cell_type": "code", "execution_count": 8, "id": "c0e9225e-4ba1-4faa-9fb1-4ae307d77e35", "metadata": {}, "outputs": [], "source": [ "# Define the time period for the report.\n", "date_of_run = datetime.today()\n", "first_date_current_q = get_first_date_of_quarter(date_of_run)\n", "\n", "# Get values needed for subsetting.\n", "last_date_last_q = first_date_current_q - relativedelta(days = 1)\n", "first_date_last_q = first_date_current_q - relativedelta(months = 3)\n", "last_date_2q_ago = first_date_last_q - relativedelta(days = 1)\n", "first_date_2q_ago = first_date_last_q - relativedelta(months = 3)\n", "\n", "last_date_last_q_1y_ago = last_date_last_q - relativedelta(years = 1)\n", "first_date_last_q_1y_ago = first_date_last_q - relativedelta(years = 1)\n", "last_date_2q_ago_1y_ago = last_date_2q_ago - relativedelta(years = 1)\n", "first_date_2q_ago_1y_ago = first_date_2q_ago - relativedelta(years = 1)\n", "\n", "# Convert to strings for usage within query f-strings.\n", "# Note that months are for snapshots and dates are for event_timestamps. \n", "last_date_last_q = last_date_last_q.strftime('%Y-%m-%d')\n", "last_month_last_q = \"-\".join(last_date_last_q.split(\"-\")[:-1])\n", "first_date_last_q = first_date_last_q.strftime('%Y-%m-%d')\n", "\n", "last_date_2q_ago = last_date_2q_ago.strftime('%Y-%m-%d')\n", "last_month_2q_ago = \"-\".join(last_date_2q_ago.split(\"-\")[:-1])\n", "first_date_2q_ago = first_date_2q_ago.strftime('%Y-%m-%d')\n", "\n", "# One year ago date values for YOY chages.\n", "last_date_last_q_1y_ago = last_date_last_q_1y_ago.strftime('%Y-%m-%d')\n", "last_month_last_q_1y_ago = \"-\".join(last_date_last_q_1y_ago.split(\"-\")[:-1])\n", "first_date_last_q_1y_ago = first_date_last_q_1y_ago.strftime('%Y-%m-%d')\n", "\n", "last_date_2q_ago_1y_ago = last_date_2q_ago_1y_ago.strftime('%Y-%m-%d')\n", "last_month_2q_ago_1y_ago = \"-\".join(last_date_2q_ago_1y_ago.split(\"-\")[:-1])\n", "first_date_2q_ago_1y_ago = first_date_2q_ago_1y_ago.strftime('%Y-%m-%d')" ] }, { "cell_type": "code", "execution_count": 9, "id": "05880bc8-c4cd-4789-8c09-0b4ede2033b7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dates to consider for this notebook:\n", "\n", "2023-03-31\n", "2023-03\n", "2023-01-01\n", "2022-12-31\n", "2022-12\n", "2022-10-01\n", "-----\n", "2022-03-31\n", "2022-03\n", "2022-01-01\n", "2021-12-31\n", "2021-12\n", "2021-10-01\n" ] } ], "source": [ "print(\"Dates to consider for this notebook:\")\n", "print(\"\")\n", "\n", "print(last_date_last_q)\n", "print(last_month_last_q)\n", "print(first_date_last_q)\n", "print(last_date_2q_ago)\n", "print(last_month_2q_ago)\n", "print(first_date_2q_ago)\n", "print(\"-----\")\n", "print(last_date_last_q_1y_ago)\n", "print(last_month_last_q_1y_ago)\n", "print(first_date_last_q_1y_ago)\n", "print(last_date_2q_ago_1y_ago)\n", "print(last_month_2q_ago_1y_ago)\n", "print(first_date_2q_ago_1y_ago)" ] }, { "cell_type": "markdown", "id": "16220383-228a-48d0-8ca8-c32a0fd14eb5", "metadata": { "tags": [] }, "source": [ "## Items" ] }, { "cell_type": "code", "execution_count": 10, "id": "9072a781-915b-4a4a-8684-5b0bad99d34c", "metadata": {}, "outputs": [], "source": [ "def get_wd_total_pages_query(snapshot_month: str):\n", " return f\"\"\"\n", " SELECT \n", " COUNT(DISTINCT page_id) AS total_pages\n", "\n", " FROM \n", " wmf_raw.mediawiki_page\n", "\n", " WHERE \n", " wiki_db = 'wikidatawiki'\n", " AND snapshot = '{snapshot_month}'\n", " -- Main namespace for wikibase-items.\n", " AND page_namespace = 0\n", " AND page_is_redirect = false\n", " \"\"\"" ] }, { "cell_type": "markdown", "id": "72963586-f28c-46f9-ac03-c405bb7cbda8", "metadata": {}, "source": [ "### Last Quarter" ] }, { "cell_type": "code", "execution_count": 11, "id": "c5703247-9170-419a-937e-f0f2f8ec88f2", "metadata": {}, "outputs": [], "source": [ "wd_total_pages_last_q = wmf.presto.run(\n", " commands=get_wd_total_pages_query(snapshot_month=last_month_last_q)\n", ")[\"total_pages\"][0]" ] }, { "cell_type": "code", "execution_count": 12, "id": "668d4395-7d03-4dc1-87da-38f20d8fc024", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "101354507" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wd_total_pages_last_q" ] }, { "cell_type": "markdown", "id": "8b7d25fe-dc1b-4748-ad02-b064749396f9", "metadata": {}, "source": [ "### Two Quarters Ago" ] }, { "cell_type": "code", "execution_count": 13, "id": "717f49d6-1371-4534-a450-e1b2a1a78fc9", "metadata": {}, "outputs": [], "source": [ "wd_total_pages_2q_ago = wmf.presto.run(\n", " commands=get_wd_total_pages_query(snapshot_month=last_month_2q_ago)\n", ")[\"total_pages\"][0]" ] }, { "cell_type": "code", "execution_count": 14, "id": "f65c1b30-ed34-488d-a18d-5bf816d82315", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "100202415" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wd_total_pages_2q_ago" ] }, { "cell_type": "markdown", "id": "2e94a717-2be8-4902-b854-ec5929c503ee", "metadata": {}, "source": [ "### Five Quarters Ago" ] }, { "cell_type": "code", "execution_count": 15, "id": "7b565515-75d8-4f11-b548-223878cdb40c", "metadata": {}, "outputs": [], "source": [ "wd_total_pages_last_q_1y_ago = wmf.presto.run(\n", " commands=get_wd_total_pages_query(snapshot_month=last_month_last_q_1y_ago)\n", ")[\"total_pages\"][0]" ] }, { "cell_type": "code", "execution_count": 16, "id": "5fabd116-7b57-4b1d-bc04-2dcda07f7b37", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wd_total_pages_last_q_1y_ago" ] }, { "cell_type": "markdown", "id": "6cf130d4-cd7d-4185-a800-0946607aaf27", "metadata": {}, "source": [ "**Note**: we don't have total page data for five and six quarters ago." ] }, { "cell_type": "markdown", "id": "b4bda389-e0f9-4c86-bee9-d6f9d6bafb73", "metadata": {}, "source": [ "### Percentage Change" ] }, { "cell_type": "code", "execution_count": 59, "id": "4ced82e7-2c76-40c8-85a0-60f150f28f49", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.15 percent increase between the periods.\n" ] }, { "data": { "text/plain": [ "1.15" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wd_total_pages_2q_ago_last_q = percentage_change(\n", " first=wd_total_pages_2q_ago, second=wd_total_pages_last_q\n", ")\n", "wd_total_pages_2q_ago_last_q" ] }, { "cell_type": "markdown", "id": "3e0f73e7-ba2f-492c-a1d2-c57cdb7a53ff", "metadata": {}, "source": [ "## Admins" ] }, { "cell_type": "code", "execution_count": 18, "id": "eafc5a00-d823-4ba1-860c-001824be88fa", "metadata": {}, "outputs": [], "source": [ "def get_admins_query(snapshot_month: str):\n", " return f\"\"\"\n", " SELECT \n", " COUNT(DISTINCT ug_user) AS total_admins\n", "\n", " FROM \n", " wmf_raw.mediawiki_user_groups\n", "\n", " WHERE \n", " wiki_db = 'wikidatawiki'\n", " AND snapshot = '{snapshot_month}'\n", " AND ug_group = 'interface-admin'\n", " \"\"\"" ] }, { "cell_type": "markdown", "id": "e65fb2b5-a218-4d0f-9273-16b59ccc5502", "metadata": {}, "source": [ "### Last Quarter" ] }, { "cell_type": "code", "execution_count": 19, "id": "869a22b5-0a56-4fe9-8bdf-1eb06fdbda79", "metadata": {}, "outputs": [], "source": [ "total_admins_last_q = wmf.presto.run(\n", " commands=get_admins_query(snapshot_month=last_month_last_q)\n", ")[\"total_admins\"][0]" ] }, { "cell_type": "code", "execution_count": 20, "id": "bce1042d-cba6-49f9-bab2-2de698b8cd5c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "14" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_admins_last_q" ] }, { "cell_type": "markdown", "id": "f75afd8a-a2c4-40cb-a9f9-edae83907d94", "metadata": {}, "source": [ "### Two Quarters Ago" ] }, { "cell_type": "code", "execution_count": 21, "id": "7f78d289-7c64-4573-82fe-bea1652064dd", "metadata": {}, "outputs": [], "source": [ "total_admins_2q_ago = wmf.presto.run(\n", " commands=get_admins_query(snapshot_month=last_month_2q_ago)\n", ")[\"total_admins\"][0]" ] }, { "cell_type": "code", "execution_count": 22, "id": "62e24da7-de95-4be1-aaa4-ce387bd0dccd", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "14" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_admins_2q_ago" ] }, { "cell_type": "markdown", "id": "f2368db2-479d-4a96-a424-f2a6519e472d", "metadata": {}, "source": [ "### Five Quarters Ago" ] }, { "cell_type": "code", "execution_count": 23, "id": "55a1380d-9546-4bf5-86a7-70cb46231a1b", "metadata": {}, "outputs": [], "source": [ "total_admins_last_q_1y_ago = wmf.presto.run(\n", " commands=get_admins_query(snapshot_month=last_month_last_q_1y_ago)\n", ")[\"total_admins\"][0]" ] }, { "cell_type": "code", "execution_count": 24, "id": "de387a38-8c94-4736-976a-cb17081d9950", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_admins_last_q_1y_ago" ] }, { "cell_type": "markdown", "id": "e52d3a12-b75e-4a25-85af-ddc4ee9660d9", "metadata": {}, "source": [ "### Percentage Change" ] }, { "cell_type": "code", "execution_count": 60, "id": "8c2dd199-cc45-417c-b648-6aa7f8273066", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No change between the periods.\n" ] }, { "data": { "text/plain": [ "0.0" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_admins_2q_ago_last_q = percentage_change(\n", " first=total_admins_2q_ago, second=total_admins_last_q\n", ")\n", "total_admins_2q_ago_last_q" ] }, { "cell_type": "markdown", "id": "09697366-9909-49e3-b6fd-49a11b727a05", "metadata": {}, "source": [ "## Bureaucrats" ] }, { "cell_type": "markdown", "id": "6133e02b-179a-4d45-9872-4e81fec94adf", "metadata": {}, "source": [ "### Last Quarter" ] }, { "cell_type": "code", "execution_count": 26, "id": "d082a0ea-6e38-4847-8dfe-174e25b83101", "metadata": {}, "outputs": [], "source": [ "def get_bureaucrats_query(snapshot_month: str):\n", " return f\"\"\"\n", " SELECT \n", " COUNT(DISTINCT ug_user) AS total_bureaucrats\n", "\n", " FROM \n", " wmf_raw.mediawiki_user_groups\n", "\n", " WHERE \n", " wiki_db = 'wikidatawiki'\n", " AND snapshot = '{snapshot_month}'\n", " AND ug_group = 'bureaucrat'\n", " \"\"\"" ] }, { "cell_type": "code", "execution_count": 27, "id": "8fd83fda-d8bd-473c-9f7f-1d11689710aa", "metadata": {}, "outputs": [], "source": [ "total_bureaucrats_last_q = wmf.presto.run(\n", " commands=get_bureaucrats_query(snapshot_month=last_month_last_q)\n", ")[\"total_bureaucrats\"][0]" ] }, { "cell_type": "code", "execution_count": 28, "id": "5e7dac9a-7049-406c-90f0-848227db0acb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_bureaucrats_last_q" ] }, { "cell_type": "markdown", "id": "8acf4904-4054-4b27-836e-0faa432a6241", "metadata": {}, "source": [ "### Two Quarters Ago" ] }, { "cell_type": "code", "execution_count": 29, "id": "99e29542-a32b-49d9-bc73-f844a4c995a3", "metadata": {}, "outputs": [], "source": [ "total_bureaucrats_2q_ago = wmf.presto.run(\n", " commands=get_bureaucrats_query(snapshot_month=last_month_2q_ago)\n", ")[\"total_bureaucrats\"][0]" ] }, { "cell_type": "code", "execution_count": 30, "id": "465260d2-b699-4754-b4ba-2bd350107d3b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_bureaucrats_2q_ago" ] }, { "cell_type": "markdown", "id": "dbdb308b-af1e-4ed4-a599-4eca74aba871", "metadata": {}, "source": [ "### Percentage Change" ] }, { "cell_type": "code", "execution_count": 61, "id": "4a3c4628-b0eb-4a49-8985-cd40b287220c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No change between the periods.\n" ] }, { "data": { "text/plain": [ "0.0" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_bureaucrats_2q_ago_last_q = percentage_change(\n", " first=total_bureaucrats_2q_ago, second=total_bureaucrats_last_q\n", ")\n", "total_bureaucrats_2q_ago_last_q" ] }, { "cell_type": "markdown", "id": "7911fc70-904c-48f1-ab44-77a7f8098820", "metadata": { "tags": [] }, "source": [ "## Property Creators" ] }, { "cell_type": "code", "execution_count": 32, "id": "2a53c73c-1c6b-47b3-84fe-4dfafad7b1f4", "metadata": {}, "outputs": [], "source": [ "def get_prop_creator_query(snapshot_month: str):\n", " return f\"\"\"\n", " SELECT \n", " COUNT(DISTINCT ug_user) AS total_prop_creators\n", "\n", " FROM \n", " wmf_raw.mediawiki_user_groups\n", "\n", " WHERE \n", " wiki_db = 'wikidatawiki'\n", " AND snapshot = '{snapshot_month}'\n", " AND ug_group = 'propertycreator'\n", " \"\"\"" ] }, { "cell_type": "markdown", "id": "afc230d2-3bf9-4722-b66b-294743691f25", "metadata": {}, "source": [ "### Last Quarter" ] }, { "cell_type": "code", "execution_count": 33, "id": "53bef33c-1820-463e-9821-32d068fb9ae4", "metadata": {}, "outputs": [], "source": [ "total_prop_creators_last_q = wmf.presto.run(\n", " commands=get_prop_creator_query(snapshot_month=last_month_last_q)\n", ")[\"total_prop_creators\"][0]" ] }, { "cell_type": "code", "execution_count": 34, "id": "0573ab2c-1271-40ba-804d-f79f18db1597", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "52" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_prop_creators_last_q" ] }, { "cell_type": "markdown", "id": "1ae44983-82ee-43bb-b832-7336a7233a2b", "metadata": {}, "source": [ "### Two Quarters Ago" ] }, { "cell_type": "code", "execution_count": 35, "id": "0c97145e-17d6-4b8d-87f5-86ac80c8d99c", "metadata": {}, "outputs": [], "source": [ "total_prop_creators_2q_ago = wmf.presto.run(\n", " commands=get_prop_creator_query(snapshot_month=last_month_2q_ago)\n", ")[\"total_prop_creators\"][0]" ] }, { "cell_type": "code", "execution_count": 36, "id": "7006f75b-0faa-4792-bbdb-0b022c997b76", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "50" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_prop_creators_2q_ago" ] }, { "cell_type": "markdown", "id": "33816232-382d-425e-a103-4b20fa8f1ce2", "metadata": {}, "source": [ "### Percentage Change" ] }, { "cell_type": "code", "execution_count": 64, "id": "f006c628-8111-4222-9e56-f72134ddcb5f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "4.00 percent increase between the periods.\n" ] }, { "data": { "text/plain": [ "4.0" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_prop_creators_2q_ago_last_q = percentage_change(\n", " first=total_prop_creators_2q_ago, second=total_prop_creators_last_q\n", ")\n", "total_prop_creators_2q_ago_last_q" ] }, { "cell_type": "markdown", "id": "0d55aa39-af18-4438-9c18-f53c7c94a800", "metadata": {}, "source": [ "## Active Editors" ] }, { "cell_type": "code", "execution_count": 41, "id": "89dffd3f-174a-424d-a66d-4a4bd03fe003", "metadata": {}, "outputs": [], "source": [ "def get_active_editors_query(end_date: str, snapshot_month: str):\n", " return f\"\"\"\n", " WITH edits_per_user AS (\n", " SELECT \n", " DISTINCT event_user_id AS user_id,\n", " COUNT(event_user_id) AS total_edits\n", "\n", " FROM \n", " wmf.mediawiki_history\n", "\n", " WHERE \n", " wiki_db = 'wikidatawiki'\n", " AND event_timestamp BETWEEN CAST(DATE '{end_date}' - INTERVAL '30' DAY AS varchar) AND '{end_date}'\n", " AND snapshot = '{snapshot_month}'\n", " AND event_entity = 'revision'\n", " \n", " GROUP BY\n", " event_user_id\n", " )\n", " \n", " SELECT \n", " COUNT(DISTINCT user_id) AS total_active_editors\n", "\n", " FROM \n", " edits_per_user\n", "\n", " WHERE \n", " total_edits > 4\n", " \"\"\"" ] }, { "cell_type": "markdown", "id": "907a63f5-d03f-4fe1-87ea-f6c3720cb81c", "metadata": {}, "source": [ "### Last Quarter" ] }, { "cell_type": "code", "execution_count": 42, "id": "381541a6-5dda-4903-b9bb-7c05c95a2279", "metadata": {}, "outputs": [], "source": [ "total_active_editors_last_q = wmf.presto.run(\n", " commands=get_active_editors_query(\n", " end_date=last_date_last_q, snapshot_month=last_month_last_q\n", " )\n", ")[\"total_active_editors\"][0]" ] }, { "cell_type": "code", "execution_count": 43, "id": "ad40d018-bc35-4db0-836a-54d1302b9353", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "12657" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_active_editors_last_q" ] }, { "cell_type": "markdown", "id": "0fd7b34b-d15b-4a16-bf21-dd128a122cd0", "metadata": {}, "source": [ "### Two Quarters Ago" ] }, { "cell_type": "code", "execution_count": 45, "id": "9f90c1d9-8198-4ca3-9792-b3d005ce72a0", "metadata": {}, "outputs": [], "source": [ "total_active_editors_2q_ago = wmf.presto.run(\n", " commands=get_active_editors_query(\n", " end_date=last_date_2q_ago, snapshot_month=last_month_2q_ago\n", " )\n", ")[\"total_active_editors\"][0]" ] }, { "cell_type": "code", "execution_count": 46, "id": "8ccdffe4-ad76-416c-ac25-0888495b6767", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "12514" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_active_editors_2q_ago" ] }, { "cell_type": "markdown", "id": "e89c788c-3ec0-4f99-b1ab-2a340376274c", "metadata": {}, "source": [ "### Percentage Change" ] }, { "cell_type": "code", "execution_count": 71, "id": "5021eaa9-5cc9-4a85-99c2-16772fafb2a8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.14 percent increase between the periods.\n" ] }, { "data": { "text/plain": [ "1.14" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_active_editors_2q_ago_last_q = percentage_change(\n", " first=total_active_editors_2q_ago, second=total_active_editors_last_q\n", ")\n", "total_active_editors_2q_ago_last_q" ] }, { "cell_type": "markdown", "id": "645a5dbc-d911-4f32-b560-8c49b067bfde", "metadata": {}, "source": [ "## Items per Active Editor" ] }, { "cell_type": "markdown", "id": "55270fb7-5a2a-4a50-8c5d-fd062ab627dd", "metadata": {}, "source": [ "### Last Quarter" ] }, { "cell_type": "code", "execution_count": 72, "id": "7b171711-b21e-4f66-84cd-33503754fbf2", "metadata": {}, "outputs": [], "source": [ "ipae_last_q = wd_total_pages_last_q / total_active_editors_last_q" ] }, { "cell_type": "code", "execution_count": 73, "id": "d96686d2-4ab7-47ba-b642-97c90166598c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8007.782807932369" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ipae_last_q" ] }, { "cell_type": "markdown", "id": "649cdc0b-b109-4db4-a995-6c08b34f4a05", "metadata": {}, "source": [ "### Two Quarters Ago" ] }, { "cell_type": "code", "execution_count": 74, "id": "a4229e54-e248-4be6-8b7d-e40d295fe501", "metadata": {}, "outputs": [], "source": [ "ipae_2q_ago = wd_total_pages_2q_ago / total_active_editors_2q_ago" ] }, { "cell_type": "code", "execution_count": 75, "id": "79f112ac-e48a-4a06-b6c2-d3cbd3a841e6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8007.225107879175" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ipae_2q_ago" ] }, { "cell_type": "markdown", "id": "72748825-df53-41b7-815b-aeea124ac96d", "metadata": {}, "source": [ "### Percentage Change" ] }, { "cell_type": "code", "execution_count": 76, "id": "2c3d0f49-52f3-446b-a6f1-2dbf724d15e7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.01 percent increase between the periods.\n" ] }, { "data": { "text/plain": [ "0.01" ] }, "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ipae_2q_ago_last_q = percentage_change(\n", " first=ipae_2q_ago, second=ipae_last_q\n", ")\n", "ipae_2q_ago_last_q" ] }, { "cell_type": "markdown", "id": "72b67fe5-b1d6-450a-8478-519c357bcfab", "metadata": {}, "source": [ "## Output DB" ] }, { "cell_type": "code", "execution_count": 77, "id": "0b5c7ceb-0b1c-4dfd-b798-a6d975b3d3af", "metadata": {}, "outputs": [], "source": [ "output_db_cols = [\n", " \"year\",\n", " \"quarter\",\n", " \"total_pages\",\n", " \"total_pages_1q_change\", \n", " \"active_editors\",\n", " \"active_editors_1q_change\",\n", " \"items_per_active_editor\",\n", " \"items_per_active_editor_1q_change\",\n", " \"total_admins\",\n", " \"total_admins_1q_change\",\n", " \"total_bureaucrats\", \n", " \"total_bureaucrats_1q_change\",\n", " \"total_prop_creators\",\n", " \"total_prop_creators_1q_change\"\n", "]\n", "reporting_db = pd.DataFrame(index=range(2), columns=output_db_cols)" ] }, { "cell_type": "code", "execution_count": 78, "id": "21d6a570-fc16-4cf4-b90a-d58a38bacbf1", "metadata": {}, "outputs": [], "source": [ "reporting_db.loc[0] = [\n", " 2023, \n", " \"Q1\", \n", " wd_total_pages_last_q, \n", " wd_total_pages_2q_ago_last_q,\n", " total_active_editors_last_q,\n", " total_active_editors_2q_ago_last_q,\n", " ipae_last_q,\n", " ipae_2q_ago_last_q,\n", " total_admins_last_q,\n", " total_admins_2q_ago_last_q,\n", " total_bureaucrats_last_q,\n", " total_bureaucrats_2q_ago_last_q,\n", " total_prop_creators_last_q,\n", " total_prop_creators_2q_ago_last_q\n", "]\n", "reporting_db.loc[1] = [\n", " 2022,\n", " \"Q4\", \n", " wd_total_pages_2q_ago, \n", " np.nan,\n", " total_active_editors_2q_ago,\n", " np.nan,\n", " ipae_2q_ago,\n", " np.nan,\n", " total_admins_2q_ago,\n", " np.nan,\n", " total_bureaucrats_2q_ago,\n", " np.nan,\n", " total_prop_creators_2q_ago,\n", " np.nan\n", "]" ] }, { "cell_type": "code", "execution_count": 79, "id": "29175014-12fb-409c-9c9c-a7cb1587ef79", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearquartertotal_pagestotal_pages_1q_changeactive_editorsactive_editors_1q_changeitems_per_active_editoritems_per_active_editor_1q_changetotal_adminstotal_admins_1q_changetotal_bureaucratstotal_bureaucrats_1q_changetotal_prop_creatorstotal_prop_creators_1q_change
02023Q11013545071.15126571.148007.7828080.01140.030.0524.0
12022Q4100202415NaN12514NaN8007.225108NaN14NaN3NaN50NaN
\n", "
" ], "text/plain": [ " year quarter total_pages total_pages_1q_change active_editors \\\n", "0 2023 Q1 101354507 1.15 12657 \n", "1 2022 Q4 100202415 NaN 12514 \n", "\n", " active_editors_1q_change items_per_active_editor \\\n", "0 1.14 8007.782808 \n", "1 NaN 8007.225108 \n", "\n", " items_per_active_editor_1q_change total_admins total_admins_1q_change \\\n", "0 0.01 14 0.0 \n", "1 NaN 14 NaN \n", "\n", " total_bureaucrats total_bureaucrats_1q_change total_prop_creators \\\n", "0 3 0.0 52 \n", "1 3 NaN 50 \n", "\n", " total_prop_creators_1q_change \n", "0 4.0 \n", "1 NaN " ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reporting_db" ] }, { "cell_type": "code", "execution_count": 152, "id": "a0f0a5c9-6fb0-49df-a6b5-fed64dea3aad", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Notebook last run at 2023-05-30 10:47 using Wmfdata v2.0.0.\n" ] } ], "source": [ "now = datetime.now().isoformat(sep=' ', timespec='minutes')\n", "version = wmf.metadata.version\n", "print(f\"Notebook last run at {now} using Wmfdata v{version}.\")" ] }, { "cell_type": "code", "execution_count": null, "id": "016dc260-30b1-43e0-a1bd-559e312ad537", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" } }, "nbformat": 4, "nbformat_minor": 5 }