{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "7c1e0f3a-2b4d-4e6f-9a1b-3c5d7e9f0a12",
   "metadata": {},
   "source": [
    "# Copy one day of `event.mediawiki_edit_attempt` with a generated `_id`\n",
    "\n",
    "This notebook selects the rows of `event.mediawiki_edit_attempt` for a single day (2023-07-26) through a Spark session created by `wmfdata`, prepends a generated `_id` column, and saves the result as the table `cjming.mp_ea_copy_with_id`.\n",
    "\n",
    "**Before sharing:** the displayed rows include user-agent strings and editing-session identifiers. Clear the cell outputs before circulating this notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1d91d9c4-e0d9-4747-aac3-2ad59f3f80b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# All imports live in this one cell so a fresh kernel has every dependency up front.\n",
    "import wmfdata\n",
    "from pyspark.sql import functions as fun"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "50446eed-3a67-4146-92cc-e7e782530b13",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SPARK_HOME: /usr/lib/spark3\n",
      "Using Hadoop client lib jars at 3.2.0, provided by Spark.\n",
      "PYSPARK_PYTHON=/opt/conda-analytics/bin/python3\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting default log level to \"WARN\".\n",
      "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
      "23/08/03 15:16:14 WARN SparkConf: Note that spark.local.dir will be overridden by the value set by the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone/kubernetes and LOCAL_DIRS in YARN).\n",
      "23/08/03 15:16:14 WARN Utils: Service 'sparkDriver' could not bind on port 12000. Attempting port 12001.\n",
      "23/08/03 15:16:14 WARN Utils: Service 'sparkDriver' could not bind on port 12001. Attempting port 12002.\n",
      "23/08/03 15:16:14 WARN Utils: Service 'sparkDriver' could not bind on port 12002. Attempting port 12003.\n",
      "23/08/03 15:16:15 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.\n",
      "23/08/03 15:16:15 WARN Utils: Service 'SparkUI' could not bind on port 4041. Attempting port 4042.\n",
      "23/08/03 15:16:15 WARN Utils: Service 'SparkUI' could not bind on port 4042. Attempting port 4043.\n",
      "23/08/03 15:16:22 WARN Utils: Service 'org.apache.spark.network.netty.NettyBlockTransferService' could not bind on port 13000. Attempting port 13001.\n",
      "23/08/03 15:16:22 WARN Utils: Service 'org.apache.spark.network.netty.NettyBlockTransferService' could not bind on port 13001. Attempting port 13002.\n",
      "23/08/03 15:16:22 WARN Utils: Service 'org.apache.spark.network.netty.NettyBlockTransferService' could not bind on port 13002. Attempting port 13003.\n",
      "23/08/03 15:16:22 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Attempted to request executors before the AM has registered!\n"
     ]
    }
   ],
   "source": [
    "# Spark session used by every query below.\n",
    "session = wmfdata.spark.create_session()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "df4575c7-ace3-485f-8332-6abbe56d0408",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Source table and the single day of events to copy; edit these to copy another day.\n",
    "SOURCE_TABLE = \"event.mediawiki_edit_attempt\"\n",
    "SNAPSHOT_YEAR, SNAPSHOT_MONTH, SNAPSHOT_DAY = 2023, 7, 26\n",
    "\n",
    "# The rendered SQL text is identical to the previously hard-coded query.\n",
    "query = (\n",
    "    f\"SELECT * from {SOURCE_TABLE} \"\n",
    "    f\"WHERE year={SNAPSHOT_YEAR} AND month={SNAPSHOT_MONTH} AND day={SNAPSHOT_DAY}\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "532739d5-1674-4169-93d9-e0046fc00e34",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Raw events for the selected day, exactly as returned by the query.\n",
    "df = session.sql(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "de26e70d-e6b3-4c80-99f4-37c7342fdeca",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prepend a generated row id to every original column.\n",
    "# monotonically_increasing_id() is unique and increasing but not consecutive, and its\n",
    "# values depend on partitioning, so treat the ids as stable only once they are persisted.\n",
    "df_ea = df.select(fun.monotonically_increasing_id().alias(\"_id\"), \"*\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "76b5e57c-dd8e-42d7-a4f6-6d4bd3f90fff",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "23/08/03 15:16:59 WARN SessionState: METASTORE_FILTER_HOOK will be ignored, since hive.security.authorization.manager is set to instance of HiveAuthorizerFactory.\n",
      "[Stage 1:> (0 + 1) / 1]\r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
"+---+-------+-------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------+------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------+----------------------------------------+----------+----+-----+---+----+\n", "|_id|_schema|agent |custom_data |dt |http |mediawiki |meta |name |page |performer |user_agent_map |is_wmf_domain|normalized_host |datacenter|year|month|day|hour|\n", 
"+---+-------+-------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------+------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------+----------------------------------------+----------+----+-----+---+----+\n", "|0 |null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, integration -> {string, page}, init_mechanism -> {string, click}, editing_session_id -> {string, d60155bf719757080298}, editor_interface -> {string, wikitext-2017}, wiki -> {string, ruwiki}, init_type -> {string, page}, skin -> {string, vector-2022}} |2023-07-26T19:01:35.838Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, ruwiki} |{ru.wikipedia.org, 2023-07-26T19:01:47.840Z, d1b2ecad-1085-42e2-b800-9a51c33e6fd7, 16c3d18c-00af-4063-91be-ac0e498e4f52, 
mediawiki.edit_attempt, null} |eas.ve.init |{121-я танковая бригада, null, 1236815, null, 0, null, 131961493, null, null, null, null} |{null, 9336, null, null, 2052914, null, true, null, null, null, 68528870035b7abb26cb, null, 08c48af152809ef43796} |{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 115, browser_family -> Firefox, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, ru, [], org, wikipedia} |eqiad |2023|7 |26 |19 |\n", "|1 |null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, ready_timing -> {number, 449}, integration -> {string, page}, editing_session_id -> {string, 1c5703880d6fa833048c}, editor_interface -> {string, visualeditor}, wiki -> {string, plwiki}, skin -> {string, vector-2022}} |2023-07-26T19:39:13.742Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, plwiki} |{pl.wikipedia.org, 2023-07-26T19:39:14.047Z, 49e42291-5677-4f7e-8189-68e37cc6a0b8, 48a29708-16bc-415e-884e-137e96544e4b, mediawiki.edit_attempt, null} |eas.ve.ready |{Kate Roven, null, 0, null, 0, null, 0, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, 326374386faf1d59b4b0, null, 7f3c93b8bfb99bd0e315} |{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 115, browser_family -> Firefox, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, pl, [], org, wikipedia} |eqiad |2023|7 |26 |19 |\n", "|2 |null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, ready_timing -> {number, 1341}, integration -> {string, page}, editing_session_id -> {string, 2c28625514186907b8c0d27419b4afc8}, editor_interface -> {string, wikitext}, wiki -> {string, commonswiki}, skin -> {string, vector}} |2023-07-26T19:27:31.097Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0}, null, null} 
|{false, null, null, null, null, 1.41.0-wmf.19, commonswiki} |{commons.wikimedia.org, 2023-07-26T19:27:44.230Z, 7af3e8f0-8ca8-4888-84bd-672260433137, 1df4aecf-50cf-41da-ae68-6ca08e3fe084, mediawiki.edit_attempt, null}|eas.wt.ready |{January 2020 in the Städteregion Aachen, null, 133514399, null, 14, null, 0, null, null, null, null} |{null, 34970, null, null, 474, null, true, null, null, null, 59f669d029c32f1669b2, null, 1dc86d2517470b28f243} |{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 115, browser_family -> Firefox, device_family -> Other, wmf_app_version -> -} |true |{wikimedia, commons, [], org, wikimedia}|eqiad |2023|7 |26 |19 |\n", "|3 |null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, abort_timing -> {number, 1005}, integration -> {string, page}, editing_session_id -> {string, d80989b05ddef6c3720d3b1056616c19}, editor_interface -> {string, wikitext}, bucket -> {string, test}, abort_type -> {string, nochange}, wiki -> {string, dewiki}, skin -> {string, vector}} |2023-07-26T19:24:24.909Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.183}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, dewiki} |{de.wikipedia.org, 2023-07-26T19:24:24.906Z, 68127ebf-004c-409c-8b40-ffb70a56fe8a, cf22af7c-dae9-41d3-aa3f-f6c98cf54fd4, mediawiki.edit_attempt, null} |eas.wt.abort |{Michael Kalkbrenner, null, 6424900, null, 0, null, 0, null, null, null, null} |{null, 103138, null, null, 1405358, null, true, null, null, null, a84e8fd0455acd6e5044, null, edaa3d287401dfe0a86f}|{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 115, browser_family -> Edge, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, de, [], org, wikipedia} |eqiad |2023|7 |26 |19 |\n", "|4 |null |{null, mediawiki_js, mobile_browser} |{is_bot -> {boolean, false}, integration -> {string, page}, init_mechanism -> 
{string, click}, editing_session_id -> {string, dc7c45039b1e468b78a4}, editor_interface -> {string, wikitext}, wiki -> {string, frwiki}, init_type -> {string, section}, skin -> {string, minerva}} |2023-07-26T19:29:57.236Z|{null, null, null, {user-agent -> Mozilla/5.0 (iPhone; CPU iPhone OS 16_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/115.0.5790.130 Mobile/15E148 Safari/604.1}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, frwiki} |{fr.wikipedia.org, 2023-07-26T19:30:27.745Z, 617d7f09-89b9-4feb-b771-f259949109ab, 1e206d12-9384-49f9-aa0d-a432201a441a, mediawiki.edit_attempt, null} |eas.mf.init |{Liste des pseudonymes utilisés par Arsène Lupin, null, 11563402, null, 0, null, 206306391, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, fb20e5eb3d1a430a83dc, null, ef6890da04ade47a25bc} |{os_family -> iOS, os_major -> 16, os_minor -> 1, browser_major -> 115, browser_family -> Chrome Mobile iOS, device_family -> iPhone, wmf_app_version -> -} |true |{wikipedia, fr, [], org, wikipedia} |eqiad |2023|7 |26 |19 |\n", "|5 |null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, abort_timing -> {number, 24139}, integration -> {string, page}, editing_session_id -> {string, f7ee8ad605eeb176702d340e92e8ffb0}, editor_interface -> {string, wikitext}, abort_type -> {string, nochange}, wiki -> {string, enwiki}, skin -> {string, vector-2022}} |2023-07-26T19:53:55.983Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, enwiki} |{en.wikipedia.org, 2023-07-26T19:53:55.785Z, b94af115-3b27-47e2-a622-c02734812ed4, 78c180ee-a7ca-4c50-84b3-477c1f5d0e1e, mediawiki.edit_attempt, null} |eas.wt.abort |{Richard C. 
Kessler, null, 33314642, null, 0, null, 0, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, bf3432f4ed580757d5a6, null, 2e126ff98ed2a642a16d} |{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 114, browser_family -> Chrome, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, en, [], org, wikipedia} |eqiad |2023|7 |26 |19 |\n", "|6 |null |{null, mediawiki_js, mobile_browser} |{is_bot -> {boolean, false}, abort_timing -> {number, 3467}, integration -> {string, page}, abort_mechanism -> {string, cancel}, editing_session_id -> {string, dc9cffe33d14f106f195}, editor_interface -> {string, wikitext}, abort_type -> {string, nochange}, wiki -> {string, ruwikisource}, skin -> {string, minerva}}|2023-07-26T19:56:11.915Z|{null, null, null, {user-agent -> Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36}, null, null} |{false, null, null, null, null, 1.41.0-wmf.19, ruwikisource}|{ru.wikisource.org, 2023-07-26T19:56:16.564Z, 589e4658-dc01-4e2b-8982-841a7111596c, badea4ef-2581-43fd-9ec3-e4ad1492d58f, mediawiki.edit_attempt, null} |eas.mf.abort |{Дьявол в быту, легенде и в литературе Средних веков (Амфитеатров), null, 932745, null, 0, null, 3996770, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, a6e23d16d90eba52ac30, null, 657f6d5d0005d2bf0241} |{os_family -> Android, os_major -> 10, os_minor -> -, browser_major -> 114, browser_family -> Chrome Mobile, device_family -> K, wmf_app_version -> -} |true |{wikisource, ru, [], org, wikisource} |eqiad |2023|7 |26 |19 |\n", "|7 |null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, integration -> {string, page}, init_mechanism -> {string, url-new}, editing_session_id -> {string, 3308687f7a1ee1786c2c}, editor_interface -> {string, visualeditor}, wiki -> {string, ruwiki}, init_type -> {string, page}, skin -> {string, vector}} 
|2023-07-26T19:30:54.979Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, ruwiki} |{ru.wikipedia.org, 2023-07-26T19:31:09.310Z, d74e1240-f6ab-48dd-9a0d-3a2af06177b4, cf299e08-afe6-49f2-a012-546cc25ec4c2, mediawiki.edit_attempt, null} |eas.ve.init |{Charlotte of Naples, null, 0, null, 0, null, 0, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, ae70394f54438601c69a, null, 00c8329f90fc1a8864ab} |{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 114, browser_family -> Chrome, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, ru, [], org, wikipedia} |eqiad |2023|7 |26 |19 |\n", "|8 |null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, integration -> {string, page}, loaded_timing -> {number, 829}, editing_session_id -> {string, d3e90fe416693a039cb8af14261216ab}, editor_interface -> {string, wikitext}, wiki -> {string, enwiki}, skin -> {string, vector-2022}} |2023-07-26T19:54:16.327Z|{null, null, null, {user-agent -> Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.2 Safari/605.1.15}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, enwiki} |{en.wikipedia.org, 2023-07-26T19:54:18.633Z, 8f3d4559-7f85-48cf-a494-18bcd7611114, dacc17b7-5c8a-4829-a91d-c1ebced86d41, mediawiki.edit_attempt, null} |eas.wt.loaded |{1844 United States presidential election, null, 40514, null, 0, null, 0, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, 6f77953b9d7cfa29c8c6, null, 951a1a46099b897e0869} |{os_family -> Mac OS X, os_major -> 10, os_minor -> 15, browser_major -> 16, browser_family -> Safari, device_family -> Mac, wmf_app_version -> -} |true |{wikipedia, en, [], org, wikipedia} |eqiad |2023|7 |26 |19 |\n", "|9 |null |{null, mediawiki_js, 
desktop_browser}|{is_bot -> {boolean, false}, ready_timing -> {number, 894}, integration -> {string, page}, editing_session_id -> {string, 7f82db3fc6b8f03054b017b055767f25}, editor_interface -> {string, wikitext}, bucket -> {string, test}, wiki -> {string, ukwiki}, skin -> {string, vector}} |2023-07-26T19:29:04.448Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, ukwiki} |{uk.wikipedia.org, 2023-07-26T19:29:10.680Z, 6da03dc6-074d-4157-a9be-8340527b4c36, 81fd1d3c-19e9-4c16-abfe-1092c31588b3, mediawiki.edit_attempt, null} |eas.wt.ready |{Сезон збірної України з футболу 2001, null, 1274226, null, 0, null, 0, null, null, null, null} |{null, 5788, null, null, 123842, null, true, null, null, null, bb136b590ac410796a33, null, 075b81a52decdfdaa3f0} |{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 114, browser_family -> Chrome, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, uk, [], org, wikipedia} |eqiad |2023|7 |26 |19 |\n", "|10 |null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, ready_timing -> {number, 699}, integration -> {string, page}, editing_session_id -> {string, 233cf7dabfd7790b3f20}, editor_interface -> {string, visualeditor}, wiki -> {string, dewiki}, skin -> {string, vector}} |2023-07-26T19:47:35.686Z|{null, null, null, {user-agent -> Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.2 Safari/605.1.15}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, dewiki} |{de.wikipedia.org, 2023-07-26T19:47:44.579Z, 056769a6-5454-4518-b76a-fdc88ab5c8b0, cdb8976c-6f19-46e0-bc68-59ad963a3f59, mediawiki.edit_attempt, null} |eas.ve.ready |{SE & Co. 
KG, null, 0, null, 0, null, 0, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, 305b11d3010464c83d8e, null, 4436b42f66d5f66a677e} |{os_family -> Mac OS X, os_major -> 10, os_minor -> 15, browser_major -> 16, browser_family -> Safari, device_family -> Mac, wmf_app_version -> -} |true |{wikipedia, de, [], org, wikipedia} |eqiad |2023|7 |26 |19 |\n", "|11 |null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, integration -> {string, page}, first_change_timing -> {number, 1988}, editing_session_id -> {string, 5f268ed5c720152087ea62979f89d717}, editor_interface -> {string, wikitext}, wiki -> {string, commonswiki}, skin -> {string, vector}} |2023-07-26T19:37:15.954Z|{null, null, null, {user-agent -> Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36}, null, null} |{false, null, null, null, null, 1.41.0-wmf.19, commonswiki} |{commons.wikimedia.org, 2023-07-26T19:37:22.061Z, 41d920c2-9cdc-447d-81b2-b3f737512ae9, 3e48c299-5daa-4503-9305-270a59f3ab6c, mediawiki.edit_attempt, null}|eas.wt.first_change|{The site of King Edward III Manor House - geograph.org.uk - 4003462.jpg, null, 125553725, null, 6, null, 0, null, null, null, null}|{null, null, null, null, 0, null, false, null, null, null, 779d8980d30ff2b774bf, null, b7e526c05fb3dfc22a45} |{os_family -> Chrome OS, os_major -> 14541, os_minor -> 0, browser_major -> 115, browser_family -> Chrome, device_family -> Other, wmf_app_version -> -} |true |{wikimedia, commons, [], org, wikimedia}|eqiad |2023|7 |26 |19 |\n", "|12 |null |{null, mediawiki_js, mobile_browser} |{is_bot -> {boolean, false}, abort_timing -> {number, 2216}, integration -> {string, page}, abort_mechanism -> {string, cancel}, editing_session_id -> {string, 4cc2c5bda9ddb512bbfd}, editor_interface -> {string, wikitext}, abort_type -> {string, nochange}, wiki -> {string, fawiki}, skin -> {string, minerva}} |2023-07-26T19:44:46.839Z|{null, null, null, 
{user-agent -> Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-N950F) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/22.0 Chrome/111.0.5563.116 Mobile Safari/537.36}, null, null}|{false, null, null, null, null, 1.41.0-wmf.18, fawiki} |{fa.wikipedia.org, 2023-07-26T19:45:16.535Z, 559e2595-e3b7-4cb5-b74b-f1afc09781f0, 3c8f2337-88e3-4beb-b484-59b9c183b2b7, mediawiki.edit_attempt, null} |eas.mf.abort |{اعصاب مغزی, null, 416379, null, 0, null, 37561026, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, 0c110774119ad03420f0, null, 652e2a319110eb8f3a5f} |{os_family -> Android, os_major -> 9, os_minor -> -, browser_major -> 22, browser_family -> Samsung Internet, device_family -> Samsung SM-N950F, wmf_app_version -> -}|true |{wikipedia, fa, [], org, wikipedia} |eqiad |2023|7 |26 |19 |\n", "|13 |null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, integration -> {string, page}, first_change_timing -> {number, 2867}, editing_session_id -> {string, e1951bdcb0953a49f42832efdbf1a15f}, editor_interface -> {string, wikitext}, wiki -> {string, enwiki}, skin -> {string, vector-2022}} |2023-07-26T19:39:15.626Z|{null, null, null, {user-agent -> Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, enwiki} |{en.wikipedia.org, 2023-07-26T19:39:42.980Z, 56fab6de-8e63-4d31-a9dd-eaef679f5861, 323f5159-cff8-4e57-93f3-1ebc70ece3c3, mediawiki.edit_attempt, null} |eas.wt.first_change|{Carlton Lassiter, null, 5926955, null, 0, null, 0, null, null, null, null} |{null, 219, null, null, 46259126, null, true, null, null, null, 972b7dd5f7ffebaebe1a, null, 0cd2442ab7eff16d4726} |{os_family -> Mac OS X, os_major -> 10, os_minor -> 15, browser_major -> 14, browser_family -> Safari, device_family -> Mac, wmf_app_version -> -} |true |{wikipedia, en, [], org, wikipedia} |eqiad |2023|7 |26 |19 |\n", "|14 |null |{null, 
mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, abort_timing -> {number, 1290}, integration -> {string, page}, editing_session_id -> {string, 79c72f2adf8c862adce29880e8292212}, editor_interface -> {string, wikitext}, abort_type -> {string, nochange}, wiki -> {string, elwiktionary}, skin -> {string, vector}} |2023-07-26T19:28:58.158Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36}, null, null} |{false, null, null, null, null, 1.41.0-wmf.19, elwiktionary}|{el.wiktionary.org, 2023-07-26T19:28:57.847Z, 7dc16438-8f8b-4875-bca7-339c23d13830, fff5ba7f-ec3a-4af2-abab-86e8323773f3, mediawiki.edit_attempt, null} |eas.wt.abort |{Γκιόκας, null, 887003, null, 0, null, 0, null, null, null, null} |{null, 4622, null, null, 35893, null, true, null, null, null, 7576e35b08da46c3f54c, null, 8d2f108776cc895c24ca} |{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 114, browser_family -> Chrome, device_family -> Other, wmf_app_version -> -} |true |{wiktionary, el, [], org, wiktionary} |eqiad |2023|7 |26 |19 |\n", "|15 |null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, integration -> {string, page}, save_success_timing -> {number, 866}, editing_session_id -> {string, ba43cb803df367d00f71}, editor_interface -> {string, visualeditor}, wiki -> {string, frwiki}, revision_id -> {number, 206387050}, skin -> {string, timeless}} |2023-07-26T19:02:57.186Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, frwiki} |{fr.wikipedia.org, 2023-07-26T19:03:16.192Z, a909ae47-3f02-4216-9438-d0fbe22432c0, 1fd8502f-d184-42cd-b7f4-7a691787d75c, mediawiki.edit_attempt, null} |eas.ve.save_success|{Tak (jeu), null, 13423315, null, 0, null, 205367041, null, null, null, null} |{null, 16809, null, null, 2968598, null, true, null, null, 
null, 6ddb48b477818a359967, null, 95784e0a47688cc55cab} |{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 115, browser_family -> Firefox, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, fr, [], org, wikipedia} |eqiad |2023|7 |26 |19 |\n", "|16 |null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, integration -> {string, page}, editing_session_id -> {string, 79b0ba226894f5205ad0}, editor_interface -> {string, visualeditor}, wiki -> {string, enwiki}, skin -> {string, vector}, save_attempt_timing -> {number, 1179}} |2023-07-26T19:00:11.490Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, enwiki} |{en.wikipedia.org, 2023-07-26T19:00:13.296Z, a1e3d2a5-c4d7-4107-81fe-c6e81af283a2, 81e3aa4a-1d67-44dd-b0cc-977bc48ddae1, mediawiki.edit_attempt, null} |eas.ve.save_attempt|{Free city of stratford ok/sandbox, null, 70941645, null, 2, null, 1167099610, null, null, null, null} |{null, 928, null, null, 43447169, null, true, null, null, null, 015723936ef3c944fea2, null, fa5602cdb9bdf8356757} |{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 114, browser_family -> Chrome, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, en, [], org, wikipedia} |eqiad |2023|7 |26 |19 |\n", "|17 |null |{null, mediawiki_js, mobile_browser} |{is_bot -> {boolean, false}, integration -> {string, page}, init_mechanism -> {string, new}, editing_session_id -> {string, 00f8b90178d20635adb6}, editor_interface -> {string, wikitext}, wiki -> {string, elwiki}, init_type -> {string, section}, skin -> {string, minerva}} |2023-07-26T19:22:33.883Z|{null, null, null, {user-agent -> Mozilla/5.0 (Linux; Android 13; 2201116SG) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36 EdgA/114.0.1823.74}, null, null} |{false, null, null, null, null, 
1.41.0-wmf.18, elwiki} |{el.wikipedia.org, 2023-07-26T19:22:44.460Z, 23b61bbf-cd37-402e-bb1a-cbfa4b33acb1, d7bf5aa8-4196-4b4f-84de-98789d56b2d8, mediawiki.edit_attempt, null} |eas.mf.init |{Ιφιγένεια Ματάτη, null, 0, null, 0, null, 0, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, 58c9cd6f07694191b531, null, f1f8a649ccb64c6018ac} |{os_family -> Android, os_major -> 13, os_minor -> -, browser_major -> 114, browser_family -> Edge Mobile, device_family -> 2201116SG, wmf_app_version -> -} |true |{wikipedia, el, [], org, wikipedia} |eqiad |2023|7 |26 |19 |\n", "|18 |null |{null, mediawiki_js, mobile_browser} |{is_bot -> {boolean, false}, integration -> {string, page}, init_mechanism -> {string, click}, editing_session_id -> {string, 285fe186cd7886c55b7c}, editor_interface -> {string, wikitext}, wiki -> {string, eswiki}, init_type -> {string, section}, skin -> {string, minerva}} |2023-07-26T19:42:40.867Z|{null, null, null, {user-agent -> Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, eswiki} |{es.wikipedia.org, 2023-07-26T19:43:12.936Z, 2298c7da-643a-451f-9795-fcbf9a90e57b, 3578e193-0074-48a0-ab35-8773857ff05d, mediawiki.edit_attempt, null} |eas.mf.init |{Ecosistema acuático, null, 7510, null, 0, null, 152452771, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, f264ff3092db2b5a3f08, null, 606d94e031a382436cc1} |{os_family -> Android, os_major -> 10, os_minor -> -, browser_major -> 114, browser_family -> Chrome Mobile, device_family -> K, wmf_app_version -> -} |true |{wikipedia, es, [], org, wikipedia} |eqiad |2023|7 |26 |19 |\n", "|19 |null |{null, mediawiki_js, mobile_browser} |{is_bot -> {boolean, false}, ready_timing -> {number, 1433}, integration -> {string, page}, editing_session_id -> {string, 747d2f001621dc4652d2}, editor_interface -> {string, wikitext}, wiki -> 
{string, enwiki}, skin -> {string, minerva}} |2023-07-26T19:06:49.842Z|{null, null, null, {user-agent -> Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, enwiki} |{en.wikipedia.org, 2023-07-26T19:07:18.572Z, daedc6fd-8905-430b-9170-604cce6c90fb, 1aabfa2e-4845-4bcf-a926-6286f9334533, mediawiki.edit_attempt, null} |eas.mf.ready |{List of Grey's Anatomy episodes, null, 2805792, null, 0, null, 1163904955, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, 07ae964802322b1e730d, null, 014a72327a2c49b40847} |{os_family -> Android, os_major -> 10, os_minor -> -, browser_major -> 114, browser_family -> Chrome Mobile, device_family -> K, wmf_app_version -> -} |true |{wikipedia, en, [], org, wikipedia} |eqiad |2023|7 |26 |19 |\n", "+---+-------+-------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------+------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+----------------------------------
------------------------------------------------------------------------------------------------------------------------------------+-------------+----------------------------------------+----------+----+-----+---+----+\n", "only showing top 20 rows\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " \r" ] } ], "source": [ "df_ea.show(truncate=False)" ] }, { "cell_type": "code", "execution_count": 9, "id": "49a526c1-3154-4949-af94-cfa814a13c9f", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " \r" ] } ], "source": [ "df_ea.write.saveAsTable(\"cjming.mp_ea_copy_with_id\")" ] }, { "cell_type": "code", "execution_count": 10, "id": "04a4d9a5-b547-4841-96ff-5ec289da5f7d", "metadata": {}, "outputs": [], "source": [ "df_ea_cd = df.select(fun.monotonically_increasing_id().alias(\"_id\"), \"custom_data\")" ] }, { "cell_type": "code", "execution_count": 11, "id": "026fd098-f5b1-46c6-bf52-14ad1818b4ef", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[Stage 3:> (0 + 1) / 1]\r" ] }, { "name": "stdout", "output_type": "stream", "text": [ "+---+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", "|_id|custom_data |\n", "+---+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", "|0 |{is_bot -> {boolean, false}, integration -> {string, page}, init_mechanism -> {string, click}, editing_session_id -> {string, d60155bf719757080298}, editor_interface -> {string, 
wikitext-2017}, wiki -> {string, ruwiki}, init_type -> {string, page}, skin -> {string, vector-2022}} |\n", "|1 |{is_bot -> {boolean, false}, ready_timing -> {number, 449}, integration -> {string, page}, editing_session_id -> {string, 1c5703880d6fa833048c}, editor_interface -> {string, visualeditor}, wiki -> {string, plwiki}, skin -> {string, vector-2022}} |\n", "|2 |{is_bot -> {boolean, false}, ready_timing -> {number, 1341}, integration -> {string, page}, editing_session_id -> {string, 2c28625514186907b8c0d27419b4afc8}, editor_interface -> {string, wikitext}, wiki -> {string, commonswiki}, skin -> {string, vector}} |\n", "|3 |{is_bot -> {boolean, false}, abort_timing -> {number, 1005}, integration -> {string, page}, editing_session_id -> {string, d80989b05ddef6c3720d3b1056616c19}, editor_interface -> {string, wikitext}, bucket -> {string, test}, abort_type -> {string, nochange}, wiki -> {string, dewiki}, skin -> {string, vector}} |\n", "|4 |{is_bot -> {boolean, false}, integration -> {string, page}, init_mechanism -> {string, click}, editing_session_id -> {string, dc7c45039b1e468b78a4}, editor_interface -> {string, wikitext}, wiki -> {string, frwiki}, init_type -> {string, section}, skin -> {string, minerva}} |\n", "|5 |{is_bot -> {boolean, false}, abort_timing -> {number, 24139}, integration -> {string, page}, editing_session_id -> {string, f7ee8ad605eeb176702d340e92e8ffb0}, editor_interface -> {string, wikitext}, abort_type -> {string, nochange}, wiki -> {string, enwiki}, skin -> {string, vector-2022}} |\n", "|6 |{is_bot -> {boolean, false}, abort_timing -> {number, 3467}, integration -> {string, page}, abort_mechanism -> {string, cancel}, editing_session_id -> {string, dc9cffe33d14f106f195}, editor_interface -> {string, wikitext}, abort_type -> {string, nochange}, wiki -> {string, ruwikisource}, skin -> {string, minerva}}|\n", "|7 |{is_bot -> {boolean, false}, integration -> {string, page}, init_mechanism -> {string, url-new}, editing_session_id -> {string, 
3308687f7a1ee1786c2c}, editor_interface -> {string, visualeditor}, wiki -> {string, ruwiki}, init_type -> {string, page}, skin -> {string, vector}} |\n", "|8 |{is_bot -> {boolean, false}, integration -> {string, page}, loaded_timing -> {number, 829}, editing_session_id -> {string, d3e90fe416693a039cb8af14261216ab}, editor_interface -> {string, wikitext}, wiki -> {string, enwiki}, skin -> {string, vector-2022}} |\n", "|9 |{is_bot -> {boolean, false}, ready_timing -> {number, 894}, integration -> {string, page}, editing_session_id -> {string, 7f82db3fc6b8f03054b017b055767f25}, editor_interface -> {string, wikitext}, bucket -> {string, test}, wiki -> {string, ukwiki}, skin -> {string, vector}} |\n", "|10 |{is_bot -> {boolean, false}, ready_timing -> {number, 699}, integration -> {string, page}, editing_session_id -> {string, 233cf7dabfd7790b3f20}, editor_interface -> {string, visualeditor}, wiki -> {string, dewiki}, skin -> {string, vector}} |\n", "|11 |{is_bot -> {boolean, false}, integration -> {string, page}, first_change_timing -> {number, 1988}, editing_session_id -> {string, 5f268ed5c720152087ea62979f89d717}, editor_interface -> {string, wikitext}, wiki -> {string, commonswiki}, skin -> {string, vector}} |\n", "|12 |{is_bot -> {boolean, false}, abort_timing -> {number, 2216}, integration -> {string, page}, abort_mechanism -> {string, cancel}, editing_session_id -> {string, 4cc2c5bda9ddb512bbfd}, editor_interface -> {string, wikitext}, abort_type -> {string, nochange}, wiki -> {string, fawiki}, skin -> {string, minerva}} |\n", "|13 |{is_bot -> {boolean, false}, integration -> {string, page}, first_change_timing -> {number, 2867}, editing_session_id -> {string, e1951bdcb0953a49f42832efdbf1a15f}, editor_interface -> {string, wikitext}, wiki -> {string, enwiki}, skin -> {string, vector-2022}} |\n", "|14 |{is_bot -> {boolean, false}, abort_timing -> {number, 1290}, integration -> {string, page}, editing_session_id -> {string, 79c72f2adf8c862adce29880e8292212}, 
editor_interface -> {string, wikitext}, abort_type -> {string, nochange}, wiki -> {string, elwiktionary}, skin -> {string, vector}} |\n", "|15 |{is_bot -> {boolean, false}, integration -> {string, page}, save_success_timing -> {number, 866}, editing_session_id -> {string, ba43cb803df367d00f71}, editor_interface -> {string, visualeditor}, wiki -> {string, frwiki}, revision_id -> {number, 206387050}, skin -> {string, timeless}} |\n", "|16 |{is_bot -> {boolean, false}, integration -> {string, page}, editing_session_id -> {string, 79b0ba226894f5205ad0}, editor_interface -> {string, visualeditor}, wiki -> {string, enwiki}, skin -> {string, vector}, save_attempt_timing -> {number, 1179}} |\n", "|17 |{is_bot -> {boolean, false}, integration -> {string, page}, init_mechanism -> {string, new}, editing_session_id -> {string, 00f8b90178d20635adb6}, editor_interface -> {string, wikitext}, wiki -> {string, elwiki}, init_type -> {string, section}, skin -> {string, minerva}} |\n", "|18 |{is_bot -> {boolean, false}, integration -> {string, page}, init_mechanism -> {string, click}, editing_session_id -> {string, 285fe186cd7886c55b7c}, editor_interface -> {string, wikitext}, wiki -> {string, eswiki}, init_type -> {string, section}, skin -> {string, minerva}} |\n", "|19 |{is_bot -> {boolean, false}, ready_timing -> {number, 1433}, integration -> {string, page}, editing_session_id -> {string, 747d2f001621dc4652d2}, editor_interface -> {string, wikitext}, wiki -> {string, enwiki}, skin -> {string, minerva}} |\n", "+---+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", "only showing top 20 rows\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " \r" ] } ], "source": [ "df_ea_cd.show(truncate=False)" ] 
}, { "cell_type": "code", "execution_count": 12, "id": "9eec156b-544f-4380-90e1-df1792ddffa9", "metadata": {}, "outputs": [], "source": [ "# Explode the custom_data map into one row per entry: the key becomes custom_data_name, the value struct cd_value.\n", "df_ea_cd1 = df_ea.select(\n", "    fun.col(\"_id\"),\n", "    fun.explode(fun.col(\"custom_data\")).alias(\"custom_data_name\", \"cd_value\"),\n", ")" ] }, { "cell_type": "code", "execution_count": 13, "id": "de069a9f-14df-4b83-b355-7976409633f4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+---+------------------+------------------------------------------+\n", "|_id|custom_data_name |cd_value |\n", "+---+------------------+------------------------------------------+\n", "|0 |is_bot |{boolean, false} |\n", "|0 |integration |{string, page} |\n", "|0 |init_mechanism |{string, click} |\n", "|0 |editing_session_id|{string, d60155bf719757080298} |\n", "|0 |editor_interface |{string, wikitext-2017} |\n", "|0 |wiki |{string, ruwiki} |\n", "|0 |init_type |{string, page} |\n", "|0 |skin |{string, vector-2022} |\n", "|1 |is_bot |{boolean, false} |\n", "|1 |ready_timing |{number, 449} |\n", "|1 |integration |{string, page} |\n", "|1 |editing_session_id|{string, 1c5703880d6fa833048c} |\n", "|1 |editor_interface |{string, visualeditor} |\n", "|1 |wiki |{string, plwiki} |\n", "|1 |skin |{string, vector-2022} |\n", "|2 |is_bot |{boolean, false} |\n", "|2 |ready_timing |{number, 1341} |\n", "|2 |integration |{string, page} |\n", "|2 |editing_session_id|{string, 2c28625514186907b8c0d27419b4afc8}|\n", "|2 |editor_interface |{string, wikitext} |\n", "+---+------------------+------------------------------------------+\n", "only showing top 20 rows\n", "\n" ] } ], "source": [ "df_ea_cd1.show(truncate=False)" ] }, { "cell_type": "code", "execution_count": 14, "id": "eebcad61-b40d-4a93-98b7-b03a9bf5d47d", "metadata": {}, "outputs": [], "source": [ "# Promote the fields of the cd_value struct to top-level columns.\n", "df_ea_cd2 = df_ea_cd1.select(\"_id\", \"custom_data_name\", \"cd_value.*\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "f07ab1bb-4330-4454-a315-80d2537f7bc6", "metadata": {}, "outputs": [], 
"source": [ "# Give the expanded struct fields explicit custom_data_* names.\n", "df_ea_custom_data = (\n", "    df_ea_cd2\n", "    .withColumnRenamed(\"data_type\", \"custom_data_type\")\n", "    .withColumnRenamed(\"value\", \"custom_data_value\")\n", ")" ] }, { "cell_type": "code", "execution_count": 16, "id": "dd33f248-c588-4983-bace-a8da0d9dbfbe", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+---+------------------+----------------+--------------------------------+\n", "|_id|custom_data_name |custom_data_type|custom_data_value |\n", "+---+------------------+----------------+--------------------------------+\n", "|0 |is_bot |boolean |false |\n", "|0 |integration |string |page |\n", "|0 |init_mechanism |string |click |\n", "|0 |editing_session_id|string |d60155bf719757080298 |\n", "|0 |editor_interface |string |wikitext-2017 |\n", "|0 |wiki |string |ruwiki |\n", "|0 |init_type |string |page |\n", "|0 |skin |string |vector-2022 |\n", "|1 |is_bot |boolean |false |\n", "|1 |ready_timing |number |449 |\n", "|1 |integration |string |page |\n", "|1 |editing_session_id|string |1c5703880d6fa833048c |\n", "|1 |editor_interface |string |visualeditor |\n", "|1 |wiki |string |plwiki |\n", "|1 |skin |string |vector-2022 |\n", "|2 |is_bot |boolean |false |\n", "|2 |ready_timing |number |1341 |\n", "|2 |integration |string |page |\n", "|2 |editing_session_id|string |2c28625514186907b8c0d27419b4afc8|\n", "|2 |editor_interface |string |wikitext |\n", "+---+------------------+----------------+--------------------------------+\n", "only showing top 20 rows\n", "\n" ] } ], "source": [ "df_ea_custom_data.show(truncate=False)" ] }, { "cell_type": "code", "execution_count": 17, "id": "171dbc34-7305-4ab2-9156-1520354df1d1", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " \r" ] } ], "source": [ "# Overwrite mode keeps this cell re-runnable (Restart & Run All); the default save mode raises once the table exists.\n", "df_ea_custom_data.write.mode(\"overwrite\").saveAsTable(\"cjming.mp_ea_custom_data_vertical\")" ] }, { "cell_type": "code", "execution_count": 18, "id": "253a9545-4ae8-4082-ad66-403a341041d4", "metadata": {}, "outputs": [], "source": [ 
"# Collapse back to one row per event, keeping the first and the last custom_data entry of each _id group.\n", "# NOTE(review): first()/last() follow row order, which Spark does not guarantee after the groupBy shuffle,\n", "# so which entries land in slots 1 and 2 may differ between runs.\n", "df_ea_cd_flat = df_ea_custom_data.groupBy(\"_id\").agg(\n", "    fun.first(\"custom_data_name\").alias(\"custom_data_name1\"),\n", "    fun.first(\"custom_data_type\").alias(\"custom_data_type1\"),\n", "    fun.first(\"custom_data_value\").alias(\"custom_data_value1\"),\n", "    fun.last(\"custom_data_name\").alias(\"custom_data_name2\"),\n", "    fun.last(\"custom_data_type\").alias(\"custom_data_type2\"),\n", "    fun.last(\"custom_data_value\").alias(\"custom_data_value2\"),\n", ")" ] }, { "cell_type": "code", "execution_count": 19, "id": "e23bb5a7-4682-4113-95bb-79d9bb3a3c0b", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[Stage 7:=======================================================> (64 + 2) / 66]\r" ] }, { "name": "stdout", "output_type": "stream", "text": [ "+----+-----------------+-----------------+------------------+-----------------+-----------------+------------------+\n", "|_id |custom_data_name1|custom_data_type1|custom_data_value1|custom_data_name2|custom_data_type2|custom_data_value2|\n", "+----+-----------------+-----------------+------------------+-----------------+-----------------+------------------+\n", "|93 |is_bot |boolean |false |skin |string |minerva |\n", "|171 |is_bot |boolean |false |skin |string |minerva |\n", "|232 |is_bot |boolean |false |skin |string |minerva |\n", "|271 |is_bot |boolean |false |skin |string |vector |\n", "|405 |is_bot |boolean |false |skin |string |vector |\n", "|849 |is_bot |boolean |false |skin |string |minerva |\n", "|1238|is_bot |boolean |false |skin |string |vector |\n", "|1365|is_bot |boolean |false |skin |string |vector-2022 |\n", "|1785|is_bot |boolean |false |skin |string |minerva |\n", "|1898|is_bot |boolean |false |skin |string |vector-2022 |\n", "|1990|is_bot |boolean |false |skin |string |vector |\n", "|2154|is_bot |boolean |false |skin |string |vector-2022 |\n", "|2353|is_bot |boolean |false |skin |string |monobook |\n", "|2835|is_bot |boolean |false |skin |string |minerva |\n", 
"|2851|is_bot |boolean |false |skin |string |vector-2022 |\n", "|3512|is_bot |boolean |false |skin |string |vector |\n", "|3554|is_bot |boolean |false |skin |string |vector |\n", "|3633|is_bot |boolean |false |skin |string |vector |\n", "|3662|is_bot |boolean |false |skin |string |vector-2022 |\n", "|3813|is_bot |boolean |false |skin |string |vector |\n", "+----+-----------------+-----------------+------------------+-----------------+-----------------+------------------+\n", "only showing top 20 rows\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " \r" ] } ], "source": [ "df_ea_cd_flat.show(truncate=False)" ] }, { "cell_type": "code", "execution_count": 20, "id": "2b5a51cb-7988-4d83-8fe8-8bb90e7d966a", "metadata": {}, "outputs": [], "source": [ "# df_ea_cd_flat is derived from df_ea, so df_ea._id and df_ea_cd_flat._id share the same lineage and a\n", "# Column-based condition/drop cannot reliably target one side of the join. Joining on the column name\n", "# keeps exactly one _id column and avoids the ambiguous references.\n", "df_ea_core_custom_data_2 = df_ea.join(df_ea_cd_flat, on=\"_id\", how=\"inner\")" ] }, { "cell_type": "code", "execution_count": 21, "id": "5dc31712-45ce-48ba-a15f-0b8d9d8c4e41", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[Stage 11:> (0 + 1) / 1]\r" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"+-------+-------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------+--------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------+------------------------------------------+----------+----+-----+---+----+----+-----------------+-----------------+------------------+-----------------+-----------------+------------------+\n", "|_schema|agent |custom_data |dt |http |mediawiki |meta |name |page |performer |user_agent_map |is_wmf_domain|normalized_host |datacenter|year|month|day|hour|_id |custom_data_name1|custom_data_type1|custom_data_value1|custom_data_name2|custom_data_type2|custom_data_value2|\n", 
"+-------+-------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------+--------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------+------------------------------------------+----------+----+-----+---+----+----+-----------------+-----------------+------------------+-----------------+-----------------+------------------+\n", "|null |{null, mediawiki_js, mobile_browser} |{is_bot -> {boolean, false}, integration -> {string, discussiontools}, init_mechanism -> {string, click}, editing_session_id -> {string, 56fcd61deea43a86c34d}, editor_interface -> {string, visualeditor}, wiki -> {string, kawiki}, init_type -> {string, page}, skin -> {string, minerva}} |2023-07-26T19:23:48.916Z|{null, null, null, {user-agent -> Mozilla/5.0 (iPhone; CPU iPhone OS 16_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/273.0.547966426 Mobile/15E148 Safari/604.1}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, kawiki} 
|{ka.wikipedia.org, 2023-07-26T19:24:19.475Z, 1cd48294-a9d7-4732-bb5a-ac7b6546676d, f8c51f7d-142e-4785-abda-5d2ccd835ebc, mediawiki.edit_attempt, null} |eas.dt.init |{თოვლის პაპა, null, 169260, null, 1, null, 4534412, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, afc65473f575d89cc77b, null, 46c1506df3ed56182807} |{os_family -> iOS, os_major -> 16, os_minor -> 1, browser_major -> 273, browser_family -> Google, device_family -> iPhone, wmf_app_version -> -} |true |{wikipedia, ka, [], org, wikipedia} |eqiad |2023|7 |26 |19 |93 |is_bot |boolean |false |skin |string |minerva |\n", "|null |{null, mediawiki_js, mobile_browser} |{is_bot -> {boolean, false}, integration -> {string, page}, loaded_timing -> {number, 278}, editing_session_id -> {string, 9de1f0fdd2dc665f20cf}, editor_interface -> {string, wikitext}, wiki -> {string, enwiki}, skin -> {string, minerva}} |2023-07-26T19:56:16.984Z|{null, null, null, {user-agent -> Mozilla/5.0 (iPhone; CPU iPhone OS 16_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.2 Mobile/15E148 Safari/604.1}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, enwiki} |{en.wikipedia.org, 2023-07-26T19:56:21.626Z, 78af6445-5b61-4b46-9441-2cffeca72dd9, 7fc822c3-6edb-41e7-8659-1f619d8c83d1, mediawiki.edit_attempt, null} |eas.mf.loaded |{Boaz and Jachin, null, 2064582, null, 0, null, 1141310470, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, 262dbf594a742b1dbf5e, null, 1ac77fb79b9e2028e402} |{os_family -> iOS, os_major -> 16, os_minor -> 5, browser_major -> 16, browser_family -> Mobile Safari, device_family -> iPhone, wmf_app_version -> -} |true |{wikipedia, en, [], org, wikipedia} |eqiad |2023|7 |26 |19 |171 |is_bot |boolean |false |skin |string |minerva |\n", "|null |{null, mediawiki_js, mobile_browser} |{is_bot -> {boolean, false}, integration -> {string, page}, loaded_timing -> {number, 1376}, editing_session_id -> {string, 
ade67c1e6db884dc5063}, editor_interface -> {string, wikitext}, wiki -> {string, itwiki}, skin -> {string, minerva}} |2023-07-26T19:19:11.289Z|{null, null, null, {user-agent -> Mozilla/5.0 (iPhone; CPU iPhone OS 15_7_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.4 Mobile/15E148 Safari/604.1}, null, null} |{false, null, null, null, null, 1.41.0-wmf.19, itwiki} |{it.wikipedia.org, 2023-07-26T19:19:40.466Z, 609044c5-649a-47dc-8a83-89b6f30d01ce, 9dd88401-91c1-430c-9334-825767795caf, mediawiki.edit_attempt, null} |eas.mf.loaded |{Cima di rapa, null, 1314885, null, 0, null, 133575477, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, 3b2cd84f4d05fb8cd773, null, 4ce23cdf29be9a342654} |{os_family -> iOS, os_major -> 15, os_minor -> 7, browser_major -> 15, browser_family -> Mobile Safari, device_family -> iPhone, wmf_app_version -> -} |true |{wikipedia, it, [], org, wikipedia} |eqiad |2023|7 |26 |19 |232 |is_bot |boolean |false |skin |string |minerva |\n", "|null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, ready_timing -> {number, 2289}, integration -> {string, page}, editing_session_id -> {string, 343a76e62d9dc1abdcc08d7603515138}, editor_interface -> {string, wikitext}, wiki -> {string, eswiktionary}, skin -> {string, vector}} |2023-07-26T19:12:14.992Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0}, null, null} |{false, null, null, null, null, 1.41.0-wmf.19, eswiktionary}|{es.wiktionary.org, 2023-07-26T19:12:18.212Z, adf92b9c-556f-4aec-a8b3-dd010f203a2a, 6f6d9deb-59aa-4598-b87c-94fa410f06bd, mediawiki.edit_attempt, null} |eas.wt.ready |{ϋ, null, 0, null, 0, null, 0, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, a5fbc0b330bfd0feb156, null, da1246c04757970cb1c6} |{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 115, browser_family -> Firefox, device_family -> Other, 
wmf_app_version -> -} |true |{wiktionary, es, [], org, wiktionary} |eqiad |2023|7 |26 |19 |271 |is_bot |boolean |false |skin |string |vector |\n", "|null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, integration -> {string, page}, loaded_timing -> {number, 1711}, editing_session_id -> {string, ef54f77ccac73db3f876ce8dec76e4ed}, editor_interface -> {string, wikitext}, wiki -> {string, ruwiki}, skin -> {string, vector}} |2023-07-26T19:37:20.699Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 YaBrowser/23.7.0.2526 Yowser/2.5 Safari/537.36}, null, null}|{false, null, null, null, null, 1.41.0-wmf.18, ruwiki} |{ru.wikipedia.org, 2023-07-26T19:37:24.295Z, c2a6b023-79ee-4749-b480-7b93ec5320e3, 46e572c3-f792-4da8-83dc-253cb5a91387, mediawiki.edit_attempt, null} |eas.wt.loaded |{Линдси, Лиам, null, 10290950, null, 0, null, 0, null, null, null, null} |{null, 17629, null, null, 3042831, null, true, null, null, null, 19a545392251a717ea2f, null, 3ac5c1e14facc886a2c3}|{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 23, browser_family -> Yandex Browser, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, ru, [], org, wikipedia} |eqiad |2023|7 |26 |19 |405 |is_bot |boolean |false |skin |string |vector |\n", "|null |{null, mediawiki_js, mobile_browser} |{is_bot -> {boolean, false}, integration -> {string, page}, init_mechanism -> {string, click}, editing_session_id -> {string, 1c6fb9ef3e72fdc7f771}, editor_interface -> {string, wikitext}, wiki -> {string, eswiki}, init_type -> {string, section}, skin -> {string, minerva}} |2023-07-26T19:44:28.981Z|{null, null, null, {user-agent -> Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, eswiki} |{es.wikipedia.org, 2023-07-26T19:44:41.696Z, c05ef17a-b59f-4a97-abc8-1416a2084513, 
ab5b5ddb-6f2f-46c0-9338-a24739232c57, mediawiki.edit_attempt, null} |eas.mf.init |{Jumbo-Visma, null, 152062, null, 0, null, 152693879, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, 4a203cdc07ac2285b113, null, 8709cc5915c25806a2ff} |{os_family -> Android, os_major -> 10, os_minor -> -, browser_major -> 114, browser_family -> Chrome Mobile, device_family -> K, wmf_app_version -> -} |true |{wikipedia, es, [], org, wikipedia} |eqiad |2023|7 |26 |19 |849 |is_bot |boolean |false |skin |string |minerva |\n", "|null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, ready_timing -> {number, 3147}, integration -> {string, page}, editing_session_id -> {string, 75e3494b70a95dd475fa7643062c5450}, editor_interface -> {string, wikitext}, wiki -> {string, enwiki}, skin -> {string, vector}} |2023-07-26T19:56:34.476Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, enwiki} |{en.wikipedia.org, 2023-07-26T19:55:19.572Z, 30b29dae-110b-4ae5-9b1f-f3c301fa8564, 9d37005d-c113-4a4e-828a-2b332846fbac, mediawiki.edit_attempt, null} |eas.wt.ready |{2021 Summer World University Games, null, 53654189, null, 0, null, 0, null, null, null, null} |{null, 1745, null, null, 44216329, null, true, null, null, null, 56335a3a1d866cb5c300, null, b6bc1301eec461967d9d}|{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 114, browser_family -> Chrome, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, en, [], org, wikipedia} |eqiad |2023|7 |26 |19 |1238|is_bot |boolean |false |skin |string |vector |\n", "|null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, ready_timing -> {number, 1289}, integration -> {string, page}, editing_session_id -> {string, c051cc72e9954654d1b42340f3427590}, editor_interface -> {string, wikitext}, wiki -> {string, 
eswiki}, skin -> {string, vector-2022}} |2023-07-26T19:20:03.005Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, eswiki} |{es.wikipedia.org, 2023-07-26T19:17:35.374Z, c0628f59-64c4-47a0-91eb-d0d184014f07, 6a5b1fe4-4721-4109-92ac-2e45e6ecc1f3, mediawiki.edit_attempt, null} |eas.wt.ready |{Pueblo Viejo (distrito), null, 0, null, 0, null, 0, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, 951010e93aaa6bcc57d3, null, 1fc973515b5d4139e98e} |{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 101, browser_family -> Chrome, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, es, [], org, wikipedia} |eqiad |2023|7 |26 |19 |1365|is_bot |boolean |false |skin |string |vector-2022 |\n", "|null |{null, mediawiki_js, mobile_browser} |{is_bot -> {boolean, false}, ready_timing -> {number, 1091}, integration -> {string, page}, editing_session_id -> {string, 7b9af25f08804c268ad7}, editor_interface -> {string, wikitext}, wiki -> {string, be_x_oldwiki}, skin -> {string, minerva}} |2023-07-26T19:43:11.059Z|{null, null, null, {user-agent -> Mozilla/5.0 (Linux; Android 10) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/114.0.5735.196 Mobile DuckDuckGo/5 Safari/537.36}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, be_x_oldwiki}|{be-tarask.wikipedia.org, 2023-07-26T19:43:20.798Z, 25d2baef-cd94-4063-8dc7-508d17ff050d, 56b1bdad-6d96-43b9-a04d-74de5686e365, mediawiki.edit_attempt, null}|eas.mf.ready |{Копішча, null, 0, null, 14, null, 0, null, null, null, null} |{null, 2573, null, null, 78688, null, true, null, null, null, 7fb12f5e933a20028941, null, 112ccf5368060aa41faf} |{os_family -> Android, os_major -> 10, os_minor -> -, browser_major -> 5, browser_family -> DuckDuckGo Mobile, device_family -> Generic Smartphone, wmf_app_version -> 
-}|true |{wikipedia, be-tarask, [], org, wikipedia}|eqiad |2023|7 |26 |19 |1785|is_bot |boolean |false |skin |string |minerva |\n", "|null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, integration -> {string, page}, loaded_timing -> {number, 899}, editing_session_id -> {string, 96824cdde59db9f169da}, editor_interface -> {string, visualeditor}, wiki -> {string, frwiki}, skin -> {string, vector-2022}} |2023-07-26T19:38:27.343Z|{null, null, null, {user-agent -> Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.1 Safari/605.1.15}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, frwiki} |{fr.wikipedia.org, 2023-07-26T19:38:31.731Z, ed417597-f809-4c41-96e4-6ed633c993fb, 5a5dafa1-1c06-4a97-b360-dca99816939c, mediawiki.edit_attempt, null} |eas.ve.loaded |{Martinière (ruisseau), null, 0, null, 0, null, 0, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, 2e42a2545d2892be4e09, null, de71a7b564f0f8245a33} |{os_family -> Mac OS X, os_major -> 10, os_minor -> 15, browser_major -> 16, browser_family -> Safari, device_family -> Mac, wmf_app_version -> -} |true |{wikipedia, fr, [], org, wikipedia} |eqiad |2023|7 |26 |19 |1898|is_bot |boolean |false |skin |string |vector-2022 |\n", "|null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, ready_timing -> {number, 963}, integration -> {string, page}, editing_session_id -> {string, fbbfac6833f8b53aaae52d98c4fa1981}, editor_interface -> {string, wikitext}, wiki -> {string, itwikivoyage}, skin -> {string, vector}} |2023-07-26T19:12:06.214Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36}, null, null} |{false, null, null, null, null, 1.41.0-wmf.19, itwikivoyage}|{it.wikivoyage.org, 2023-07-26T19:12:36.138Z, 515b0c55-3ad5-4195-abd8-911d16d93dc7, 43785065-91bd-4224-992e-25ed6e57bea4, mediawiki.edit_attempt, 
null} |eas.wt.ready |{Teseo/Sandbox, null, 21878, null, 2, null, 0, null, null, null, null} |{null, 26406, null, null, 3816, null, true, null, null, null, fb9a20cc139ee863fece, null, f68eff377a70e58588c8} |{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 114, browser_family -> Chrome, device_family -> Other, wmf_app_version -> -} |true |{wikivoyage, it, [], org, wikivoyage} |eqiad |2023|7 |26 |19 |1990|is_bot |boolean |false |skin |string |vector |\n", "|null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, abort_timing -> {number, 10666}, integration -> {string, page}, editing_session_id -> {string, a26d51b010cee70def7a010c3a60b381}, editor_interface -> {string, wikitext}, abort_type -> {string, nochange}, wiki -> {string, enwiki}, skin -> {string, vector-2022}}|2023-07-26T19:15:39.652Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, enwiki} |{en.wikipedia.org, 2023-07-26T19:15:40.274Z, ec4be75a-0add-4a7a-89f2-ef3b3fe0ae50, a6cfb9e4-e496-4f25-9687-bc9ee5fe3d46, mediawiki.edit_attempt, null} |eas.wt.abort |{Military ranks of the Gambia, null, 55292065, null, 0, null, 0, null, null, null, null} |{null, 1822, null, null, 192622, null, true, null, null, null, 6ee0f847842e70570d3e, null, e2e27c6dcf13c60dff75} |{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 115, browser_family -> Firefox, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, en, [], org, wikipedia} |eqiad |2023|7 |26 |19 |2154|is_bot |boolean |false |skin |string |vector-2022 |\n", "|null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, integration -> {string, page}, loaded_timing -> {number, 2926}, editing_session_id -> {string, 26e1f3f518df15af56d4f39c4991c3f3}, editor_interface -> {string, wikitext}, bucket -> {string, test}, wiki -> {string, trwiki}, skin -> {string, monobook}} 
|2023-07-26T19:53:02.809Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, trwiki} |{tr.wikipedia.org, 2023-07-26T19:53:33.674Z, 4b77d5c2-86e3-4077-9302-4ae27fb8be70, 18ca348b-50c6-41d7-83f4-f7d1d7e48db2, mediawiki.edit_attempt, null} |eas.wt.loaded |{Vikiveri görsel, null, 1748531, null, 10, null, 0, null, null, null, null} |{null, 207125, null, null, 166490, null, true, null, null, null, 639a168aa09687a88933, null, f61aad976348e58d82cd}|{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 115, browser_family -> Firefox, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, tr, [], org, wikipedia} |eqiad |2023|7 |26 |19 |2353|is_bot |boolean |false |skin |string |monobook |\n", "|null |{null, mediawiki_js, mobile_browser} |{is_bot -> {boolean, false}, integration -> {string, page}, init_mechanism -> {string, new}, editing_session_id -> {string, efddc7fd7c6c064d783d}, editor_interface -> {string, wikitext}, wiki -> {string, dewiki}, init_type -> {string, section}, skin -> {string, minerva}} |2023-07-26T19:16:28.283Z|{null, null, null, {user-agent -> Mozilla/5.0 (iPhone; CPU iPhone OS 15_7_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, dewiki} |{de.wikipedia.org, 2023-07-26T19:16:30.870Z, 67f9a261-4a78-4319-9cad-f66140503f78, bc2abe82-dd36-4759-8071-d4dd6e619296, mediawiki.edit_attempt, null} |eas.mf.init |{Landesgartenschau Wangen im Allgäu 2024, null, 0, null, 0, null, 0, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, f77d8287f41e5c0d2ad6, null, 9b3f62ab375385972b70} |{os_family -> iOS, os_major -> 15, os_minor -> 7, browser_major -> -, browser_family -> Mobile Safari UI/WKWebView, device_family -> iPhone, wmf_app_version -> -} |true |{wikipedia, de, [], org, wikipedia} |eqiad |2023|7 |26 
|19 |2835|is_bot |boolean |false |skin |string |minerva |\n", "|null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, ready_timing -> {number, 857}, integration -> {string, page}, editing_session_id -> {string, 8bc91cb4d4314b53cacb2e1fe8831bff}, editor_interface -> {string, wikitext}, wiki -> {string, enwiki}, skin -> {string, vector-2022}} |2023-07-26T19:55:21.008Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 OPR/100.0.0.0}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, enwiki} |{en.wikipedia.org, 2023-07-26T19:55:41.922Z, bfb25ed8-9461-4fc2-9ece-c95ae1e14f33, fa75decd-bd46-4e1e-baa6-b1e5e129303d, mediawiki.edit_attempt, null} |eas.wt.ready |{Linda Thomas-Greenfield, null, 46258473, null, 0, null, 0, null, null, null, null} |{null, 3, null, null, 45565573, null, true, null, null, null, 28fe8aad651ad3978a10, null, 94cf7c26e123be250bf7} |{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 100, browser_family -> Opera, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, en, [], org, wikipedia} |eqiad |2023|7 |26 |19 |2851|is_bot |boolean |false |skin |string |vector-2022 |\n", "|null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, ready_timing -> {number, 1675}, integration -> {string, page}, editing_session_id -> {string, c487faa94b5d735f0708}, editor_interface -> {string, visualeditor}, wiki -> {string, huwiki}, skin -> {string, vector}} |2023-07-26T19:30:12.367Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, huwiki} |{hu.wikipedia.org, 2023-07-26T19:30:24.808Z, 37ae5ec1-e9d1-4ba3-a227-5b8e44afe1c2, 67b2f798-b3be-4928-abf0-1d029fdaa5fb, mediawiki.edit_attempt, null} |eas.ve.ready |{Pápai Faragó László, null, 0, null, 0, 
null, 0, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, c39c46aa8391573691c4, null, bdb832ba409884e8a12a} |{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 114, browser_family -> Chrome, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, hu, [], org, wikipedia} |eqiad |2023|7 |26 |19 |3512|is_bot |boolean |false |skin |string |vector |\n", "|null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, integration -> {string, page}, loaded_timing -> {number, 1233}, editing_session_id -> {string, 054de64ee3a3c664d24f}, editor_interface -> {string, visualeditor}, bucket -> {string, control}, wiki -> {string, rowiki}, skin -> {string, vector}} |2023-07-26T19:00:53.217Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, rowiki} |{ro.wikipedia.org, 2023-07-26T19:01:19.895Z, 3671726b-b594-496e-86b8-bceb90eeac80, 4c47e452-cb64-4e82-a3e3-761071440653, mediawiki.edit_attempt, null} |eas.ve.loaded |{Partidul Socialist Muncitoresc Spaniol, null, 1118532, null, 0, null, 15580735, null, null, null, null}|{null, 487, null, null, 595753, null, true, null, null, null, edfbfc75d23f86b07ec3, null, c8cf328595825a8d2dc3} |{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 115, browser_family -> Firefox, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, ro, [], org, wikipedia} |eqiad |2023|7 |26 |19 |3554|is_bot |boolean |false |skin |string |vector |\n", "|null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, integration -> {string, page}, first_change_timing -> {number, 982}, editing_session_id -> {string, 95928439414683e4954958d58aa77935}, editor_interface -> {string, wikitext}, wiki -> {string, ruwiki}, skin -> {string, vector}} |2023-07-26T19:11:25.861Z|{null, null, null, {user-agent -> Mozilla/5.0 (Windows NT 10.0; 
Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 YaBrowser/23.7.0.2526 Yowser/2.5 Safari/537.36}, null, null}|{false, null, null, null, null, 1.41.0-wmf.18, ruwiki} |{ru.wikipedia.org, 2023-07-26T19:11:29.885Z, 525ba1ef-65b4-4a9b-a393-767a61ede93f, e5794bbd-c55c-408a-bfa3-9162b7a6e94e, mediawiki.edit_attempt, null} |eas.wt.first_change|{Сёке, Юлиус, null, 6878412, null, 0, null, 0, null, null, null, null} |{null, 25006, null, null, 3107761, null, true, null, null, null, 7702c4469a024c66d375, null, 2bbebf128c85ad935990}|{os_family -> Windows, os_major -> 10, os_minor -> -, browser_major -> 23, browser_family -> Yandex Browser, device_family -> Other, wmf_app_version -> -} |true |{wikipedia, ru, [], org, wikipedia} |eqiad |2023|7 |26 |19 |3633|is_bot |boolean |false |skin |string |vector |\n", "|null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, ready_timing -> {number, 758}, integration -> {string, page}, editing_session_id -> {string, fcce1153da44adaeb2c523a3944ab08f}, editor_interface -> {string, wikitext}, wiki -> {string, enwiki}, skin -> {string, vector-2022}} |2023-07-26T19:57:54.560Z|{null, null, null, {user-agent -> Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, enwiki} |{en.wikipedia.org, 2023-07-26T19:58:07.495Z, 6d2f84f9-3183-4105-bd2d-f0f58d34fd21, 5fb706be-be18-4f8b-bb77-0d91bcdc9035, mediawiki.edit_attempt, null} |eas.wt.ready |{Fellowes Brands, null, 3912790, null, 0, null, 0, null, null, null, null} |{null, 3, null, null, 46296429, null, true, null, null, null, f45250104823dd0bed67, null, d75fe017a1110c370c5b} |{os_family -> Mac OS X, os_major -> 10, os_minor -> 15, browser_major -> 114, browser_family -> Chrome, device_family -> Mac, wmf_app_version -> -} |true |{wikipedia, en, [], org, wikipedia} |eqiad |2023|7 |26 |19 |3662|is_bot |boolean |false |skin |string |vector-2022 
|\n", "|null |{null, mediawiki_js, desktop_browser}|{is_bot -> {boolean, false}, integration -> {string, page}, loaded_timing -> {number, 426}, editing_session_id -> {string, 14f912ddf9e4ebe878f6}, editor_interface -> {string, visualeditor}, wiki -> {string, dewiki}, skin -> {string, vector}} |2023-07-26T19:57:31.830Z|{null, null, null, {user-agent -> Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.2 Safari/605.1.15}, null, null} |{false, null, null, null, null, 1.41.0-wmf.18, dewiki} |{de.wikipedia.org, 2023-07-26T19:57:35.139Z, 565f99a4-cad1-47aa-b3e3-8e75f7ad6585, 9574201e-abd3-403f-8ea6-3a6089a5dafc, mediawiki.edit_attempt, null} |eas.ve.loaded |{Mr. Bingham sammelt Meilen, null, 0, null, 0, null, 0, null, null, null, null} |{null, null, null, null, 0, null, false, null, null, null, 49e90e6efebdc8ec2cf3, null, 9dc1457009fb5db08f6d} |{os_family -> Mac OS X, os_major -> 10, os_minor -> 15, browser_major -> 16, browser_family -> Safari, device_family -> Mac, wmf_app_version -> -} |true |{wikipedia, de, [], org, wikipedia} |eqiad |2023|7 |26 |19 |3813|is_bot |boolean |false |skin |string |vector |\n", 
"+-------+-------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------+--------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------+------------------------------------------+----------+----+-----+---+----+----+-----------------+-----------------+------------------+-----------------+-----------------+------------------+\n", "only showing top 20 rows\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " \r" ] } ], "source": [ "df_ea_core_custom_data_2.show(truncate=False)" ] }, { "cell_type": "code", "execution_count": 22, "id": "bc37b5cc-713d-42e0-8847-3269a6a1ecb0", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " \r" ] } ], "source": [ "df_ea_core_custom_data_2.write.saveAsTable(\"cjming.mp_ea_core_custom_data_2\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { 
"codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" } }, "nbformat": 4, "nbformat_minor": 5 }