# PySpark shell session: load raw webrequest JSON from a Hadoop SequenceFile
# directory and query it through Spark SQL.
# `sc` and `spark` are not defined in this snippet; presumably they are the
# SparkContext / SparkSession that the pyspark shell provides.

# sc.sequenceFile() yields (key, value) pairs, whereas spark.read.json()
# expects an RDD of JSON strings. Feeding it the pairs directly makes every
# record unparseable, so with an explicit schema every column comes back null
# (the original session printed `uri_path` as null for all 10 rows).
# Keep only the values before parsing.
# NOTE(review): assumes each value is a text record holding one JSON
# document -- confirm against the importer's output format.
records = sc.sequenceFile(
    "/wmf/data/raw/atskafka_test_webrequest_text/atskafka_test_webrequest_text/hourly/2021/02/21/12/"
)
rdd = records.values()

# Reuse the schema of the existing table instead of inferring one from the data.
webrequest_schema = spark.table("wmf_raw.webrequest").schema
df = spark.read.schema(webrequest_schema).json(rdd)

# Register the DataFrame so it can be queried as `global_temp.requests`.
df.createOrReplaceGlobalTempView("requests")
# Observed in the original session at this point (harmless):
#   WARN Utils: Truncated the string representation of a plan since it was
#   too large. This behavior can be adjusted by setting
#   'spark.debug.maxToStringFields' in SparkEnv.conf.

df2 = spark.sql("select uri_path from global_temp.requests limit 10")
df2.show()