diff --git a/lib/auth.js b/lib/auth.js index fa06948..e35cde8 100644 --- a/lib/auth.js +++ b/lib/auth.js @@ -1,48 +1,55 @@ "use strict"; var P = require('bluebird'); var rbUtil = require('./rbUtil'); var URI = require('swagger-router').URI; var auth = {}; +/** + * Checks against the MW API whether all the required permissions are present. + * If any of the permissions is absent, throws 401 Unauthorized. + * If the permissions cannot be retrieved from the MW API, throws 400 Bad Request. + * + * @param restbase restbase instance to use + * @param req original request + * @param permissions required permissions + */ auth.checkPermissions = function(restbase, req, permissions) { var checkReq = { - uri: new URI([req.params.domain,'sys','action','userinfo']), + uri: new URI([req.params.domain,'sys','action','query']), method: 'post', body: { meta: 'userinfo', uiprop: 'rights' } }; rbUtil.copyForwardedHeaders(restbase, checkReq); return restbase.post(checkReq) .then(function(userInfo) { - var accessAllowed; - var absentPermissions = []; if (userInfo.rights && Array.isArray(userInfo.rights)) { permissions.forEach(function(perm) { if (userInfo.rights.indexOf(perm) < 0) { - absentPermissions.push(perm); + throw new rbUtil.HTTPError({ + status: 401, + body: { + type: 'unauthorized', + title: 'Not authorized to access the resource', + description: 'Need permission ' + perm + } + }); } }); - accessAllowed = absentPermissions.length === 0; - } else { - accessAllowed = false; - } - if (accessAllowed) { - return userInfo; } else { throw new rbUtil.HTTPError({ - status: 401, + status: 400, body: { - type: 'unauthorized', - title: 'Not authorized to access the resource', - description: 'Need permissions ' + absentPermissions + type: 'invalid_request', + title: 'Failed to check permissions for the request' } }); } }); }; module.exports = auth; \ No newline at end of file diff --git a/lib/rbUtil.js b/lib/rbUtil.js index 73bf145..ad780fc 100644 --- a/lib/rbUtil.js +++ b/lib/rbUtil.js @@ -1,236 
+1,241 @@ "use strict"; /* * Static utility methods for RESTBase */ var P = require('bluebird'); var util = require('util'); var url = require('url'); var Busboy = require('busboy'); var uuid = require('cassandra-uuid').TimeUuid; var rbUtil = {}; // Optimized URL parsing var qs = require('querystring'); // Should make it into 0.12, see https://github.com/joyent/node/pull/7878 var SIMPLE_PATH = /^(\/(?!\/)[^\?#\s]*)(?:\?([^#\s]*))?$/; rbUtil.parseURL = function parseURL(uri) { // Fast path for simple path uris var fastMatch = SIMPLE_PATH.exec(uri); if (fastMatch) { return { protocol: null, slashes: null, auth: null, host: null, port: null, hostname: null, hash: null, search: fastMatch[2] || '', pathname: fastMatch[1], path: fastMatch[1], query: fastMatch[2] && qs.parse(fastMatch[2]) || {}, href: uri }; } else { return url.parse(uri, true); } }; /** * Replaces subdomain with a wildcard * * @param domain a full domain name (e.g. en.wikipedia.org) * @returns {string} wildcard version (e.g. *.wikipedia.org) */ rbUtil.wildcardSubdomain = function(domain) { if ((domain.match(/\./g) || []).length >= 2) { return '*.' + domain.replace(/^[^.]+\./, ""); } else { return domain; } }; /** * Constructs Content-Security-Policy header to send in response * * @param domain the domain to allow. If undefined, '*' is allowed * @param options options containing the following fields: * - allowInline - if true 'unsafe-inline' is added to style-src * @returns {string} CSP header value */ rbUtil.constructCSP = function(domain, options) { var styleSource; if (domain) { styleSource = this.wildcardSubdomain(domain); styleSource = 'http://' + styleSource + ' https://' + styleSource; } else { styleSource = '*'; } return "default-src 'none'; media-src *; img-src *; style-src " + styleSource + (options && options.allowInline ? 
" 'unsafe-inline'" : "") + "; frame-ancestors 'self'"; }; // Parse a POST request into request.body with BusBoy // Drops file uploads on the floor without creating temporary files // // @param {request} HTTP request // @returns {Promise<>} function read(req) { return new P(function(resolve) { var chunks = []; req.on('data', function(chunk) { chunks.push(chunk); }); req.on('end', function() { resolve(Buffer.concat(chunks)); }); }); } rbUtil.parsePOST = function parsePOST(req) { var readIt = (req.method === 'PUT') || (req.method === 'POST' && req.headers && (/^application\/json/i.test(req.headers['content-type']) || !req.headers['content-type'])); if (readIt) { return read(req); } else if (req.method !== 'POST') { return P.resolve(); } else { // Parse the POST return new P(function(resolve) { // Parse POST data var bboy = new Busboy({ headers: req.headers, // Increase the form field size limit from the 1M default. limits: { fieldSize: 15 * 1024 * 1024 } }); var body = {}; bboy.on('field', function(field, val) { body[field] = val; }); bboy.on('finish', function() { resolve(body); }); req.pipe(bboy); }); } }; rbUtil.reverseDomain = function reverseDomain(domain) { return domain.toLowerCase().split('.').reverse().join('.'); }; rbUtil.tidFromDate = function tidFromDate(date) { if (typeof date === 'object') { // Convert Date object to numeric milliseconds date = date.getTime(); } else if (typeof date === 'string') { // Convert date string to numeric milliseconds date = Date.parse(date); } if (isNaN(+date)) { throw new Error('Invalid date'); } // Create a new, deterministic timestamp return uuid.fromDate(date, 0, new Buffer([0x01, 0x23, 0x45, 0x67, 0x89, 0xab]), new Buffer([0x12, 0x34])).toString(); }; /** * Check if a string is a valid timeuuid */ rbUtil.isTimeUUID = function(s) { return uuid.test(s); }; /** * Generates a new request ID */ rbUtil.generateRequestId = function() { return uuid.now().toString(); }; /* * Error instance wrapping HTTP error responses * * Has the 
same properties as the original response. */ function HTTPError(response) { Error.call(this); Error.captureStackTrace(this, HTTPError); this.name = this.constructor.name; this.message = response.status + ''; if (response.body && response.body.type) { this.message += ': ' + response.body.type; } for (var key in response) { this[key] = response[key]; } } util.inherits(HTTPError, Error); rbUtil.HTTPError = HTTPError; // Create an etag value of the form // "/" rbUtil.makeETag = function(rev, tid) { return '"' + rev + '/' + tid + '"'; }; // Parse an etag value of the form // "/" // @param {string} etag // @return {object} with params rev / tid rbUtil.parseETag = function(etag) { var bits = /^"?([^"\/]+)(?:\/([^"\/]+))"?$/.exec(etag); if (bits) { return { rev: bits[1], tid: bits[2] }; } else { return null; } }; +/** + * Copies forwarded headers from restbase to request. + * If the same header was already set it takes precedence over + * the forwarded header. + */ rbUtil.copyForwardedHeaders = function(restbase, req) { if (restbase.forwardedHeaders) { req.headers = req.headers || {}; Object.keys(restbase.forwardedHeaders).forEach(function(header) { if (!req.headers[header]) { req.headers[header] = restbase.forwardedHeaders[header]; } }); } return req; }; /*** * MediaWiki-specific functions * TODO: Move them out in a separate file ***/ // Store titles as MediaWiki db keys // @param {string} title // @return {string} normalised title rbUtil.normalizeTitle = function(title) { return title.replace(/ /g, '_'); }; module.exports = rbUtil; diff --git a/lib/restbase.js b/lib/restbase.js index fe64a7f..72e0084 100644 --- a/lib/restbase.js +++ b/lib/restbase.js @@ -1,426 +1,425 @@ 'use strict'; /* * RESTBase request dispatcher and general shared per-request state namespace */ var jwt = require('jsonwebtoken'); var P = require('bluebird'); var rbUtil = require('./rbUtil'); var HTTPError = rbUtil.HTTPError; var preq = require('preq'); var swaggerUI = require('./swaggerUI'); var auth 
= require('./auth'); // Create a uniform but shallow request object copy with sane defaults. This // keeps code dealing with this request monomorphic (good for perf), and // avoids subtle bugs when requests shared between recursive requests are // mutated in another control branch. At the very minimum, we are mutating the // .params property for each sub-request. function cloneRequest(req) { return { uri: req.uri || req.url || null, method: req.method || 'get', headers: req.headers || {}, query: req.query || {}, body: req.body || null, params: req.params || {} }; } function RESTBase(options, req) { if (options && options.constructor === RESTBase) { // Child instance var par = this._parent = options; this.log = par.log; this.metrics = par.metrics; this.reqId = par.reqId || req && req.headers && req.headers['x-request-id'] || rbUtil.generateRequestId(); this._recursionDepth = par._recursionDepth + 1; this._priv = par._priv; this.rb_config = this._priv.options.conf; this._rootReq = par._rootReq || req; // Copy cookies to forward on first request if (this._recursionDepth === 1 && req.headers && req.headers.cookie) { this.forwardedHeaders = { 'cookie' : req.headers.cookie }; } else { this.forwardedHeaders = options.forwardedHeaders; } } else { // Brand new instance this.log = options.log; // Logging method this.metrics = options.metrics; this.reqId = null; // Private this._parent = null; this._req = null; this._recursionDepth = 0; options.maxDepth = options.maxDepth || 10; if (!options.conf.salt || typeof options.conf.salt !== 'string') { throw new Error("Missing or invalid `salt` option in RESTBase config. 
" + "Expected a string."); } // Private state, shared with child instances this._priv = { options: options, router: options.router }; this.rb_config = options.conf; this.rb_config.user_agent = this.rb_config.user_agent || 'RESTBase'; this._rootReq = null; } } // Sets the request id for this instance and adds it to // the request header, if defined RESTBase.prototype.setRequestId = function(req) { req.headers = req.headers || {}; if (req.headers['x-request-id']) { return; } req.headers['x-request-id'] = this.reqId; }; // Make a child instance RESTBase.prototype.makeChild = function(req) { var child = new RESTBase(this, req); // Remember the request that led to this child instance at each level, so // that we can provide nice error reporting and tracing. child._req = req; return child; }; // A default listing handler for URIs that end in / and don't have any // handlers associated with it otherwise. RESTBase.prototype.defaultListingHandler = function(value, restbase, req) { var rq = req.query; if (rq.spec !== undefined && value.specRoot) { var spec = Object.assign({}, value.specRoot, { // Set the base path dynamically basePath: req.uri.toString().replace(/\/$/, '') }); if (req.params.domain === req.headers.host.replace(/:[0-9]+$/, '')) { // This is a host-based request. Set an appropriate base path. spec.basePath = spec['x-host-basePath'] || spec.basePath; } return P.resolve({ status: 200, body: spec }); } else if (rq.doc !== undefined) { // Return swagger UI & load spec from /?spec if (!req.query.path) { req.query.path = '/index.html'; } return swaggerUI(restbase, req); } else if (/\btext\/html\b/.test(req.headers.accept) && req.uri.path.length <= 2) { // Browser request and above api level req.query.path = '/index.html'; var html = '
' + '
Wikimedia REST API
'; if (req.uri.path.length === 1) { html += '

Domains:

' + '
    ' + req.params._ls.map(function(domain) { return '
  • ' + domain + '
  • '; }).join('\n') + '
'; } else { html += '

APIs:

' + '
    ' + req.params._ls.filter(function(item) { return item !== 'sys'; }) .map(function(api) { return '
  • ' + api + '
  • '; }).join('\n') + '
'; } html += "

JSON listing

To retrieve a regular JSON listing, you can either " + "omit the Accept header, or send one that does not contain " + "text/html.

"; return swaggerUI(restbase, req) .then(function(res) { res.body = res.body .replace(/window\.swaggerUi\.load/, '') .replace(/
/, html); return res; }); } else { // Plain listing return P.resolve({ status: 200, body: { items: req.params._ls } }); } }; // Special handling for external web requests RESTBase.prototype.defaultWebRequestHandler = function(req) { // Enforce the usage of UA req.headers = req.headers || {}; req.headers['User-Agent'] = req.headers['User-Agent'] || this.rb_config.user_agent; this.setRequestId(req); this.log('trace/webrequest', { req: req, request_id: req.headers['x-request-id'] }); // Make sure we have a string req.uri = '' + req.uri; // Call P.resolve to make sure we have a bluebird Promise return P.resolve(preq(req)); }; // Process one request RESTBase.prototype.request = function(req) { // Protect the sys api from direct access // Could consider opening this up with a specific permission later. if (this._recursionDepth === 0 && ((req.uri.params && req.uri.params.api === 'sys') // TODO: Remove once params.api is reliable || (req.uri.path.length > 1 && req.uri.path[1] === 'sys'))) { return P.reject(new HTTPError({ status: 403, body: { type: 'access_denied#sys', title: 'Access to the /{domain}/sys/ hierarchy is restricted to system users.' } })); } if (req.method) { req.method = req.method.toLowerCase(); } return this._request(req); }; // Internal request handler RESTBase.prototype._request = function(req) { var self = this; // Special handling for https? 
requests if (req.uri.constructor === String && /^https?:\/\//.test(req.uri) || req.uri.urlObj) { return this.defaultWebRequestHandler(req); } var priv = this._priv; if (this._recursionDepth > priv.options.maxDepth) { var parents = []; var rb = this._parent; while (rb) { parents.push(rb._req); rb = rb._parent; } return P.resolve({ status: 500, body: { type: 'request_recursion_depth_exceeded', title: 'RESTBase request recursion depth exceeded.', uri: req.uri, method: req.method, parents: parents, depth: this._recursionDepth } }); } // Make sure we have a sane & uniform request object that doesn't change // (at least at the top level) under our feet. var childReq = cloneRequest(req); var match = priv.router.route(childReq.uri); var methods = match && match.value && match.value.methods; var handler = methods && ( (self._rootReq && self._rootReq.method === 'head' && methods.head) || methods[childReq.method] || methods.all); if (!handler && (req.method === 'head' || self._rootReq && self._rootReq.method === 'head')) { handler = methods && methods.get; } if (match && !handler && childReq.method === 'get' && childReq.uri.path[childReq.uri.path.length - 1] === '') { // A GET for an URL that ends with /: return a default listing if (!match.value) { match.value = {}; } if (!match.value.path) { match.value.path = '_defaultListingHandler'; } handler = function(restbase, req) { return self.defaultListingHandler(match.value, restbase, req); }; } if (handler) { // Remove the /{domain}/ prefix, as it's not very useful in stats var statName = match.value.path.replace(/\/[^\/]+\//, '') + '.' + req.method.toUpperCase() + '.'; // Normalize invalid chars statName = self.metrics.normalizeName(statName); // Start timer var startTime = Date.now(); // Prepare to call the handler with a child restbase instance var childRESTBase = this.makeChild(req); childReq.params = match.params; // Call the handler with P.try to catch synchronous exceptions. 
- var request = P.try(handler, [childRESTBase, childReq]) + var reqHandlerPromise = P.try(handler, [childRESTBase, childReq]) .then(function(res){ // Record request metrics & log var statusClass = Math.floor(res.status / 100) + 'xx'; self.metrics.endTiming([statName + statusClass, statName + 'ALL'], startTime); self.log('trace', { req: req, res: res, request_id: childRESTBase.reqId }); if (!res) { throw new HTTPError({ status: 500, body: { type: 'empty_response', description: 'Empty response received', req: req } }); } else if (res.status >= 400 && !(res instanceof Error)) { var err = new HTTPError(res); if (res.body && res.body.stack) { err.stack = res.body.stack; } err.innerBody = res.body; err.internalReq = childReq; throw err; } else { return res; } }, function(err) { var statusClass = '5xx'; if (err && err.status) { statusClass = Math.floor(err.status / 100) + 'xx'; } self.metrics.endTiming([statName + statusClass, statName + 'ALL'], startTime); throw err; }); if (match.permissions - && Array.isArray(match.permissions) - && match.permissions.length > 0) { - request = request.then(function(res) { - var permissionCheckRestbase = self.makeChild(req); - return auth.checkPermissions(permissionCheckRestbase, childReq, match.permissions) + && Array.isArray(match.permissions) + && match.permissions.length > 0) { + reqHandlerPromise = reqHandlerPromise.then(function(res) { + return auth.checkPermissions(childRESTBase, childReq, match.permissions) .then(function() { return res; }); }); } - return request; + return reqHandlerPromise; } else { // No handler found. return P.reject(new HTTPError({ status: 404, body: { type: 'not_found#route', title: 'Not found.', internalURI: req.uri, method: req.method, depth: self._recursionDepth } })); } }; // Generic parameter massaging: // * If last parameter is an object, it is expected to be the request object. // * If the first parameter is a string, it's expected to be the URL. 
// * If the second parameter is a String or Buffer, it's expected to be a // resource body. function makeRequest(uri, reqOrBody, method) { var req; if (uri.constructor === Object) { // Fast path req = uri; req.method = method; return req; } else if (reqOrBody && reqOrBody.constructor === Object) { req = reqOrBody; req.uri = uri; req.method = method; } else { req = { uri: uri, method: method, body: reqOrBody }; } return req; } // Convenience wrappers var methods = ['get', 'post', 'put', 'delete', 'head', 'options', 'trace', 'connect', 'copy', 'move', 'purge', 'search']; methods.forEach(function(method) { RESTBase.prototype[method] = function(uri, req) { return this._request(makeRequest(uri, req, method)); }; }); // Utility methods that need access to restbase state. // Create a json web token // @param {string} token // @return {string} JWT signed token RESTBase.prototype.encodeToken = function(token) { var newToken = jwt.sign({ next: token }, this._priv.options.conf.salt); return newToken; }; // Decode signed token and decode the orignal token // @param {string} JWT token // @return {string} original token RESTBase.prototype.decodeToken = function(token) { try { var next = jwt.verify(token, this._priv.options.conf.salt); return next.next; } catch (e) { throw new HTTPError({ status: 400, body: { type: 'invalid_paging_token', title: 'Invalid paging token' } }); } }; module.exports = RESTBase; diff --git a/lib/router.js b/lib/router.js index b9f45f8..5d1f789 100644 --- a/lib/router.js +++ b/lib/router.js @@ -1,458 +1,455 @@ "use strict"; var P = require('bluebird'); var yaml = require('js-yaml'); var fs = P.promisifyAll(require('fs')); var Template = require('./reqTemplate'); var rbUtil = require('./rbUtil'); var swaggerRouter = require('swagger-router'); var Node = swaggerRouter.Node; var URI = swaggerRouter.URI; var SwaggerRouter = swaggerRouter.Router; function Router(options) { this._options = options || {}; this._nodes = new Map(); this._modules = new Map(); 
this.router = new SwaggerRouter(); } // Load & parse a yaml spec from disk Router.prototype._readSpec = function(path) { // XXX: make the spec path configurable? var fsPath = __dirname + '/../specs/' + path + '.yaml'; // Let errors bubble up for now. Fail loud & early. return fs.readFileAsync(fsPath) .then(function(yamlString) { return yaml.safeLoad(yamlString); }); }; // Extend an existing route tree with a new path by walking the existing tree // and inserting new subtrees at the desired location. Router.prototype._buildPath = function route(node, path, value) { var params = {}; for (var i = 0; i < path.length; i++) { var segment = path[i]; var nextNode = node.getChild(segment, params); if (!nextNode) { nextNode = new Node(); node.setChild(segment, nextNode); if (segment.modifier === '/') { // Set the value for each optional path segment ({/foo}) node.value = value; } node = nextNode; } else { node = nextNode; } } return node; }; /** * Load and initialize a module */ Router.prototype._loadModule = function(modDef) { var cachedModule = this._modules.get(modDef); if (cachedModule) { // Found a cached instance. Return it. 
return P.resolve(cachedModule); } var self = this; var loadPath; // Determine the module's load path switch (modDef.type) { case 'file': if (modDef.path && /^\//.test(modDef.path)) { // Absolute path loadPath = modDef.path; } else { // Relative path or missing loadPath = __dirname + '/../mods/'; if (modDef.path) { // The path has been provided, use it loadPath += modDef.path; } else { // No path given, so assume the file // name matches the module name loadPath += modDef.name; } } break; case 'npm': loadPath = modDef.name; break; default: throw new Error('unknown module type ' + modDef.type + ' (for module ' + modDef.name + ').'); } // Append the log property to module options, if it is not present modDef.options = modDef.options || {}; if (!modDef.options.log) { modDef.options.log = this._options.log || function() {}; } // Let the error propagate in case the module cannot be loaded var modObj = require(loadPath); if (!modObj) { return P.reject("Loading module " + loadPath + " failed."); } // Call if it's a function if (modObj instanceof Function) { modObj = modObj(modDef.options); } if (!(modObj instanceof P)) { // Wrap modObj = P.resolve(modObj); } return modObj.then(function(mod) { if (!mod.operations) { throw new Error('No operations exported by module ' + loadPath); } self._modules.set(modDef, mod); return mod; }); }; function makeRequestTemplate(spec) { var reqTemplate = new Template(spec); return function requestTemplate(restbase, req) { return restbase.request(reqTemplate.eval({ request: req })); }; } /** * Process a Swagger path spec object */ Router.prototype._handleSwaggerPathSpec = function(node, pathspec, operations, specRoot, prefixPath) { var self = this; if (!pathspec) { return P.resolve(); } // Load sub-specs var subSpecs = pathspec['x-subspecs']; if (!subSpecs) { // Check if there is a single child spec var subSpec = pathspec['x-subspec']; if (subSpec) { var specRootBasePath = specRoot.basePath || ''; specRoot = Object.assign({}, subSpec); 
specRoot.paths = {}; specRoot.definitions = {}; specRoot['x-default-params'] = {}; specRoot.basePath = specRootBasePath + prefixPath; prefixPath = ''; // XXX: The basePath is incorrect when shared between domains. Set // it dynamically for each request instead? // specRoot.basePath = prefixPath; var listNode = new Node(); listNode.value = { specRoot: specRoot, methods: {}, path: specRoot.basePath + '/' }; node.setChild('', listNode); subSpecs = [subSpec]; } } var loaderPromise = P.resolve(); if (Array.isArray(subSpecs)) { // Load sub-specs loaderPromise = loaderPromise.then(function() { return P.each(subSpecs, function(subSpecOrPath) { var subSpecPromise; if (subSpecOrPath instanceof Object) { // Inline sub-spec: return directly subSpecPromise = P.resolve(subSpecOrPath); } else { subSpecPromise = self._readSpec(subSpecOrPath); } return subSpecPromise .then(function(subSpec) { return self._handleSwaggerSpec(node, subSpec, operations, specRoot, prefixPath); }); }); }); } // Load modules var restBaseModules = pathspec['x-modules']; if (Array.isArray(restBaseModules)) { loaderPromise = loaderPromise.then(function() { return P.each(restBaseModules, function(m) { // Share modules return self._loadModule(m) .then(function(module) { if (!module) { throw new Error('Null return when loading module ' + m.name); } if (!module.spec) { throw new Error('Module ' + m.name + ' did not export a spec.'); } if (module.resources) { node.value.resources = (node.value.resources || []) .concat(module.resources); } return self._handleSwaggerSpec(node, module.spec, module.operations, specRoot, prefixPath); }); }); }); } var security = pathspec.security; if (Array.isArray(security)) { loaderPromise = loaderPromise.then(function() { - node.value.security = node.value.security || []; - return P.each(security, function(permission) { - node.value.security.push(permission); - }); + node.value.security = security.concat(node.value.security || []); }); } return loaderPromise // Process HTTP method 
stanzas ('get', 'put' etc) .then(function() { // Register the path in the specRoot if (specRoot && !specRoot.paths[prefixPath]) { specRoot.paths[prefixPath] = {}; } Object.keys(pathspec).forEach(function(methodName) { if (/^x-/.test(methodName)) { return; } var method = pathspec[methodName]; // Insert the method spec into the global merged spec if (specRoot.paths[prefixPath]) { specRoot.paths[prefixPath][methodName] = method; } if (node.value.methods[methodName]) { throw new Error('Trying to re-define existing method ' + node.value.path + ':' + methodName); } var backendRequest = method && method['x-backend-request']; if (backendRequest) { // Set up a templated backend request handler var templatedReq = makeRequestTemplate(backendRequest); node.value.methods[methodName] = function(restbase, req) { return templatedReq(restbase, req); }; } else if (method.operationId) { var handler = operations[method.operationId]; if (handler) { node.value.methods[methodName] = handler; } else { throw new Error('No known handler associated with operationId ' + method.operationId); } } }); }); }; /** * Process a Swagger spec */ Router.prototype._handleSwaggerSpec = function(rootNode, spec, operations, specRoot, prefixPath) { if (!specRoot) { specRoot = spec; if (!spec.paths) { spec.paths = {}; } if (!spec.definitions) { spec.definitions = {}; } if (!spec['x-default-params']) { spec['x-default-params'] = {}; } if (!spec.basePath) { spec.basePath = prefixPath || ''; } prefixPath = ''; } if (spec.definitions) { // Merge definitions Object.assign(specRoot.definitions, spec.definitions); } if (spec['x-default-params']) { Object.assign(specRoot['x-default-params'], spec['x-default-params']); } var self = this; function handlePaths(paths) { if (!paths || !Object.keys(paths).length) { // No paths here, nothing to do return P.resolve(); } // Handle paths return P.all(Object.keys(paths).map(function(pathPattern) { var pathSpec = paths[pathPattern]; var pathURI = new URI(pathPattern, {}, true); 
var path = pathURI.path; var subPrefixPath = prefixPath + pathURI.toString('simplePattern'); // Create a value object early, so that _buildPath can set up a reference // to it for optional path segments. var value = { path: undefined, methods: {} }; // Expected to return // - rootNode for single-element path // - a subnode for longer paths var branchNode = self._buildPath(rootNode, path.slice(0, path.length - 1), value); // Check if we can share the path spec var subtree = self._nodes.get(pathSpec); var specPromise; if (!subtree) { var segment = path[path.length - 1]; // Check if the subtree already exists, which can happen when // specs are overlapping. We don't allow this for now to keep // specs easy to read & understand. subtree = branchNode.getChild(segment, {}); if (!subtree) { // Build a new subtree subtree = new Node(); // Set up our specific value object subtree.value = value; value.path = specRoot.basePath + subPrefixPath; value.methods = {}; // XXX: Set ACLs and other value properties for path // subtree.value.acls = ...; if (segment.modifier === '+') { // Set up a recursive match and end the traversal subtree.setChild(segment, subtree); } else if (segment.modifier === '/') { // Since this path segment is optional, the parent node // has the same value. branchNode.value = value; } } // Assign the node before building the tree, so that sharing // opportunities with the same spec are discovered while doing so self._nodes.set(pathSpec, subtree); // Handle the path spec specPromise = self._handleSwaggerPathSpec(subtree, pathSpec, operations, specRoot, subPrefixPath); } else { var origSubtree = subtree; subtree = subtree.clone(); subtree.value = value; // Copy over the specRoot subtree.value.specRoot = origSubtree.value.specRoot; specPromise = P.resolve(); } branchNode.setChild(path[path.length - 1], subtree); return specPromise; })); } // TODO: handle global spec settings if (spec['x-subspecs'] || spec['x-subspec']) { // Allow x-subspecs? 
at top level spec too. This is useful to avoid // introducing an extra level in the spec tree. return this._handleSwaggerPathSpec(rootNode, spec, operations, specRoot, prefixPath) .then(handlePaths(spec.paths || {})); } else { return handlePaths(spec.paths || {}); } }; /** * Set up resources (ex: dynamic storage like tables) by traversing the tree & * performing the requests specified in resource stanzas. Default HTTP method * is 'put'. * * Any error during resource creation (status code >= 400) will abort startup * after logging the error as a fatal. */ Router.prototype.handleResources = function(restbase) { var self = this; return this.tree.visitAsync(function(value, path) { if (value && Array.isArray(value.resources)) { return P.resolve(value.resources) /* Workaround (forces setTimeout) to avoid excessive nextTick recursion in bluebird with node 0.10. Otherwise, we see errors like this with >~500 domains: (node) warning: Recursive process.nextTick detected. This will break in the next version of node. Please use setImmediate for recursive deferral. */ .delay(0) .each(function(reqSpec) { var reqTemplate = new Template(reqSpec); var req = reqTemplate.eval({ request: { params: { domain: path[0] } } }); if (!req.uri) { throw new Error("Missing resource URI in spec for " + JSON.stringify(path)); } req.method = req.method || 'put'; return restbase.request(req); }); } else { return P.resolve(); } }); }; /** * Load a new Swagger spec * * This involves building a tree, initializing modules, merging specs & * initializing resources: Basically the entire app startup. 
*/ Router.prototype.loadSpec = function(spec, restbase) { var self = this; var rootNode = new Node(); var specPromise; if (spec && spec.constructor === String) { specPromise = this._readSpec(spec); } else { specPromise = P.resolve(spec); } return specPromise .then(function(spec) { return self._handleSwaggerSpec(rootNode, spec, {}); }) .then(function() { // Only set the tree after loading everything self.tree = rootNode; self.router.setTree(rootNode); return self.handleResources(restbase); }) .then(function() { return self; }); }; /** * Resolve an URI to a value object * * Main request routing entry point. * @param {URI} uri URI object * @return {object} match: * - @prop {object} value: * - @prop {object} methods: handlers for methods like get, post etc * - @prop {string} path: path to this tree node * - @prop {object} params: Object with path parameters and optionally `_ls` * for URIs ending in `/`. */ Router.prototype.route = function(uri) { return this.router.lookup(uri); }; module.exports = Router; diff --git a/mods/action.js b/mods/action.js index 5d49985..812aaaa 100644 --- a/mods/action.js +++ b/mods/action.js @@ -1,252 +1,251 @@ 'use strict'; /* * Simple wrapper for the PHP action API */ var rbUtil = require('../lib/rbUtil'); var Template = require('../lib/reqTemplate'); /** * Error translation */ var errDefs = { 400: { status: 400, type: 'invalid_request' }, 401: { status: 401, type: 'unauthorized' }, 403: { status: 403, type: 'access_denied#edit' }, 409: { status: 409, type: 'conflict' }, 413: { status: 413, type: 'too_large' }, 429: { status: 429, type: 'rate_exceeded' }, 500: { status: 500, type: 'server_error' }, 501: { status: 501, type: 'not_supported' } }; var errCodes = { /* 400 - bad request */ articleexists: errDefs['400'], badformat: errDefs['400'], badmd5: errDefs['400'], badtoken: errDefs['400'], invalidparammix: errDefs['400'], invalidsection: errDefs['400'], invalidtitle: errDefs['400'], invaliduser: errDefs['400'], missingparam:
errDefs['400'], missingtitle: errDefs['400'], nosuchpageid: errDefs['400'], nosuchrcid: errDefs['400'], nosuchrevid: errDefs['400'], nosuchsection: errDefs['400'], nosuchuser: errDefs['400'], notext: errDefs['400'], notitle: errDefs['400'], pagecannotexist: errDefs['400'], revwrongpage: errDefs['400'], /* 401 - unauthorised */ 'cantcreate-anon': errDefs['401'], confirmemail: errDefs['401'], 'noedit-anon': errDefs['401'], 'noimageredirect-anon': errDefs['401'], protectedpage: errDefs['401'], readapidenied: errDefs['401'], /* 403 - access denied */ autoblocked: errDefs['403'], blocked: errDefs['403'], cantcreate: errDefs['403'], customcssjsprotected: errDefs['403'], customcssprotected: errDefs['403'], customjsprotected: errDefs['403'], emptynewsection: errDefs['403'], emptypage: errDefs['403'], filtered: errDefs['403'], hookaborted: errDefs['403'], noedit: errDefs['403'], noimageredirect: errDefs['403'], permissiondenied: errDefs['403'], protectednamespace: errDefs['403'], 'protectednamespace-interface': errDefs['403'], protectedtitle: errDefs['403'], readonly: errDefs['403'], unsupportednamespace: errDefs['403'], writeapidenied: errDefs['403'], /* 409 - conflict */ cascadeprotected: errDefs['409'], editconflict: errDefs['409'], pagedeleted: errDefs['409'], spamdetected: errDefs['409'], /* 413 - body too large */ contenttoobig: errDefs['413'], /* 429 - rate limit exceeded */ ratelimited: errDefs['429'], /* 501 - not supported */ editnotsupported: errDefs['501'] }; function apiError(apiErr) { var ret; apiErr = apiErr || {}; ret = { message: 'MW API call error ' + apiErr.code, status: errDefs['500'].status, body: { type: errDefs['500'].type, title: apiErr.code || 'MW API Error', description: apiErr.info || 'Unknown MW API error' } }; if (apiErr.code && errCodes.hasOwnProperty(apiErr.code)) { ret.status = errCodes[apiErr.code].status; ret.body.type = errCodes[apiErr.code].type; } return new rbUtil.HTTPError(ret); } /** * Action module code */ function
ActionService(options) { // Be backwards-compatible with apiURI-style configs if (!options.apiRequest && options.apiURI) { // Log a deprecation warning options.log('warn/actionService', 'The config options for this module have changed. ' + 'Please use the apiRequest template stanza'); options.apiRequest = { method: 'post', // TODO: assume the URI is in the form https?://{domain}/w/api.php // as we cannot currently template the host in swagger-router uri: options.apiURI, headers: { host: '{$.request.params.domain}' }, body: '{$.request.body}' }; // Now check if there's really a param in the host of the URI if (!/^(:?https?:\/\/){[^\s}]+}\//.test(options.apiURI)) { // No host templating, use the string provided by the config options.apiRequest.uri = options.apiURI; } // TODO: decide what to do when apiURI has got a host param, but // the rest isn't /w/api.php } else if (!options.apiRequest) { throw new Error('The action module needs the apiRequest templating stanza to exist!'); } this.apiRequestTemplate = new Template(options.apiRequest); } +/** + * Validates a PHP action API `query` response and unwraps its payload. + * + * Page results: `res.body` is rewritten to `{ items, next }` and `res` is + * returned. `userinfo` results: only `res.body.query.userinfo` is returned. + * A non-200 status, an API error or a response with neither `pages` nor + * `userinfo` throws the HTTPError built by apiError(). + */ function buildQueryResponse(res) { if (res.status !== 200) { throw apiError({ info: 'Unexpected response status (' + res.status + ') from the PHP action API.' }); } else if (!res.body || res.body.error) { throw apiError((res.body || {}).error); - } else if (!res.body.query || !res.body.query.pages) { - throw apiError({ info: 'Missing query pages from the PHP action API response.' 
}); + } else if (!res.body.query || (!res.body.query.pages && !res.body.query.userinfo)) { + throw apiError({info: 'Missing query pages from the PHP action API response.'}); + } + + if (res.body.query.pages) { + // Rewrite res.body + // XXX: Rethink! + var pages = res.body.query.pages; + var newBody = Object.keys(pages).map(function(key) { + return pages[key]; + }); + // XXX: Clean this up!
+ res.body = { + items: newBody, + next: res.body["continue"] + }; + return res; + } else if (res.body.query.userinfo) { + return res.body.query.userinfo; + } else { + throw apiError({info: 'Unable to parse PHP action API response.'}); } - // Rewrite res.body - // XXX: Rethink! - var pages = res.body.query.pages; - var newBody = Object.keys(pages).map(function(key) { - return pages[key]; - }); - // XXX: Clean this up! - res.body = { - items: newBody, - next: res.body.continue - }; - return res; } function buildEditResponse(res) { if (res.status !== 200) { throw apiError({ info: 'Unexpected response status (' + res.status + ') from the PHP action API.' }); } else if (!res.body || res.body.error) { throw apiError((res.body || {}).error); } res.body = res.body.edit; if (res.body && !res.body.nochange) { res.status = 201; } return res; } -function buildUserInfoResponse(res) { - if (res.status !== 200) { - throw apiError({info: 'Unexpected response status (' + res.status + ') from the PHP action API.'}); - } else if (!res.body || res.body.error) { - throw apiError((res.body || {}).error); - } else if (!res.body.query || !res.body.query.userinfo) { - throw apiError({info: 'Missing user info from the PHP action API response.'}); - } - return res.body.query.userinfo; -} - ActionService.prototype._doRequest = function(restbase, req, defBody, cont) { var apiRequest = this.apiRequestTemplate.eval({ request: req }); apiRequest.body.action = defBody.action; apiRequest.body.format = apiRequest.body.format || defBody.format || 'json'; apiRequest.body.formatversion = apiRequest.body.formatversion || defBody.formatversion || 1; if (!apiRequest.body.hasOwnProperty('continue')) { apiRequest.body.continue = ''; } req.method = 'post'; req = rbUtil.copyForwardedHeaders(restbase, req); return restbase.request(apiRequest).then(cont); }; ActionService.prototype.query = function(restbase, req) { return this._doRequest(restbase, req, { action: 'query', format: 'json' }, buildQueryResponse);
}; ActionService.prototype.edit = function(restbase, req) { return this._doRequest(restbase, req, { action: 'edit', format: 'json', formatversion: 2 }, buildEditResponse); }; ActionService.prototype.userInfo = function(restbase, req) { return this._doRequest(restbase, req, { action: 'query', format: 'json' - }, buildUserInfoResponse); + // buildUserInfoResponse was folded into buildQueryResponse by this change + }, buildQueryResponse); }; -module.exports = function(options) { +module.exports = function (options) { var actionService = new ActionService(options); return { spec: { paths: { '/query': { all: { operationId: 'mwApiQuery' } }, '/edit': { post: { operationId: 'mwApiEdit' } - }, - '/userinfo': { - all: { - operationId: 'mwApiUserInfo' - } } } }, operations: { mwApiQuery: actionService.query.bind(actionService), - mwApiEdit: actionService.edit.bind(actionService), - mwApiUserInfo: actionService.userInfo.bind(actionService) + mwApiEdit: actionService.edit.bind(actionService) } }; }; diff --git a/mods/parsoid.js b/mods/parsoid.js index 5ae6f60..8424f6b 100644 --- a/mods/parsoid.js +++ b/mods/parsoid.js @@ -1,690 +1,690 @@ 'use strict'; /* * Simple wrapper for Parsoid */ var P = require('bluebird'); var URI = require('swagger-router').URI; var uuid = require('cassandra-uuid').TimeUuid; var rbUtil = require('../lib/rbUtil'); // TODO: move tests & spec to separate npm module!
var yaml = require('js-yaml'); var fs = require('fs'); var spec = yaml.safeLoad(fs.readFileSync(__dirname + '/parsoid.yaml')); function ParsoidService(options) { options = options || {}; this.parsoidHost = options.parsoidHost || 'http://parsoid-lb.eqiad.wikimedia.org'; // Set up operations var self = this; this.operations = { getPageBundle: function(restbase, req) { return self.wrapContentReq(restbase, req, self.pagebundle(restbase, req), 'pagebundle'); }, // Revision retrieval per format getWikitext: self.getFormat.bind(self, 'wikitext'), getHtml: self.getFormat.bind(self, 'html'), getDataParsoid: self.getFormat.bind(self, 'data-parsoid'), // Listings listWikitextRevisions: self.listRevisions.bind(self, 'wikitext'), listHtmlRevisions: self.listRevisions.bind(self, 'html'), listDataParsoidRevisions: self.listRevisions.bind(self, 'data-parsoid'), // Transforms transformHtmlToHtml: self.makeTransform('html', 'html'), transformHtmlToWikitext: self.makeTransform('html', 'wikitext'), transformWikitextToHtml: self.makeTransform('wikitext', 'html'), transformSectionsToWikitext: self.makeTransform('sections', 'wikitext') }; } // Short alias var PSP = ParsoidService.prototype; /** * Wraps a request for getting content (the promise) into a * P.all() call, bundling it with a request for revision * info, so that a 403 error gets raised overall if access to * the revision should be denied * * @param restbase RESTBase the Restbase router object * @param req Object the user request * @param promise Promise the promise object to wrap */ PSP.wrapContentReq = function(restbase, req, promise, format, tid) { var rp = req.params; function ensureCharsetInContentType(res) { var cType = res.headers['content-type']; if (/^text\/html\b/.test(cType) && !/charset=/.test(cType)) { // Make sure a charset is set res.headers['content-type'] = cType + ';charset=utf-8'; } return res; } var reqs = { content: promise }; // Bundle the promise together with a call to getRevisionInfo().
A // failure in getRevisionInfo will abort the entire request. reqs.revisionInfo = this.getRevisionInfo(restbase, req); // If the format is HTML and sections were requested, also request section // offsets if (format === 'html' && req.query.sections) { reqs.sectionOffsets = restbase.get({ uri: this.getBucketURI(rp, 'section.offsets', tid) }); } return P.props(reqs) .then(function(responses) { // If we have reached this point, it means access is not denied, and // sections (if requested) were found if (format === 'html' && req.query.sections) { // Handle section requests var sectionOffsets = responses.sectionOffsets.body; var sections = req.query.sections.split(',').map(function(id) { return id.trim(); }); var body = cheapBodyInnerHTML(responses.content.body.toString()); var chunks = {}; sections.forEach(function(id) { var offsets = sectionOffsets[id]; if (!offsets) { throw new rbUtil.HTTPError({ status: 400, body: { type: 'invalid_request', detail: 'Unknown section id: ' + id } }); } // Offsets as returned by Parsoid are relative to body.innerHTML chunks[id] = body.substring(offsets.html[0], offsets.html[1]); }); return { status: 200, headers: { etag: responses.content.headers.etag, 'content-type': 'application/json' }, body: chunks }; } else { return ensureCharsetInContentType(responses.content); } }); }; PSP.getBucketURI = function(rp, format, tid) { var path = [rp.domain, 'sys', 'key_rev_value', 'parsoid.'
+ format, rp.title]; if (rp.revision) { path.push(rp.revision); if (tid) { path.push(tid); } } return new URI(path); }; PSP.pagebundle = function(restbase, req) { var rp = req.params; var domain = rp.domain; if (domain === 'en.wikipedia.test.local') { domain = 'en.wikipedia.org'; } // TODO: Pass in current or predecessor version data if available var newReq = Object.assign({}, req); if (!newReq.method) { newReq.method = 'get'; } - newReq = rbUtil.copyForwardedHeaders(restbase, newReq); + rbUtil.copyForwardedHeaders(restbase, newReq); newReq.uri = this.parsoidHost + '/v2/' + domain + '/pagebundle/' + encodeURIComponent(rbUtil.normalizeTitle(rp.title)) + '/' + rp.revision; return restbase.request(newReq); }; PSP.saveParsoidResult = function(restbase, req, format, tid, parsoidResp) { var self = this; var rp = req.params; // Handle the response from Parsoid if (parsoidResp.status === 200) { return P.all([ restbase.put({ uri: self.getBucketURI(rp, 'data-parsoid', tid), headers: parsoidResp.body['data-parsoid'].headers, body: parsoidResp.body['data-parsoid'].body }), restbase.put({ uri: self.getBucketURI(rp, 'section.offsets', tid), headers: { 'content-type': 'application/json' }, body: parsoidResp.body['data-parsoid'].body.sectionOffsets }) ]) // Save HTML last, so that any error in metadata storage suppresses // HTML.
.then(function() { return restbase.put({ uri: self.getBucketURI(rp, 'html', tid), headers: parsoidResp.body.html.headers, body: parsoidResp.body.html.body }); }) // And return the response to the client // but only if the revision is accessible .then(function() { var resp = { status: parsoidResp.status, headers: parsoidResp.body[format].headers, body: parsoidResp.body[format].body }; resp.headers.etag = rbUtil.makeETag(rp.revision, tid); return self.wrapContentReq(restbase, req, P.resolve(resp), format, tid); }); } else { return parsoidResp; } }; // Temporary work-around for Parsoid issue // https://phabricator.wikimedia.org/T93715 function normalizeHtml(html) { return html && html.toString && html.toString() .replace(/ about="[^"]+"(?=[\/> ])|]+>/g, ''); } function sameHtml(a, b) { return normalizeHtml(a) === normalizeHtml(b); } PSP.generateAndSave = function(restbase, req, format, currentContentRes) { var self = this; // Try to generate HTML on the fly by calling Parsoid var rp = req.params; var pageBundleUri = new URI([rp.domain, 'sys', 'parsoid', 'pagebundle', rbUtil.normalizeTitle(rp.title), rp.revision]); // Helper for retrieving original content from storage & posting it to // the Parsoid pagebundle end point function getOrigAndPostToParsoid(revision, contentName, updateMode) { return self._getOriginalContent(restbase, req, revision) .then(function(res) { var body = { update: updateMode }; body[contentName] = res; return restbase.post({ uri: pageBundleUri, headers: { 'content-type': 'application/json' }, body: body }); }) .catch(function(e) { // Fall back to plain GET return restbase.get({ uri: pageBundleUri }); }); } - var parentRev = parseInt(req.headers['x-restbase-parentrevision']); + var parentRev = parseInt(req.headers['x-restbase-parentrevision'], 10); var updateMode = req.headers['x-restbase-mode']; var parsoidReq; if (parentRev) { // OnEdit job update: pass along the predecessor version parsoidReq = getOrigAndPostToParsoid(parentRev + '', 
'previous'); } else if (updateMode) { // Template or image updates.
Similar to html2wt, pass: // - current data-parsoid and html // - the edit mode parsoidReq = getOrigAndPostToParsoid(rp.revision, 'original', updateMode); } else { // Plain render parsoidReq = restbase.get({ uri: pageBundleUri }); } return parsoidReq .then(function(res) { var htmlBody = res.body.html.body; var tid = uuid.now().toString(); // Also make sure we have a meta tag for the tid in our output if (!/]+>/.test(htmlBody)) { res.body.html.body = htmlBody .replace(/(]+>)/, '$1'); } if (format === 'html' && currentContentRes && sameHtml(res.body.html.body, currentContentRes.body)) { // New render is the same as the previous one, no need to store // it. restbase.metrics.increment('sys_parsoid_generateAndSave.unchanged_rev_render'); // No need for wrapping here, as we rely on the pagebundle request // being wrapped & throwing an error if access is denied return currentContentRes; } else { return self.saveParsoidResult(restbase, req, format, tid, res); } }); }; // Get / check the revision metadata for a request PSP.getRevisionInfo = function(restbase, req) { var rp = req.params; var path = [rp.domain, 'sys', 'page_revisions', 'page', rbUtil.normalizeTitle(rp.title)]; if (/^(?:[0-9]+)$/.test(rp.revision)) { path.push(rp.revision); } else if (rp.revision) { throw new Error("Invalid revision: " + rp.revision); } return restbase.get({ uri: new URI(path) }) .then(function(res) { return res.body.items[0]; }); }; PSP.getFormat = function(format, restbase, req) { var self = this; var rp = req.params; rp.title = rbUtil.normalizeTitle(rp.title); function generateContent(storageRes) { if (storageRes.status === 404 || storageRes.status === 200) { return self.getRevisionInfo(restbase, req) .then(function(revInfo) { rp.revision = revInfo.rev + ''; if (revInfo.title !== rp.title) { // Re-try to retrieve from storage with the // normalized title & revision rp.title = revInfo.title; return self.getFormat(format, restbase, req); } else { return self.generateAndSave(restbase, req,
format, storageRes); } }); } else { // Don't generate content if there's some other error. throw storageRes; } } var contentReq = restbase.get({ uri: self.getBucketURI(rp, format, rp.tid) }); if (req.headers && /no-cache/i.test(req.headers['cache-control']) && rp.revision) { // Check content generation either way contentReq = contentReq.then(function(res) { if (req.headers['if-unmodified-since']) { try { var jobTime = Date.parse(req.headers['if-unmodified-since']); var revInfo = rbUtil.parseETag(res.headers.etag); if (revInfo && uuid.fromString(revInfo.tid).getDate() >= jobTime) { // Already up to date, nothing to do. return { status: 412, body: { type: 'precondition_failed', detail: 'The precondition failed' } }; } } catch (e) {} // Ignore errors from date parsing } return generateContent(res); }, generateContent); } else { // Only (possibly) generate content if there was an error contentReq = contentReq.then(function(res) { return self.wrapContentReq(restbase, req, P.resolve(res), format); }, generateContent // No need to wrap generateContent ); } return contentReq .then(function(res) { if (res && res.headers && !/^application\/json/.test(res.headers['content-type'])) { res.headers['Content-Security-Policy'] = rbUtil.constructCSP(rp.domain, { allowInline: true }); } return res; }); }; PSP.listRevisions = function(format, restbase, req) { var self = this; var rp = req.params; var revReq = { uri: new URI([rp.domain, 'sys', 'key_rev_value', 'parsoid.'
+ format, rbUtil.normalizeTitle(rp.title), '']), body: { limit: restbase.rb_config.default_page_size } }; if (req.query.page) { revReq.body.next = restbase.decodeToken(req.query.page); } return restbase.get(revReq) .then(function(res) { if (res.body.next) { res.body._links = { next: { href: "?page=" + restbase.encodeToken(res.body.next) } }; } return res; }); }; PSP._getOriginalContent = function(restbase, req, revision, tid) { var rp = req.params; function get(format) { var path = [rp.domain, 'sys', 'parsoid', format, rbUtil.normalizeTitle(rp.title), revision]; if (tid) { path.push(tid); } return restbase.get({ uri: new URI(path) }) .then(function(res) { if (res.body && Buffer.isBuffer(res.body)) { res.body = res.body.toString(); } return { headers: { 'content-type': res.headers['content-type'] }, body: res.body }; }); } return P.props({ html: get('html'), 'data-parsoid': get('data-parsoid') }) .then(function(res) { res.revid = revision; return res; }); }; PSP.transformRevision = function(restbase, req, from, to) { var self = this; var rp = req.params; var tid; if (from === 'html') { if (req.headers && req.headers['if-match'] && rbUtil.parseETag(req.headers['if-match'])) { // Prefer the If-Match header tid = rbUtil.parseETag(req.headers['if-match']).tid; } else if (req.body && req.body.html) { // Fall back to an inline meta tag in the HTML var tidMatch = // .exec(req.body.html); tid = tidMatch && tidMatch[1]; } } return this._getOriginalContent(restbase, req, rp.revision, tid) .then(function(original) { // Check if parsoid metadata is present as it's required by parsoid.
if (!original['data-parsoid'].body || original['data-parsoid'].body.constructor !== Object || !original['data-parsoid'].body.ids) { throw new rbUtil.HTTPError({ status: 400, body: { type: 'invalid_request', description: 'The page/revision has no associated Parsoid data' } }); } var body2 = { original: original }; if (from === 'sections') { var sections = req.body.sections; if (req.body.sections.constructor !== Object) { try { sections = JSON.parse(req.body.sections.toString()); } catch (e) { // Catch JSON parsing exception and return 400 throw new rbUtil.HTTPError({ status: 400, body: { type: 'invalid_request', description: 'Invalid JSON provided in the request' } }); } } body2.html = { body: replaceSections(original, sections) }; from = 'html'; } else { body2[from] = req.body[from]; } // For now, simply pass this through. // See https://phabricator.wikimedia.org/T106909 for the discussion // about the longer term plan. if (req.body.scrubWikitext) { body2.scrubWikitext = true; } var path = [rp.domain, 'sys', 'parsoid', 'transform', from, 'to', to]; if (rp.title) { path.push(rbUtil.normalizeTitle(rp.title)); if (rp.revision) { path.push(rp.revision); } } var newReq = { uri: new URI(path), params: req.params, headers: { 'content-type': 'application/json' }, body: body2 }; return self.callParsoidTransform(restbase, newReq, from, to); }); }; PSP.callParsoidTransform = function callParsoidTransform(restbase, req, from, to) { var rp = req.params; // Parsoid currently spells 'wikitext' as 'wt' var parsoidTo = to; if (to === 'wikitext') { parsoidTo = 'wt'; } else if (to === 'html') { // Retrieve pagebundle whenever we want HTML parsoidTo = 'pagebundle'; } var parsoidExtras = []; if (rp.title) { parsoidExtras.push(rbUtil.normalizeTitle(rp.title)); } else { // Fake title to avoid Parsoid error: <400/No title or wikitext was provided> parsoidExtras.push('Main_Page'); } if (rp.revision) { parsoidExtras.push(rp.revision); } var parsoidExtraPath =
parsoidExtras.map(encodeURIComponent).join('/'); if (parsoidExtraPath) { parsoidExtraPath = '/' + parsoidExtraPath; } var domain = rp.domain; // Re-map test domain if (domain === 'en.wikipedia.test.local') { domain = 'en.wikipedia.org'; } var parsoidReq = { uri: this.parsoidHost + '/v2/' + domain + '/' + parsoidTo + parsoidExtraPath, headers: { 'content-type': 'application/json' }, body: req.body }; - parsoidReq = rbUtil.copyForwardedHeaders(restbase, parsoidReq); + rbUtil.copyForwardedHeaders(restbase, parsoidReq); return restbase.post(parsoidReq); }; /** * Cheap body.innerHTML extraction. * * This is safe as we know that the HTML we are receiving from Parsoid is * serialized as XML. */ function cheapBodyInnerHTML(html) { var match = /]*>([\s\S]*)<\/body>/.exec(html); if (!match) { throw new Error('No HTML body found!'); } else { return match[1]; } } /** * Replaces sections in original content with sections provided in sectionsJson */ function replaceSections(original, sectionsJson) { var sectionOffsets = original['data-parsoid'].body.sectionOffsets; var newBody = cheapBodyInnerHTML(original.html.body); var sectionIds = Object.keys(sectionsJson); var illegalId = sectionIds.some(function(id) { return !sectionOffsets[id]; }); if (illegalId) { throw new rbUtil.HTTPError({ status: 400, body: { type: 'invalid_request', description: 'Invalid section ids' } }); } sectionIds.sort(function(id1, id2) { return sectionOffsets[id2].html[0] - sectionOffsets[id1].html[0]; }) .forEach(function(id) { var offset = sectionOffsets[id]; newBody = newBody.substring(0, offset.html[0]) + sectionsJson[id] + newBody.substring(offset.html[1], newBody.length); }); return '' + newBody + ''; } PSP.makeTransform = function(from, to) { var self = this; return function(restbase, req) { var rp = req.params; if (!req.body[from]) { throw new rbUtil.HTTPError({ status: 400, body: { type: 'invalid_request', description: 'Missing request parameter: ' + from } }); } var transform; if (rp.revision) {
transform = self.transformRevision(restbase, req, from, to); } else { transform = self.callParsoidTransform(restbase, req, from, to); } return transform .then(function(res) { // Unwrap to the flat response format var innerRes = res.body[to]; innerRes.status = 200; // Handle bodyOnly flag if (to === 'html' && req.body.bodyOnly) { innerRes.body = cheapBodyInnerHTML(innerRes.body); } return innerRes; }); }; }; module.exports = function(options) { var ps = new ParsoidService(options); return { spec: spec, operations: ps.operations, // Dynamic resource dependencies, specific to implementation resources: [ { uri: '/{domain}/sys/key_rev_value/parsoid.html', body: { revisionRetentionPolicy: { type: 'latest', count: 1, grace_ttl: 86400 }, valueType: 'blob', version: 1 } }, { uri: '/{domain}/sys/key_rev_value/parsoid.wikitext', body: { valueType: 'blob' } }, { uri: '/{domain}/sys/key_rev_value/parsoid.data-parsoid', body: { revisionRetentionPolicy: { type: 'latest', count: 1, grace_ttl: 86400 }, valueType: 'json', version: 1 } }, { uri: '/{domain}/sys/key_rev_value/parsoid.section.offsets', body: { revisionRetentionPolicy: { type: 'latest', count: 1, grace_ttl: 86400 }, valueType: 'json', version: 1 } }, { uri: '/{domain}/sys/key_rev_value/parsoid.data-mw', body: { valueType: 'json' } } ] }; }; diff --git a/package.json b/package.json index 087e1aa..4c1f102 100644 --- a/package.json +++ b/package.json @@ -1,61 +1,61 @@ { "name": "restbase", "version": "0.7.11", "description": "REST storage and service dispatcher", "main": "server.js", "scripts": { "start": "service-runner", "test": "sh test/utils/run_tests.sh test", "coverage": "sh test/utils/run_tests.sh coverage", "coveralls": "cat ./coverage/lcov.info | coveralls" }, "repository": { "type": "git", "url": "git://github.com/wikimedia/restbase.git" }, "keywords": [ "REST", "API", "routing", "orchestration", "storage", "buckets", "tables", "queues", "cassandra", "kafka" ], "author": "Wikimedia Service Team ", "license":
"Apache2", "bugs": { "url": "https://phabricator.wikimedia.org/tag/restbase/" }, "homepage": "https://github.com/wikimedia/restbase", "dependencies": { "bluebird": "2.8.2", "busboy": "^0.2.9", "js-yaml": "^3.3.1", "jsonwebtoken": "^5.0.1", "cassandra-uuid": "^0.0.2", "preq": "^0.4.3", "restbase-mod-table-cassandra": "^0.7.11", "service-runner": "^0.2.0", - "swagger-router": "^0.1.0", + "swagger-router": "^0.1.1", "swagger-ui": "git+https://github.com/wikimedia/swagger-ui#master", "tassembly": "^0.1.4" }, "devDependencies": { "bunyan": "^1.4.0", "coveralls": "^2.11.2", "heapdump": "^0.3.5", "istanbul": "^0.3.15", "mocha": "^2.2.5", "mocha-jshint": "^2.2.3", "mocha-lcov-reporter": "^0.0.2", "swagger-test": "0.2.0", "url-template": "^2.0.6", "nock": "^2.6.0", "restbase-mod-table-sqlite": "^0.1.0", "mocha-jscs": "^1.2.0" } } diff --git a/test/features/router/buildTree.js b/test/features/router/buildTree.js index 628e659..9d9b897 100644 --- a/test/features/router/buildTree.js +++ b/test/features/router/buildTree.js @@ -1,168 +1,206 @@ 'use strict'; // mocha defines to avoid JSHint breakage /* global describe, it, before, beforeEach, after, afterEach */ var fs = require('fs'); var yaml = require('js-yaml'); var assert = require('assert'); var Router = require('../../../lib/router'); var loadConfig = require('../../utils/server').loadConfig; var router = new Router(); var rootSpec = { paths: { '/{domain:en.wikipedia.test.local}/v1': { 'x-subspecs': [ { paths: { '/page/{title}/html': { get: { 'x-backend-request': { uri: '/{domain}/sys/parsoid/html/{title}' } } } } } ] } } }; var faultySpec = { paths: { '/{domain:en.wikipedia.test.local}': { 'x-subspecs': ['some/non/existing/spec'] } } }; var additionalMethodSpec = { paths: { '/{domain:en.wikipedia.test.local}/v1': { 'x-subspecs': [ { paths: { '/page/{title}/html': { get: { 'x-backend-request': { uri: '/{domain}/sys/parsoid/html/{title}' } } } } }, { paths: { '/page/{title}/html': { post: { 'x-backend-request': { uri:
'/{domain}/sys/parsoid/html/{title}' } } } } } ] } } }; var overlappingMethodSpec = { paths: { '/{domain:en.wikipedia.test.local}/v1': { 'x-subspecs': [ { paths: { '/page/{title}/html': { get: { 'x-backend-request': { uri: '/{domain}/sys/parsoid/html/{title}' } } } } }, { paths: { '/page/{title}/html': { get: { 'x-backend-request': { uri: '/{domain}/sys/parsoid/html/{title}' } } } } } ] } } }; +// `security` is declared on the outer path and again on the nested '/page' entry; +// the routed handler is expected to collect both lists, outermost first. +var nestedSecuritySpec = { + paths: { + '/{domain:en.wikipedia.test.local}/v1': { + 'x-subspecs': [ + { + paths: { + '/page': { + 'x-subspec': { + paths: { + '/secure': { + get: { + 'x-backend-request': { + uri: '/{domain}/sys/parsoid/html/{title}' + } + } + } + } + }, + security: [ 'second', 'third' ] + } + } + } + ], + security: [ 'first' ] + } + } +}; + var fullSpec = loadConfig('config.test.yaml'); describe('tree building', function() { it('should build a simple spec tree', function() { return router.loadSpec(rootSpec) .then(function() { //console.log(JSON.stringify(router.tree, null, 2)); var handler = router.route('/en.wikipedia.test.local/v1/page/Foo/html'); //console.log(handler); assert.equal(!!handler.value.methods.get, true); assert.equal(handler.params.domain, 'en.wikipedia.test.local'); assert.equal(handler.params.title, 'Foo'); }); }); it('should fail loading a faulty spec', function() { return router.loadSpec(faultySpec) .then(function() { throw new Error("Should throw an exception!"); }, function(e) { // exception thrown as expected return; }); }); it('should build the example config spec tree', function() { var resourceRequests = []; return router.loadSpec(fullSpec.spec, { request: function(req) { resourceRequests.push(req); } }) .then(function() { //console.log(JSON.stringify(router.tree, null, 2)); var handler = router.route('/en.wikipedia.test.local/v1/page/html/Foo'); //console.log(handler); 
assert.equal(resourceRequests.length > 0, true); assert.equal(!!handler.value.methods.get, true);
assert.equal(handler.params.domain, 'en.wikipedia.test.local'); assert.equal(handler.params.title, 'Foo'); }); }); it('should allow adding methods to existing paths', function() { return router.loadSpec(additionalMethodSpec) .then(function() { var handler = router.route('/en.wikipedia.test.local/v1/page/Foo/html'); assert.equal(!!handler.value.methods.get, true); assert.equal(!!handler.value.methods.post, true); }); }); it('should on overlapping methods on the same path', function() { - return router.loadSpec(additionalMethodSpec) + return router.loadSpec(overlappingMethodSpec) .then(function() { throw new Error("Should throw an exception!"); }, function(e) { // exception thrown as expected return; }); }); + + it('should parse permission along the path to endpoint', function() { + return router.loadSpec(nestedSecuritySpec) + .then(function() { + var handler = router.route('/en.wikipedia.test.local/v1/page/secure'); + assert.deepEqual(handler.permissions, ['first', 'second', 'third']); + }); + }); });