diff --git a/config.test.yaml b/config.test.yaml index 2c336e6..31ea97b 100644 --- a/config.test.yaml +++ b/config.test.yaml @@ -1,295 +1,294 @@ # RESTBase config info: name: restbase templates: wmf-content-1.0.0: &wp/content/1.0.0 swagger: '2.0' # swagger options, overriding the shared ones from the merged specs (?) info: version: 1.0.0-beta title: Wikimedia REST API description: > This API aims to provide coherent and low-latency access to Wikimedia content and services. It is currently in beta testing, so things aren't completely locked down yet. Each entry point has explicit stability markers to inform you about development status and change policy, according to [our API version policy](https://www.mediawiki.org/wiki/API_versioning). ### High-volume access - Don't perform more than 500 requests/s to this API. - Set a unique `User-Agent` header that allows us to contact you quickly. Email addresses or URLs of contact pages work well. - Consider using our [HTML dumps](https://phabricator.wikimedia.org/T17017) once they become available. termsOfService: https://wikimediafoundation.org/wiki/Terms_of_Use contact: name: the Wikimedia Services team url: http://mediawiki.org/wiki/RESTBase license: name: Apache2 url: http://www.apache.org/licenses/LICENSE-2.0 x-subspecs: - mediawiki/v1/content - mediawiki_v1_graphoid - mediawiki/v1/mobileapps - test wmf-sys-1.0.0: &wp/sys/1.0.0 info: title: Default MediaWiki sys API module version: 1.0.0 paths: /{module:table}: &wp/sys/table # Can use this anchor to share the table x-modules: # There can be multiple modules too per stanza, as long as the # exported symbols don't conflict. The operationIds from the spec # will be resolved against all of the modules. 
- name: restbase-mod-table-cassandra version: 1.0.0 type: npm options: # Passed to the module constructor conf: hosts: [localhost] keyspace: system username: cassandra password: cassandra defaultConsistency: one # or 'one' for single-node testing storage_groups: - name: test.group.local domains: - /test\..*\.org$/ - /test\.local$/ - name: default.group.local domains: /./ /{module:page_revisions}: x-modules: - name: page_revisions version: 1.0.0 type: file /{module:key_value}: x-modules: - name: key_value version: 1.0.0 type: file /{module:key_rev_value}: x-modules: - name: key_rev_value version: 1.0.0 type: file /{module:parsoid}: x-modules: - name: parsoid version: 1.0.0 type: file options: parsoidHost: http://parsoid-lb.eqiad.wikimedia.org # For local testing, use: # parsoidHost: http://localhost:8000 /{module:graphoid}: x-modules: - name: simple_service version: 1.0.0 type: file options: paths: /v1/png/{title}/{revision}/{graph_id}: get: on_request: - get_from_graphoid: request: uri: http://graphoid.wikimedia.org/en.wikipedia.org/v1/png/{title}/{revision}/{graph_id} /{module:mobileapps}: x-modules: - name: simple_service version: 1.0.0 type: file options: paths: /v1/html/{title}: get: on_request: - get_from_backend: request: uri: http://appservice.wmflabs.org/en.wikipedia.org/v1/page/mobile-html/{title} /v1/sections/{title}: get: on_request: - get_from_backend: request: uri: http://appservice.wmflabs.org/en.wikipedia.org/v1/page/mobile-html-sections/{title} /v1/sections-lead/{title}: get: on_request: - get_from_backend: request: uri: http://appservice.wmflabs.org/en.wikipedia.org/v1/page/mobile-html-sections-lead/{title} /v1/sections-remaining/{title}: get: on_request: - get_from_backend: request: uri: http://appservice.wmflabs.org/en.wikipedia.org/v1/page/mobile-html-sections-remaining/{title} /v1/text/{title}: get: on_request: - get_from_backend: request: uri: http://appservice.wmflabs.org/en.wikipedia.org/v1/page/mobile-text/{title} /{module:action}: 
x-modules: - name: action type: file options: apiRequest: method: post uri: http://en.wikipedia.org/w/api.php headers: # the domain is en.wikipedia.test.local, so we can't use it. use en.wikipedia.org instead host: 'en.wikipedia.org' body: '{$.request.body}' /{module:testservice}: x-modules: - name: simple_service version: 1.0.0 type: file options: paths: /test/{key}: get: on_setup: - init_storage: uri: /{domain}/sys/key_value/testservice.test on_request: - get_from_storage: request: uri: /{domain}/sys/key_value/testservice.test/{key} headers: 'cache-control': '{cache-control}' return_if: status: 200 catch: status: 404 - get_from_api: request: uri: http://en.wikipedia.org/wiki/{+key} body: '{$.request.body}' - store: request: method: put uri: /{domain}/sys/key_value/testservice.test/{key} headers: '{$.get_from_api.headers}' body: '{$.get_from_api.body}' - return_response: return: status: '{$.get_from_api.status}' headers: 'content-type': '{$.get_from_api.headers.content-type}' 'etag': '{$.store.headers.etag}' body: '{$.get_from_api.body}' /{module:testservice_parallel}: x-modules: - name: simple_service version: 1.0.0 type: file options: paths: /test/{key1}/{key2}: get: on_request: - get_from_api1: request: uri: http://en.wikipedia.org/wiki/{+key1} get_from_api2: request: uri: http://en.wikipedia.org/wiki/{+key2} - return_response: return: status: 200 body: first: '{$.get_from_api1}' second: '{$.get_from_api2}' /{module:page_save}: x-modules: - name: page_save type: file wp-default-1.0.0: &wp/default/1.0.0 x-subspecs: - paths: /{api:v1}: x-subspec: *wp/content/1.0.0 - paths: /{api:sys}: x-subspec: *wp/sys/1.0.0 wp-secure-1.0.0: &wp/secure/1.0.0 x-subspecs: - paths: /{api:v1}: x-subspec: *wp/content/1.0.0 security: - mediawiki_auth: - read - paths: /{api:sys}: x-subspec: *wp/sys/1.0.0 spec: &spec title: "The RESTBase root" # Some more general RESTBase info paths: # test domain /{domain:en.wikipedia.test.local}: *wp/default/1.0.0 - /{domain:fr.wikipedia.test.local}: 
*wp/default/1.0.0 /{domain:secure.wikipedia.test.local}: *wp/secure/1.0.0 services: - name: restbase module: ./lib/server conf: port: 7231 spec: *spec salt: secret default_page_size: 1 internal_request_whitelist: - http://en.wikipedia.org/w/api.php # Left as a regex for test purpose - /http:\/\/parsoid\-lb\.eqiad\.wikimedia\.org/ test: content_types: html: text/html;profile="mediawiki.org/specs/html/1.1.0";charset=utf-8 data-parsoid: application/json;profile="mediawiki.org/specs/data-parsoid/0.0.1" wikitext: text/plain;profile="mediawiki.org/specs/wikitext/1.0.0";charset=utf-8 logging: name: restbase level: info #streams: ## XXX: Use gelf-stream -> logstash #- type: gelf # host: <%= @logstash_host %> # port: <%= @logstash_port %> metrics: #type: txstatsd #host: localhost #port: 8125 diff --git a/doc/Implementation.md b/doc/Implementation.md index 0e23c5d..c6c116d 100644 --- a/doc/Implementation.md +++ b/doc/Implementation.md @@ -1,210 +1,211 @@ # RESTBase Implementation ## Code structure - modules in separate npm packages - `restbase-tables-cassandra` - `restbase-queues-kafka` - `restbase-mod-parsoid` Tree: ``` restbase.js lib/ storage.js util.js # XXX: not quite final yet config.yaml specs/ restbase/ sys/ key_rev_value.yaml key_rev_service.yaml table.yaml # defining operationIds, which map to module exports mediawiki/ v1/ content.yaml sys/ parsoid.yaml page_revision.yaml doc/ test/ ``` ## Spec loading Converts a spec tree into a route object tree, ready to be passed to `swagger-router`. Can be passed into Router.addSpec as a handler. 
- parameters: - spec - check global nodeMap.get(spec) - if found, just use the existing sub-tree (`parentNode.set()`) and return - specToTree: spec -> { children: [] - look for - for each x-restbase directly inside of path entries (*not* inside of methods) - if `modules` is defined, load them and check for duplicate symbols - if `specs` is defined, load them and apply spec loader recursively, passing in modules and prefix path - if `resources` is defined, add them to a global list, with ref back to the original spec - call them later on complete tree (should we *only* do PUT?) - on error, complain really loudly and either bail out completely or keep going (config) - could also consider blacklisting modules / paths based on this; perhaps re-build the tree unless we can `.delSpec()` by then - for each x-restbase inside of methods inside of path entries - if `service` is defined, construct a method that resolves the backend path - else, check if `operationId` is defined in passed-in modules - in cases where we can be sure that the matching end point will be static, we can cache the result (with a method to map parameters, possibly inferred from a wildcard mapping or by passing in unique strings & looking for them in the final parameters) Result: tree with spec nodes like this: ```javascript { path: new URI(pathFragment), spec: specObj, // reference to the original spec object, for documentation value: valueObject, // optionally: children: [childObj, childObj], // child specs, one for each specs: // declaration } ``` `valueObject` might look like this: ```javascript { acl: {}, // TODO: figure out handler: handlerFn, // signature: f(restbase, req), as currently // more properties extracted from the spec as needed (ex: content-types // for sanitization) } ``` For router setup, each path down the spec tree is passed to the router as an array: `addSpecs([specRootNode, specNode2, specNode3])`. 
We *could* also pass the entire tree, but that'd be less flexible for dynamic updates later. In any case, passing in an array of spec nodes lets us check each spec node for presence in the `_nodes` map before creating a subtree for it. This will naturally establish sharing at the highest possible spec boundary. Dynamic updates later without a full rebuild won't be trivial with sharing. A good compromise could be to always rebuild an entire domain on any change. (So back to passing trees, except that they are not the root tree?) For ACLs the DAG structure is leveraged by checking ACLs all the way down the path. This would allow us to restrict access at the domain level, for the entire domain, while still sharing sub-trees. To avoid tight coupling of the router to the actual ACL implementation we can have `lookup(path)` (optionally) return an array of all value objects encountered in a successful lookup in addition to the actual lookup result / leaf valueObject. Currently router returns a list of required permissions collected along the path. In the spec, an ACL for a sub-path looks like this: ```yaml paths: /{domain:en.wikipedia.org}: x-subspecs: - paths: /{api:v1}: x-subspec: *wp/content/1.0.0 - security: # list of required permissions - - read + security: # list of required security requirement objects with permissions + - mediawiki_auth: + - read ``` The effective required capabilities (aka roles|scopes|..) for a given route are the union of the path-induced ones with those defined on the route handler itself. This means that path-based ACLs can only add to the required capabilities for subtree access, effectively locking them down further. The result should be fairly predictable behavior. Most of the ACL customizations between different wikis would happen at the authorization level anyway (mapping of identity to capabilities), which means that tree ACLs don't absolutely need to differ between public and private wikis. 
TODO: Actually think this through more thoroughly. ## Internal request & response objects ### Request ```javascript { uri: '/v1/foo', // required // optional from here method: 'GET', // default: 'GET', query: { q: 'some query parameter' }, headers: { 'Cache-control': 'no-cache' }, body: 'Hello world' } ``` #### `uri` The URI of the resource. Required. Can be a string, or a `swagger-router.URI` object. #### `method` [optional] HTTP request method. Default `GET`. Examples: `GET`, `POST`, `PUT`, `DELETE` etc. #### `query` [optional] Map of URI query parameters. #### `headers` [optional] Map of HTTP request headers. #### `body` [optional] Request data: - `string` - Incoming: Set for incoming requests with `text/*` content type. - Outgoing: Sent as UTF8 string by default. `Content-Type` set to `text/plain` if not provided. - `Buffer` - Incoming: Returned for non-text content types. - Outgoing: Sent as binary data if the content-type allows it. Default content type if not set: `application/binary`. - `Object` - Incoming: `application/json` request or `POST` data - Outgoing: Sent as JSON. `Content-Type` set to `application/json` if not set. ### Response ```javascript { body: 'Hello world', // default: '' status: 200, // default: 200 headers: { 'Cache-control': 'no-cache' } } ``` #### `body` [optional] Default value: Empty string. Response data: - `string`: Sent as UTF8 string by default. `Content-Type` set to `text/plain` if not provided. - `Buffer`: If no `text/*` `Content-Type` set, sent as binary string with `Content-Type` of `application/binary`. - `Object`: Sent as JSON. `Content-Type` set to `application/json` if not set. #### `status` [optional] The HTTP response status. - Number: `200` - Array of status code & reason string: `[200,"OK"]` #### `headers` [optional] Map of response HTTP headers. 
## API doc alternatives - RAML - API-Blueprint - http://www.slideshare.net/SmartBear_Software/api-strat-2014metadataformatsshort - http://apiux.com/2013/04/09/rest-metadata-formats/ - JSON schema hypermedia http://json-schema.org/latest/json-schema-hypermedia.html diff --git a/lib/auth.js b/lib/auth.js index 76b5f91..19a3306 100644 --- a/lib/auth.js +++ b/lib/auth.js @@ -1,100 +1,104 @@ "use strict"; var P = require('bluebird'); var rbUtil = require('./rbUtil'); var URI = require('swagger-router').URI; var auth = {}; /** * Different security requirement objects could be collected * along the path with different required permissions. * * Here we iterate over objects and join permissions for duplicated types * of checks * * @param permissions {Array} of security requirement objects. + * @param existing {Object} an existing collection of security requirements * @return {Object} a collection of joined security requirements */ -function joinPermissions(permissions) { +auth.joinPermissions = function(permissions, existing) { var result = {}; + if (existing) { + Object.keys(existing).forEach(function(permKey) { + result[permKey] = new Set(existing[permKey]); + }); + } permissions.forEach(function(permObj) { var names = Object.keys(permObj); if (names.length !== 1) { throw new Error('Invalid security requirement object: ' + JSON.stringify(permObj)); } var name = names[0]; if (!result[name]) { result[name] = new Set(permObj[name]); } else { permObj[name].forEach(function(perm) { result[name].add(perm); }); } }); return result; -} +}; /** * A collection of known security requirement object checkers. * Identified by the security requirement name, and contains a function * that takes RESTBase, request and a Set of rights as an input and verifies * that all rights are present. 
*/ auth.checkers = {}; auth.checkers.mediawiki_auth = function(restbase, req, permissions) { var checkReq = { uri: new URI([req.params.domain, 'sys', 'action', 'query']), method: 'post', body: { meta: 'userinfo', uiprop: 'rights' } }; return restbase.post(checkReq) .then(function(userInfo) { if (userInfo.rights && Array.isArray(userInfo.rights)) { permissions.forEach(function(perm) { if (userInfo.rights.indexOf(perm) < 0) { throw new rbUtil.HTTPError({ status: 401, body: { type: 'unauthorized', title: 'Not authorized to access the resource', description: 'Need permission ' + perm } }); } }); } else { throw new rbUtil.HTTPError({ status: 400, body: { type: 'invalid_request', title: 'Failed to check permissions for the request' } }); } }); }; /** * Checks against MW api if all the required permissions are present. * In case some of the permissions are absent - throws 401 Unauthorized. * In case failed to get permisisons for MW API throws 400 Bad Request. * * @param restbase restbase instance to use * @param req original request - * @param permissions array of security requirement objects collected along the path */ -auth.checkPermissions = function(restbase, req, permissions) { - var joinedPerms = joinPermissions(permissions); - return P.all(Object.keys(joinedPerms).map(function(requirementName) { +auth.checkPermissions = function(restbase, req) { + return P.all(Object.keys(restbase._accessRestrictions).map(function(requirementName) { var check = auth.checkers[requirementName]; if (!check) { throw new Error('Unknown security requirement name: ' + requirementName); } - return check(restbase, req, joinedPerms[requirementName]); + return check(restbase, req, restbase._accessRestrictions[requirementName]); })); }; module.exports = auth; \ No newline at end of file diff --git a/lib/restbase.js b/lib/restbase.js index 149a4c1..999d415 100644 --- a/lib/restbase.js +++ b/lib/restbase.js @@ -1,444 +1,444 @@ 'use strict'; /* * RESTBase request dispatcher and general shared 
per-request state namespace */ var jwt = require('jsonwebtoken'); var P = require('bluebird'); var rbUtil = require('./rbUtil'); var HTTPError = rbUtil.HTTPError; var preq = require('preq'); var swaggerUI = require('./swaggerUI'); var auth = require('./auth'); // Create a uniform but shallow request object copy with sane defaults. This // keeps code dealing with this request monomorphic (good for perf), and // avoids subtle bugs when requests shared between recursive requests are // mutated in another control branch. At the very minimum, we are mutating the // .params property for each sub-request. function cloneRequest(req) { return { uri: req.uri || req.url || null, method: req.method || 'get', headers: req.headers || {}, query: req.query || {}, body: req.body || null, params: req.params || {} }; } function RESTBase(options, req) { if (options && options.constructor === RESTBase) { // Child instance var par = options; this.log = par.log; this.metrics = par.metrics; this.reqId = par.reqId || req && req.headers && req.headers['x-request-id'] || rbUtil.generateRequestId(); this._parent = par; // Remember the request that led to this child instance at each level, so // that we can provide nice error reporting and tracing. this._req = par._req; this._recursionDepth = par._recursionDepth + 1; this._priv = par._priv; this.rb_config = this._priv.options.conf; this._rootReq = par._rootReq || req; this._forwardedCookies = par._forwardedCookies || this._rootReq.headers && this._rootReq.headers.cookie; this._accessRestrictions = par._accessRestrictions; } else { // Brand new instance this.log = options.log; // Logging method this.metrics = options.metrics; this.reqId = null; // Private this._parent = null; this._req = null; this._recursionDepth = 0; options.maxDepth = options.maxDepth || 10; if (!options.conf.salt || typeof options.conf.salt !== 'string') { throw new Error("Missing or invalid `salt` option in RESTBase config. 
" + "Expected a string."); } // Private state, shared with child instances this._priv = { options: options, router: options.router }; this.rb_config = options.conf; this.rb_config.user_agent = this.rb_config.user_agent || 'RESTBase'; this._rootReq = null; this._forwardedCookies = null; - this._accessRestrictions = []; + this._accessRestrictions = {}; } } RESTBase.prototype.isInternalRequest = function(req) { if (this.rb_config.internal_request_whitelist) { var uri = req.uri.toString(); return this.rb_config.internal_request_whitelist.some(function(regex) { if (/^\/.+\/$/.test(regex)) { return new RegExp(regex.substring(1, regex.length - 1)).test(uri); } else { return uri.indexOf(regex) === 0; } }); } return false; }; // Sets the request id for this instance and adds it to // the request header, if defined RESTBase.prototype.setRequestId = function(req) { req.headers = req.headers || {}; if (req.headers['x-request-id']) { return; } req.headers['x-request-id'] = this.reqId; }; // Make a child instance RESTBase.prototype.makeChild = function(req) { return new RESTBase(this, req); }; // A default listing handler for URIs that end in / and don't have any // handlers associated with it otherwise. RESTBase.prototype.defaultListingHandler = function(value, restbase, req) { var rq = req.query; if (rq.spec !== undefined && value.specRoot) { var spec = Object.assign({}, value.specRoot, { // Set the base path dynamically basePath: req.uri.toString().replace(/\/$/, '') }); if (req.params.domain === req.headers.host.replace(/:[0-9]+$/, '')) { // This is a host-based request. Set an appropriate base path. 
spec.basePath = spec['x-host-basePath'] || spec.basePath; } return P.resolve({ status: 200, body: spec }); } else if (rq.doc !== undefined) { // Return swagger UI & load spec from /?spec if (!req.query.path) { req.query.path = '/index.html'; } return swaggerUI(restbase, req); } else if (/\btext\/html\b/.test(req.headers.accept) && req.uri.path.length <= 2) { // Browser request and above api level req.query.path = '/index.html'; var html = '
To retrieve a regular JSON listing, you can either "
+ "omit the Accept
header, or send one that does not contain "
+ "text/html
.