diff --git a/routes/article.creation.morelike.js b/routes/article.creation.morelike.js
index f57544a..a9a8285 100644
--- a/routes/article.creation.morelike.js
+++ b/routes/article.creation.morelike.js
@@ -1,75 +1,77 @@
 'use strict';
 
 
 const BBPromise = require("bluebird");
 const mysql = require('mysql');
 const util = require('../lib/util');
 const aUtil = require('../lib/article.creation.morelike');
 
 const router = util.router();
 let app;
 
 
 /**
  * GET /{seed}
  * Gets missing articles (from the current wiki) similar to seed.
  *
  * Similar articles are retrieved using a CirrusSearch morelike query.
  * Articles are prioritized using translation recommendations
  * predictions.
  */
 router.get('/:seed', (req, res) => {
     const domain = req.params.domain;
     const domainParts = domain.split('.');
     const language = domainParts[0]; // e.g. en
     const projectDomain = domainParts.splice(1).join('.');
     const sourceLanguages = app.conf.article.translation_models[language] || null;
 
     if (!sourceLanguages) {
         app.logger.log('error/article.creation.morelike',
             `Article translation model for "${language}" doesn't exist.`);
-        throw new util.HTTPError({
-            status: 400
+        const errorObject = new util.HTTPError({
+            status: 400,
+            message: "Article recommendations for the domain don't exist."
         });
+        return BBPromise.reject(errorObject);
     }
 
     return aUtil.getWikidataId(app, domain, req.params.seed).then((id) => {
         return aUtil.getSimilarArticles(app, projectDomain, id, sourceLanguages)
             .then((ids) => {
                 const errorObject = new util.HTTPError({
                     status: 404
                 });
                 if (!ids.length) {
                     return BBPromise.reject(errorObject);
                 } else {
                     return aUtil.getArticleScoresFromDb(app, ids, language)
                         .then((results) => {
                             res.json(results);
                         })
                         .catch((error) => {
                             return BBPromise.reject(errorObject);
                         });
                 }
             });
     });
 });
 
 module.exports = function(appObj) {
     app = appObj;
     const mysqlConf = app.conf.mysql_conn;
     const hostPort = mysqlConf.host.split(':');
     app.mysqlPool = mysql.createPool({
         connectionLimit: mysqlConf.limit,
         host: hostPort[0],
         port: hostPort[1] || 3306,
         user: mysqlConf.user,
         password: mysqlConf.pass,
         database: mysqlConf.name
     });
 
     return {
         path: '/article/creation/morelike',
         api_version: 1,
         router
     };
 };
diff --git a/routes/article.creation.translation.js b/routes/article.creation.translation.js
index 0f4e5c1..f2dbf0e 100644
--- a/routes/article.creation.translation.js
+++ b/routes/article.creation.translation.js
@@ -1,72 +1,72 @@
 'use strict';
 
 
 const util = require('../lib/util');
 const tUtil = require('../lib/article.creation.translation');
 
 const router = util.router();
 let app;
 
 
 /**
  * Regular expression used for validating the source parameter
  * @type {RegExp}
  */
 const sourceValidator = /^[a-zA-Z]+(-[a-zA-Z]+)*$/;
 
 function recommend(req, res, source, target, projectDomain, seed) {
     if (!sourceValidator.test(source)) {
         throw new util.HTTPError({
             status: 400,
             type: 'bad_request',
             title: 'Bad request',
             detail: 'source parameter was invalid'
         });
     }
 
     let count = 24;
     if (req.query && req.query.count) {
         count = parseInt(req.query.count, 10);
         if (isNaN(count) || count < 1 || count > 500) {
             throw new util.HTTPError({
                 status: 400,
                 type: 'bad_request',
                 title: 'Bad request',
                 detail: 'count parameter was invalid'
             });
         }
     }
 
     return tUtil.recommend(app, source, target, projectDomain, seed)
         .then((result) => {
             result = result.slice(0, count);
             res.json({
                 count: result.length,
                 items: result
             });
         });
 }
 
 /**
  * GET /{source}/{seed}
  * Gets the articles existing in source but missing in domain based on seed.
  */
 router.get('/:source/:seed?', (req, res) => {
     const domainParts = req.params.domain.split('.');
     const target = domainParts[0];
     const projectDomain = domainParts.splice(1).join('.');
     return recommend(req, res, req.params.source, target,
-        projectDomain, req.params.seed);
+                     projectDomain, req.params.seed);
 });
 
 module.exports = function(appObj) {
 
     app = appObj;
 
     return {
         path: '/article/creation/translation',
         api_version: 1,
         router
     };
 
 };
diff --git a/spec.yaml b/spec.yaml
index e3631e4..36fa7e4 100644
--- a/spec.yaml
+++ b/spec.yaml
@@ -1,145 +1,206 @@
 swagger: '2.0'
 info:
   termsOfService: https://wikimediafoundation.org/wiki/Terms_of_Use
   contact:
     name: the Wikimedia Research team
     url: https://www.mediawiki.org/wiki/Wikimedia_Research
   license:
     name: Apache2
     url: http://www.apache.org/licenses/LICENSE-2.0
 paths:
   # from routes/root.js
   /robots.txt:
     get:
       tags:
         - Root
         - Robots
       description: Gets robots.txt
       x-amples:
         - title: robots.txt check
           request: {}
           response:
             status: 200
             headers:
               user-agent: '*'
               disallow: '/'
   /:
     get:
       tags:
         - Root
       description: The root service end-point
       produces:
         - application/json
       x-amples:
         - title: root with no query params
           request: {}
           response:
             status: 404
         - title: spec from root
           request:
             query:
               spec: true
           response:
             status: 200
         - title: doc from root
           request:
             query:
               doc: true
           response:
             status: 200
         - title: root with wrong query param
           request:
             query:
               fooo: true
           response:
             status: 404
   # from routes/article.creation.translation.js
   /{domain}/v1/article/creation/translation/{source}{/seed}:
     get:
       tags:
         - Recommend
       summary: Recommend missing articles
       description: |
         Recommends articles to be translated from the source to the domain language.
 
         Stability: [unstable](https://www.mediawiki.org/wiki/API_versioning#Unstable)
       produces:
-        - applicaiton/json
+        - application/json
       parameters:
         - name: source
           in: path
           description: The source language code
           type: string
           required: true
         - name: domain
           in: path
           description: The target domain
           type: string
           required: true
         - name: seed
           in: path
           description: The article to use as a search seed
           type: string
           required: false
         - name: count
           in: query
           description: The max number of articles to return
-          type: int
+          type: integer
           required: false
           default: 24
       x-amples:
-        - title: normal source and target
+        - title: article.creation.translation — normal source and target
           request:
             params:
               source: en
               domain: de.wikipedia.org
           response:
             status: 200
             headers:
               content-type: application/json
-        - title: normal source and target with seed
+        - title: article.creation.translation — normal source and target with seed
           request:
             params:
               source: en
               domain: de.wikipedia.org
               seed: Apple
           response:
             status: 200
             headers:
               content-type: application/json
 #       # TODO: Find a solution for this case
-#       - title: bad source
+#       - title: article.creation.translation — bad source
 #         request:
 #           params:
 #             source: qqq
 #             domain: de.wikipedia.org
 #         response:
 #           status: 504
 #           headers:
 #             content-type: application/json
-        - title: bad seed
+        - title: article.creation.translation — bad seed
           request:
             params:
               source: en
               domain: de.wikipedia.org
               seed: thishsouldnotreturnanyresultsfromthesearchapi
           response:
             status: 404
             headers:
               content-type: application/json
-        - title: invalid count
+        - title: article.creation.translation — invalid count
           request:
             params:
               source: en
               domain: de.wikipedia.org
             query:
               count: -123
           response:
             status: 400
-        - title: incorrectly formatted source
+        - title: article.creation.translation — incorrectly formatted source
           request:
             params:
               source: en-
               domain: de.wikipedia.org
           response:
             status: 400
+  /{domain}/v1/article/creation/morelike/{seed}:
+    get:
+      tags:
+        - Recommendation
+      summary: Recommend missing articles
+      description: |
+        Recommends articles that are similar to the seed article but missing
+        from the domain language Wikipedia.
+
+        Stability: [unstable](https://www.mediawiki.org/wiki/API_versioning#Unstable)
+      produces:
+        - application/json
+        - application/problem+json
+      parameters:
+        - name: domain
+          in: path
+          description: The target domain
+          type: string
+          required: true
+        - name: seed
+          in: path
+          description: The article title used to search similar but missing articles
+          type: string
+          required: true
+      responses:
+        '200':
+          description: the prioritized list of Wikidata IDs recommended for creation as Wikipedia articles
+          schema:
+            $ref: '#/definitions/morelike_result'
+        default:
+          description: Error
+          schema:
+            $ref: '#/definitions/problem'
+      x-amples:
+        - title: article.creation.morelike — good article title
+          request:
+            params:
+              seed: Kitob
+              domain: uz.wikipedia.org
+          response:
+            status: 200
+            headers:
+              content-type: application/json
+        - title: article.creation.morelike — bad article title
+          request:
+            params:
+              seed: kitob-missing
+              domain: uz.wikipedia.org
+          response:
+            status: 404
+            headers:
+              content-type: application/json
+        - title: article.creation.morelike — missing models
+          request:
+            params:
+              seed: Book
+              domain: blah.wikipedia.org
+          response:
+            status: 400
+            headers:
+              content-type: application/json