diff --git a/model_info/enwiki.damaging.md b/model_info/enwiki.damaging.md index 15ca4dc..19c6f64 100644 --- a/model_info/enwiki.damaging.md +++ b/model_info/enwiki.damaging.md @@ -1,81 +1,81 @@ Model Information: - type: GradientBoosting - version: 0.4.0 - - params: {'max_features': 'log2', 'subsample': 1.0, 'label_weights': OrderedDict([(True, 10)]), 'random_state': None, 'init': None, 'criterion': 'friedman_mse', 'max_leaf_nodes': None, 'population_rates': None, 'min_impurity_decrease': 0.0, 'learning_rate': 0.01, 'min_impurity_split': None, 'loss': 'deviance', 'warm_start': False, 'max_depth': 7, 'labels': [True, False], 'min_samples_split': 2, 'verbose': 0, 'n_estimators': 700, 'center': True, 'presort': 'auto', 'min_weight_fraction_leaf': 0.0, 'min_samples_leaf': 1, 'multilabel': False, 'scale': True} + - params: {'random_state': None, 'scale': True, 'population_rates': None, 'center': True, 'criterion': 'friedman_mse', 'max_depth': 7, 'loss': 'deviance', 'label_weights': OrderedDict([(True, 10)]), 'labels': [True, False], 'presort': 'auto', 'min_samples_leaf': 1, 'min_weight_fraction_leaf': 0.0, 'learning_rate': 0.01, 'init': None, 'subsample': 1.0, 'max_features': 'log2', 'multilabel': False, 'n_estimators': 700, 'max_leaf_nodes': None, 'warm_start': False, 'min_impurity_decrease': 0.0, 'verbose': 0, 'min_impurity_split': None, 'min_samples_split': 2} Environment: - - revscoring_version: '2.2.2' + - revscoring_version: '2.2.5' - platform: 'Linux-4.9.0-6-amd64-x86_64-with-debian-9.4' - machine: 'x86_64' - - version: '#1 SMP Debian 4.9.82-1+deb9u3 (2018-03-02)' + - version: '#1 SMP Debian 4.9.88-1+deb9u1 (2018-05-07)' - system: 'Linux' - processor: '' - python_build: ('default', 'Jan 19 2017 14:11:04') - python_compiler: 'GCC 6.3.0 20170118' - python_branch: '' - python_implementation: 'CPython' - python_revision: '' - python_version: '3.5.3' - release: '4.9.0-6-amd64' Statistics: - counts (n=19455): + counts (n=19436): label n ~True ~False ------- ----- --- ------- -------- - True 751 --> 422 329 - False 18704 --> 731 17973 + True 751 --> 425 326 + False 18685 --> 702 17983 rates: True False ---------- ------ ------- sample 0.039 0.961 population 0.034 0.966 - match_rate (micro=0.913, macro=0.5): + match_rate (micro=0.914, macro=0.5): False True ------- ------ - 0.943 0.057 - filter_rate (micro=0.087, macro=0.5): + 0.944 0.056 + filter_rate (micro=0.086, macro=0.5): False True ------- ------ - 0.057 0.943 - recall (micro=0.947, macro=0.761): + 0.056 0.944 + recall (micro=0.949, macro=0.764): False True ------- ------ - 0.961 0.562 - !recall (micro=0.576, macro=0.761): + 0.962 0.566 + !recall (micro=0.579, macro=0.764): False True ------- ------ - 0.562 0.961 - precision (micro=0.962, macro=0.661): + 0.566 0.962 + precision (micro=0.963, macro=0.666): False True ------- ------ - 0.984 0.337 - !precision (micro=0.359, macro=0.661): + 0.984 0.348 + !precision (micro=0.369, macro=0.666): False True ------- ------ - 0.337 0.984 - f1 (micro=0.954, macro=0.697): + 0.348 0.984 + f1 (micro=0.955, macro=0.702): False True ------- ------ - 0.972 0.421 - !f1 (micro=0.44, macro=0.697): + 0.973 0.431 + !f1 (micro=0.449, macro=0.702): False True ------- ------ - 0.421 0.972 - accuracy (micro=0.947, macro=0.947): + 0.431 0.973 + accuracy (micro=0.949, macro=0.949): False True ------- ------ - 0.947 0.947 - fpr (micro=0.424, macro=0.239): + 0.949 0.949 + fpr (micro=0.421, macro=0.236): False True ------- ------ - 0.438 0.039 + 0.434 0.038 roc_auc (micro=0.924, macro=0.924): False True ------- ------ 0.924 0.924 - pr_auc (micro=0.978, macro=0.722): + pr_auc (micro=0.978, macro=0.724): False True ------- ------ - 0.997 0.447 + 0.997 0.452 - - score_schema: {'type': 'object', 'title': 'Scikit learn-based classifier score with probability', 'properties': {'prediction': {'type': 'bool', 'description': 'The most likely label predicted by the estimator'}, 'probability': {'type': 'object', 'description': 'A mapping of probabilities onto each of the potential output labels', 'properties': {'false': 'number', 'true': 'number'}}}} + - score_schema: {'properties': {'probability': {'properties': {'false': 'number', 'true': 'number'}, 'type': 'object', 'description': 'A mapping of probabilities onto each of the potential output labels'}, 'prediction': {'type': 'bool', 'description': 'The most likely label predicted by the estimator'}}, 'type': 'object', 'title': 'Scikit learn-based classifier score with probability'} diff --git a/model_info/enwiki.goodfaith.md b/model_info/enwiki.goodfaith.md index ee687f7..90ee37c 100644 --- a/model_info/enwiki.goodfaith.md +++ b/model_info/enwiki.goodfaith.md @@ -1,81 +1,81 @@ Model Information: - type: GradientBoosting - version: 0.4.0 - - params: {'scale': True, 'criterion': 'friedman_mse', 'max_depth': 7, 'n_estimators': 700, 'random_state': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'init': None, 'learning_rate': 0.01, 'verbose': 0, 'min_impurity_decrease': 0.0, 'max_leaf_nodes': None, 'center': True, 'label_weights': OrderedDict([(False, 10)]), 'max_features': 'log2', 'presort': 'auto', 'labels': [True, False], 'warm_start': False, 'subsample': 1.0, 'population_rates': None, 'min_weight_fraction_leaf': 0.0, 'min_impurity_split': None, 'multilabel': False, 'loss': 'deviance'} + - params: {'center': True, 'n_estimators': 700, 'min_weight_fraction_leaf': 0.0, 'presort': 'auto', 'learning_rate': 0.01, 'scale': True, 'population_rates': None, 'max_depth': 7, 'min_impurity_decrease': 0.0, 'loss': 'deviance', 'max_features': 'log2', 'max_leaf_nodes': None, 'random_state': None, 'init': None, 'warm_start': False, 'min_impurity_split': None, 'multilabel': False, 'criterion': 'friedman_mse', 'min_samples_split': 2, 'min_samples_leaf': 1, 'verbose': 0, 'labels': [True, False], 'subsample': 1.0, 'label_weights': OrderedDict([(False, 10)])} Environment: - - revscoring_version: '2.2.2' + - revscoring_version: '2.2.5' - platform: 'Linux-4.9.0-6-amd64-x86_64-with-debian-9.4' - machine: 'x86_64' - - version: '#1 SMP Debian 4.9.82-1+deb9u3 (2018-03-02)' + - version: '#1 SMP Debian 4.9.88-1+deb9u1 (2018-05-07)' - system: 'Linux' - processor: '' - python_build: ('default', 'Jan 19 2017 14:11:04') - python_compiler: 'GCC 6.3.0 20170118' - python_branch: '' - python_implementation: 'CPython' - python_revision: '' - python_version: '3.5.3' - release: '4.9.0-6-amd64' Statistics: - counts (n=19455): + counts (n=19436): label n ~True ~False ------- ----- --- ------- -------- - True 18945 --> 18624 321 - False 510 --> 259 251 + True 18926 --> 18590 336 + False 510 --> 256 254 rates: True False ---------- ------ ------- sample 0.974 0.026 population 0.967 0.033 - match_rate (micro=0.937, macro=0.5): + match_rate (micro=0.936, macro=0.5): False True ------- ------ - 0.033 0.967 - filter_rate (micro=0.063, macro=0.5): + 0.034 0.966 + filter_rate (micro=0.064, macro=0.5): False True ------- ------ - 0.967 0.033 - recall (micro=0.967, macro=0.738): + 0.966 0.034 + recall (micro=0.966, macro=0.74): False True ------- ------ - 0.492 0.983 - !recall (micro=0.508, macro=0.738): + 0.498 0.982 + !recall (micro=0.514, macro=0.74): False True ------- ------ - 0.983 0.492 - precision (micro=0.967, macro=0.74): + 0.982 0.498 + precision (micro=0.967, macro=0.735): False True ------- ------ - 0.496 0.983 - !precision (micro=0.512, macro=0.74): + 0.488 0.983 + !precision (micro=0.504, macro=0.735): False True ------- ------ - 0.983 0.496 - f1 (micro=0.967, macro=0.739): + 0.983 0.488 + f1 (micro=0.967, macro=0.738): False True ------- ------ - 0.494 0.983 - !f1 (micro=0.51, macro=0.739): + 0.493 0.983 + !f1 (micro=0.509, macro=0.738): False True ------- ------ - 0.983 0.494 - accuracy (micro=0.967, macro=0.967): + 0.983 0.493 + accuracy (micro=0.966, macro=0.966): False True ------- ------ - 0.967 0.967 - fpr (micro=0.492, macro=0.262): + 0.966 0.966 + fpr (micro=0.486, macro=0.26): False True ------- ------ - 0.017 0.508 - roc_auc (micro=0.925, macro=0.924): + 0.018 0.502 + roc_auc (micro=0.927, macro=0.926): False True ------- ------ - 0.924 0.925 - pr_auc (micro=0.979, macro=0.735): + 0.925 0.927 + pr_auc (micro=0.98, macro=0.737): False True ------- ------ - 0.473 0.997 + 0.477 0.997 - - score_schema: {'type': 'object', 'properties': {'prediction': {'type': 'bool', 'description': 'The most likely label predicted by the estimator'}, 'probability': {'type': 'object', 'description': 'A mapping of probabilities onto each of the potential output labels', 'properties': {'false': 'number', 'true': 'number'}}}, 'title': 'Scikit learn-based classifier score with probability'} + - score_schema: {'properties': {'prediction': {'description': 'The most likely label predicted by the estimator', 'type': 'bool'}, 'probability': {'properties': {'true': 'number', 'false': 'number'}, 'description': 'A mapping of probabilities onto each of the potential output labels', 'type': 'object'}}, 'type': 'object', 'title': 'Scikit learn-based classifier score with probability'} diff --git a/model_info/enwiktionary.reverted.md b/model_info/enwiktionary.reverted.md index 39e9850..77ccccf 100644 --- a/model_info/enwiktionary.reverted.md +++ b/model_info/enwiktionary.reverted.md @@ -1,81 +1,81 @@ Model Information: - type: RandomForest - version: 0.4.0 - - params: {'label_weights': OrderedDict([(True, 10)]), 'verbose': 0, 'random_state': None, 'min_weight_fraction_leaf': 0.0, 'multilabel': False, 'min_samples_split': 2, 'max_depth': None, 'n_estimators': 320, 'class_weight': None, 'min_impurity_split': None, 'population_rates': None, 'bootstrap': True, 'oob_score': False, 'warm_start': False, 'min_impurity_decrease': 0.0, 'n_jobs': 1, 'scale': True, 'max_features': 'log2', 'min_samples_leaf': 3, 'criterion': 'entropy', 'labels': [True, False], 'max_leaf_nodes': None, 'center': True} + - params: {'center': True, 'labels': [True, False], 'max_leaf_nodes': None, 'random_state': None, 'multilabel': False, 'scale': True, 'population_rates': None, 'bootstrap': True, 'max_features': 'log2', 'n_estimators': 320, 'max_depth': None, 'label_weights': OrderedDict([(True, 10)]), 'criterion': 'entropy', 'min_impurity_split': None, 'min_weight_fraction_leaf': 0.0, 'min_samples_leaf': 3, 'n_jobs': 1, 'min_impurity_decrease': 0.0, 'min_samples_split': 2, 'oob_score': False, 'warm_start': False, 'class_weight': None, 'verbose': 0} Environment: - - revscoring_version: '2.2.2' + - revscoring_version: '2.2.5' - platform: 'Linux-4.9.0-6-amd64-x86_64-with-debian-9.4' - machine: 'x86_64' - - version: '#1 SMP Debian 4.9.82-1+deb9u3 (2018-03-02)' + - version: '#1 SMP Debian 4.9.88-1+deb9u1 (2018-05-07)' - system: 'Linux' - processor: '' - python_build: ('default', 'Jan 19 2017 14:11:04') - python_compiler: 'GCC 6.3.0 20170118' - python_branch: '' - python_implementation: 'CPython' - python_revision: '' - python_version: '3.5.3' - release: '4.9.0-6-amd64' Statistics: - counts (n=91875): + counts (n=91839): label n ~True ~False ------- ----- --- ------- -------- - True 388 --> 14 374 - False 91487 --> 2 91485 + True 387 --> 14 373 + False 91452 --> 2 91450 rates: True False ---------- ------ ------- sample 0.004 0.996 population 0.005 0.995 match_rate (micro=0.995, macro=0.5): False True ------- ------ 1 0 filter_rate (micro=0.005, macro=0.5): False True ------- ------ 0 1 recall (micro=0.995, macro=0.518): False True ------- ------ 1 0.036 !recall (micro=0.041, macro=0.518): False True ------- ------ 0.036 1 precision (micro=0.995, macro=0.942): False True ------- ------ 0.995 0.888 - !precision (micro=0.888, macro=0.942): + !precision (micro=0.889, macro=0.942): False True ------- ------ 0.888 0.995 f1 (micro=0.993, macro=0.534): False True ------- ------ - 0.998 0.069 + 0.998 0.07 !f1 (micro=0.074, macro=0.534): False True ------- ------ - 0.069 0.998 + 0.07 0.998 accuracy (micro=0.995, macro=0.995): False True ------- ------ 0.995 0.995 fpr (micro=0.959, macro=0.482): False True ------- ------ 0.964 0 - roc_auc (micro=0.981, macro=0.977): + roc_auc (micro=0.983, macro=0.98): False True ------- ------ - 0.981 0.973 - pr_auc (micro=0.997, macro=0.701): + 0.983 0.977 + pr_auc (micro=0.997, macro=0.705): False True ------- ------ - 1 0.402 + 1 0.411 - - score_schema: {'type': 'object', 'properties': {'probability': {'type': 'object', 'properties': {'true': 'number', 'false': 'number'}, 'description': 'A mapping of probabilities onto each of the potential output labels'}, 'prediction': {'type': 'bool', 'description': 'The most likely label predicted by the estimator'}}, 'title': 'Scikit learn-based classifier score with probability'} + - score_schema: {'properties': {'probability': {'description': 'A mapping of probabilities onto each of the potential output labels', 'properties': {'false': 'number', 'true': 'number'}, 'type': 'object'}, 'prediction': {'description': 'The most likely label predicted by the estimator', 'type': 'bool'}}, 'title': 'Scikit learn-based classifier score with probability', 'type': 'object'} diff --git a/models/enwiki.damaging.gradient_boosting.model b/models/enwiki.damaging.gradient_boosting.model index 3794733..8eed65b 100644 --- a/models/enwiki.damaging.gradient_boosting.model +++ b/models/enwiki.damaging.gradient_boosting.model @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d7c82d65e4149a3f7f58cfe19dbd7729b1b63284883124c0fa463e37d4d563c -size 10411075 +oid sha256:c34c0a0cbdb866e4c9a231b67e95c00091f4be73fa142cf6ec0bf45950c9576c +size 10367064 diff --git a/models/enwiki.goodfaith.gradient_boosting.model b/models/enwiki.goodfaith.gradient_boosting.model index 1b69c81..a37a915 100644 --- a/models/enwiki.goodfaith.gradient_boosting.model +++ b/models/enwiki.goodfaith.gradient_boosting.model @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:53d3495f8218fcf828dfd76e4297b14eab23359bcec25712d80cba95cac5b165 -size 10246607 +oid sha256:958286e3873176962816c0cdee454dd1691a20453f70d98600c25d6c9b5ba3ae +size 10266435 diff --git a/models/enwiktionary.reverted.rf.model b/models/enwiktionary.reverted.rf.model index f578fcd..aaaf2a6 100644 --- a/models/enwiktionary.reverted.rf.model +++ b/models/enwiktionary.reverted.rf.model @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eac56c0d5709c500e2c474fde5541a4e6c535c7e65fb034f8dc0cec2bb79e570 -size 13525429 +oid sha256:154fea88dccec68f4fce40eb81aba3c2f6e29391229d14d644cd751b6c90f4e8 +size 13508552 diff --git a/requirements.txt b/requirements.txt index a4ec755..1e9b077 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,12 @@ deep_merge >= 0.0.2 docopt==0.6.2 jinja2 mwapi >= 0.5.1, < 0.5.999 mwtypes >= 0.3.0, < 0.3.999 mwreverts >= 0.1.0, < 0.1.999 -revscoring >= 2.2.0, < 2.2.999 +revscoring >= 2.2.5, < 2.2.999 statistics >= 1.0.3, < 1.0.999 tqdm >= 4.15.0, < 4.15.999 mysqltsv >= 0.0.7, < 0.0.999 para==0.0.5 pytest >= 3.2.3, < 3.2.999