{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "judicial-clearing", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 6, "id": "defensive-liver", "metadata": {}, "outputs": [], "source": [ "data = pd.read_csv('MediaSearch_20210127.tsv',sep='\\t',header=None)" ] }, { "cell_type": "code", "execution_count": 8, "id": "brave-cylinder", "metadata": {}, "outputs": [], "source": [ "def strip_prefix(col):\n", "    # Keep only the text after the first ':' of every value (values without ':' are kept whole).\n", "    # astype(str) lets this cell be re-run after the columns have already been converted.\n", "    return col.astype(str).str.split(':', n=1).str[-1]\n", "\n", "feature_cols = data.columns[2:17]\n", "data[1] = strip_prefix(data[1]).astype(int)\n", "# Assign by column label so the parsed columns replace the old ones and get float dtype.\n", "data[feature_cols] = data[feature_cols].apply(lambda col: strip_prefix(col).astype(float))" ] }, { "cell_type": "code", "execution_count": 70, "id": "unable-shore", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789101112131415161718
7431-180.0000000.000000.0000000.0000000.0000000.0000000.00.00.0000000.017.07251018.3499980.0000000.0000000.0# Patterson's General Store Ledger - DPLA - 00...Miss James
7432-180.0000000.000000.0000000.0000000.0000000.0000000.00.00.0000000.017.07251018.3499980.0000000.0000000.0# Patterson's General Store Ledger - DPLA - 00...Miss James
7433-180.0000000.000005.5876924.94994310.36335910.3633590.00.07.2919620.011.98600012.2896140.0000000.0000000.0# James Meredith's Letter to the Registrar, Un...Miss James
74341815.92740416.8835116.05894915.29192012.41814212.4181420.00.022.1271020.019.74137318.95845417.16188817.9417860.0# Miss James by Camille Silvy (cropped).jpgMiss James
7435080.0000000.0000015.97781416.0528900.0000000.0000000.00.022.9878900.017.30608417.8252700.0000000.0000000.0# Hindhead Tunnel Miss James Bridge.JPGMiss James
7436-180.0000000.000000.0000000.0000000.0000000.0000000.00.00.0000000.019.74619920.0567360.0000000.0000000.0# Herstmonceux Place - geograph.org.uk - 15863...Miss James
7437-1817.79538316.7528015.97652516.07052014.96597714.9659770.00.023.0228420.017.44126515.37114414.03435014.1177580.0# Miss James' Walk NT sign.jpgMiss James
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 \\\n", "7431 -1 8 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 \n", "7432 -1 8 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 \n", "7433 -1 8 0.000000 0.00000 5.587692 4.949943 10.363359 10.363359 \n", "7434 1 8 15.927404 16.88351 16.058949 15.291920 12.418142 12.418142 \n", "7435 0 8 0.000000 0.00000 15.977814 16.052890 0.000000 0.000000 \n", "7436 -1 8 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 \n", "7437 -1 8 17.795383 16.75280 15.976525 16.070520 14.965977 14.965977 \n", "\n", " 8 9 10 11 12 13 14 15 \\\n", "7431 0.0 0.0 0.000000 0.0 17.072510 18.349998 0.000000 0.000000 \n", "7432 0.0 0.0 0.000000 0.0 17.072510 18.349998 0.000000 0.000000 \n", "7433 0.0 0.0 7.291962 0.0 11.986000 12.289614 0.000000 0.000000 \n", "7434 0.0 0.0 22.127102 0.0 19.741373 18.958454 17.161888 17.941786 \n", "7435 0.0 0.0 22.987890 0.0 17.306084 17.825270 0.000000 0.000000 \n", "7436 0.0 0.0 0.000000 0.0 19.746199 20.056736 0.000000 0.000000 \n", "7437 0.0 0.0 23.022842 0.0 17.441265 15.371144 14.034350 14.117758 \n", "\n", " 16 17 18 \n", "7431 0.0 # Patterson's General Store Ledger - DPLA - 00... Miss James \n", "7432 0.0 # Patterson's General Store Ledger - DPLA - 00... Miss James \n", "7433 0.0 # James Meredith's Letter to the Registrar, Un... Miss James \n", "7434 0.0 # Miss James by Camille Silvy (cropped).jpg Miss James \n", "7435 0.0 # Hindhead Tunnel Miss James Bridge.JPG Miss James \n", "7436 0.0 # Herstmonceux Place - geograph.org.uk - 15863... 
Miss James \n", "7437 0.0 # Miss James' Walk NT sign.jpg Miss James " ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.loc[data[1] == 8]  # rows whose qid (column 1) equals 8" ] }, { "cell_type": "code", "execution_count": 11, "id": "recreational-leonard", "metadata": {}, "outputs": [], "source": [ "# Copy columns 2..16 of the raw table into their own frame and relabel them '1'..'15'.\n", "feature = data.iloc[:, 2:17].copy()\n", "feature.columns = list(map(str, range(1, 16)))" ] }, { "cell_type": "code", "execution_count": 54, "id": "martial-luxembourg", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
123456789101112131415
count8783.0000008783.0000008783.0000008783.0000008783.0000008783.0000008783.0000008783.0000008783.0000008783.08783.0000008783.0000008783.0000008783.0000008783.000000
mean1.8731181.9196549.7290969.3153738.1790328.1790321.9608761.88269711.6086550.013.28179512.3981023.9871703.7973451.606014
std4.6562794.6514938.0467138.1894608.1417908.1417904.9050014.86231511.1701360.08.8568938.9785997.2098127.1094954.235872
min0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.00.0000000.0000000.0000000.0000000.000000
25%0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.07.3604866.2308770.0000000.0000000.000000
50%0.0000000.0000009.8896089.5371167.4365587.4365580.0000000.00000011.4901620.012.59385711.6312140.0000000.0000000.000000
75%0.0000000.00000014.97032014.61392513.45965713.4596570.0000000.00000017.8130280.018.69549517.7508537.3269076.3764960.000000
max31.27809730.21830243.35006744.52718447.12815547.12815530.32835030.14207873.6894000.054.40340055.52229347.79889045.36058814.294704
\n", "
" ], "text/plain": [ " 1 2 3 4 5 \\\n", "count 8783.000000 8783.000000 8783.000000 8783.000000 8783.000000 \n", "mean 1.873118 1.919654 9.729096 9.315373 8.179032 \n", "std 4.656279 4.651493 8.046713 8.189460 8.141790 \n", "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "25% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "50% 0.000000 0.000000 9.889608 9.537116 7.436558 \n", "75% 0.000000 0.000000 14.970320 14.613925 13.459657 \n", "max 31.278097 30.218302 43.350067 44.527184 47.128155 \n", "\n", " 6 7 8 9 10 \\\n", "count 8783.000000 8783.000000 8783.000000 8783.000000 8783.0 \n", "mean 8.179032 1.960876 1.882697 11.608655 0.0 \n", "std 8.141790 4.905001 4.862315 11.170136 0.0 \n", "min 0.000000 0.000000 0.000000 0.000000 0.0 \n", "25% 0.000000 0.000000 0.000000 0.000000 0.0 \n", "50% 7.436558 0.000000 0.000000 11.490162 0.0 \n", "75% 13.459657 0.000000 0.000000 17.813028 0.0 \n", "max 47.128155 30.328350 30.142078 73.689400 0.0 \n", "\n", " 11 12 13 14 15 \n", "count 8783.000000 8783.000000 8783.000000 8783.000000 8783.000000 \n", "mean 13.281795 12.398102 3.987170 3.797345 1.606014 \n", "std 8.856893 8.978599 7.209812 7.109495 4.235872 \n", "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "25% 7.360486 6.230877 0.000000 0.000000 0.000000 \n", "50% 12.593857 11.631214 0.000000 0.000000 0.000000 \n", "75% 18.695495 17.750853 7.326907 6.376496 0.000000 \n", "max 54.403400 55.522293 47.798890 45.360588 14.294704 " ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "feature.describe()" ] }, { "cell_type": "code", "execution_count": 101, "id": "different-british", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
123456789101112131415
021.59961020.68296616.61670016.7380700.0000000.0000000.0000000.00000023.0662380.018.57168616.56506714.09811014.8341250.000000
10.0000000.00000017.82968117.9550600.0000000.0000000.0000000.00000023.2442550.020.54407519.7959020.0000000.0000000.000000
20.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.00.0000000.0000000.0000000.00000013.733603
30.0000000.00000013.58403013.6913939.7828379.7828370.0000000.00000021.3102150.017.36810117.4810640.0000000.00000014.262909
40.0000000.00000014.19413014.26631410.03650910.03650914.34054214.65528723.0263000.012.06278012.2862950.0000000.0000000.000000
................................................
87780.0000000.0000000.0000000.0000005.4721885.4721880.0000000.0000000.0000000.010.71478710.6219510.0000000.00000011.901220
87790.0000000.0000008.8625788.5129569.2095099.2095098.9340179.08581913.4066170.011.30706511.2444680.0000000.0000000.000000
87800.0000000.0000000.0000000.0000007.2522797.2522790.0000000.0000000.0000000.010.1667059.7031000.0000000.0000000.000000
878111.45362110.80798212.64771512.3279560.0000000.0000000.0000000.00000014.8472950.011.63413911.38257513.58971213.6593420.000000
87820.0000000.0000000.0000000.0000008.4493778.4493770.0000000.0000000.0000000.010.39974210.2227920.0000000.0000000.000000
\n", "

8783 rows × 15 columns

\n", "
" ], "text/plain": [ " 1 2 3 4 5 6 \\\n", "0 21.599610 20.682966 16.616700 16.738070 0.000000 0.000000 \n", "1 0.000000 0.000000 17.829681 17.955060 0.000000 0.000000 \n", "2 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "3 0.000000 0.000000 13.584030 13.691393 9.782837 9.782837 \n", "4 0.000000 0.000000 14.194130 14.266314 10.036509 10.036509 \n", "... ... ... ... ... ... ... \n", "8778 0.000000 0.000000 0.000000 0.000000 5.472188 5.472188 \n", "8779 0.000000 0.000000 8.862578 8.512956 9.209509 9.209509 \n", "8780 0.000000 0.000000 0.000000 0.000000 7.252279 7.252279 \n", "8781 11.453621 10.807982 12.647715 12.327956 0.000000 0.000000 \n", "8782 0.000000 0.000000 0.000000 0.000000 8.449377 8.449377 \n", "\n", " 7 8 9 10 11 12 13 \\\n", "0 0.000000 0.000000 23.066238 0.0 18.571686 16.565067 14.098110 \n", "1 0.000000 0.000000 23.244255 0.0 20.544075 19.795902 0.000000 \n", "2 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 \n", "3 0.000000 0.000000 21.310215 0.0 17.368101 17.481064 0.000000 \n", "4 14.340542 14.655287 23.026300 0.0 12.062780 12.286295 0.000000 \n", "... ... ... ... ... ... ... ... \n", "8778 0.000000 0.000000 0.000000 0.0 10.714787 10.621951 0.000000 \n", "8779 8.934017 9.085819 13.406617 0.0 11.307065 11.244468 0.000000 \n", "8780 0.000000 0.000000 0.000000 0.0 10.166705 9.703100 0.000000 \n", "8781 0.000000 0.000000 14.847295 0.0 11.634139 11.382575 13.589712 \n", "8782 0.000000 0.000000 0.000000 0.0 10.399742 10.222792 0.000000 \n", "\n", " 14 15 \n", "0 14.834125 0.000000 \n", "1 0.000000 0.000000 \n", "2 0.000000 13.733603 \n", "3 0.000000 14.262909 \n", "4 0.000000 0.000000 \n", "... ... ... 
\n", "8778 0.000000 11.901220 \n", "8779 0.000000 0.000000 \n", "8780 0.000000 0.000000 \n", "8781 13.659342 0.000000 \n", "8782 0.000000 0.000000 \n", "\n", "[8783 rows x 15 columns]" ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [ "feature" ] }, { "cell_type": "code", "execution_count": 12, "id": "electronic-provider", "metadata": {}, "outputs": [], "source": [ "rating = data[0].copy()" ] }, { "cell_type": "code", "execution_count": 16, "id": "chicken-munich", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEGCAYAAAB1iW6ZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAcFklEQVR4nO3de3RU9b338ffXgHIJRrw0KqFcPB4r5RJCBJSDQqmKl2Kp9DlSVOJTZNk+iEepz6IPLquULm9V8XbwcqxSa0XkKIcqCtUSFaseLiYg4AUBlYigeONSVML3+WM2OUmYkEmyN2R+fF5rZTmzZ89nfm72fLKzZ+895u6IiEj2O2h/D0BEROKhQhcRCYQKXUQkECp0EZFAqNBFRALRYn+98JFHHumdO3fOaN5t27bRtm3bRMaRVHa25SaZrdzks7MtN8nsbMttaPaSJUs+dfej0j7o7vvlp0+fPp6pBQsWZDxvQyWVnW25SWYrN/nsbMtNMjvbchuaDSz2OnpVu1xERAKhQhcRCYQKXUQkEPvtQ9F0vv32W9avX8+OHTtqTM/Ly2PVqlWJvGZS2dmWm2R2krlr166loKCAli1bxp4vkm2aVaGvX7+edu3a0blzZ8ysavqWLVto165dIq+ZVHa25SaZnVTuV199xTfffMP69evp0qVL7Pki2aZZ7XLZsWMHRxxxRI0yF6mLmXHEEUfs8RedyIGqWRU6oDKXBtH6IvI/ml2hi4hI4zSrfei1dZ74TKx56248J7asqVOnMnbsWNq0adPkrLfeeosLLrgAM2PWrFkcd9xxMYxQRA40zbrQ96eqM68OSv9HzNSpU7nwwgtjKfTZs2czYsQIrrnmmiZnJaGyspKcnJx9+po7d+6kRQutnvtSug2oCT12UpJmepwbRxIf7XKpZt26dZxwwglcfPHFdO/enQ8//JBf/OIXFBcX8/3vf5/f/OY3ANx555189NFHDB48mMGDBwMwf/58Tj75ZIqKivjpT3/K1q1b98gvKyujf//+9OzZk+HDh/P5558zd+5cpk6dyrRp06qyqsvNzWXSpEn06tWL/v37s2nTpqqx/uAHP6Bnz54MGTKEDz74AICSkhLGjx/PKaecQteuXZk1axYA1157LYWFhRQWFtKhQwcuueQSAP70pz/Rt29fCgsLueKKK6isrKx63QkTJtCrVy9effVVbrvtNrp370737t2ZOnXqHuOsrKykpKSE7t2706NHD26//XYAVq9ezbBhw+jVqxdFRUW89957uDtXX3111byPP/44AKWlpQwcOJBhw4bRrVs3KisrufrqqznppJPo2bMn9913X6P/bUUOBCr0Wt59911++ctfsmLFCjp16sTvfvc7Fi9ezLJly3jxxRdZtmwZ48eP59hjj2XBggUsWLCATz/9lClTpvD888+zdOlSiouL
ufvuu/fIvvjii7nppptYtmwZPXr04Prrr+fss8/msssu48orr2TBggV7PGfbtm3079+f8vJyTj31VB5++GEALr/8ckaPHs2yZcsYNWoU48ePr3rOhg0bWLhwIU8//TQTJ04EYPLkyZSVlVFaWsrhhx/OuHHjWLVqFY8//jivvPIKZWVlHHTQQTz66KNVr9uvXz/Ky8tp3bo1Dz30EK+//jqvvfYaDzzwAG+88UaNcZaVlVFRUcGbb77J8uXLq35hjBo1iksvvZTy8nL+/ve/c8wxx/Dkk09SVlZGeXk5zz//PFdffTUbNmwAYOnSpdxxxx288847PPjgg+Tl5bFo0SIWLVrEAw88wNq1a5v+jywSKBV6LZ06daJ///5V92fOnElRURG9e/dmxYoVrFy5co/nvPbaa6xcuZIBAwZQWFjI9OnT+fDDD2vM8+WXX/LFF19w2mmnATB69Gheeumlesdz8MEHc+655wLQp0+fqi3xV199lZ/97GcAXHTRRSxcuLDqOT/+8Y856KCD6NatGxs3bqya7u5ceOGFXHXVVfTp04cXXniBJUuWcNJJJ1FYWMiLL77ImjVrAMjJyeH8888HYOHChQwfPpy2bduSm5vLT37yE15++eUa4+zatStr1qzh8ssv57nnnuPQQw9ly5YtVFRU8KMf/QiAVq1a0aZNGxYuXMjIkSPJyckhPz+f0047jUWLFgHQt2/fqmPK58+fzx//+EcKCwvp168fmzdv5t133613mYkcqLSTspbql7Bcu3Ytv//971m0aBHt27enpKQk7THP7s7pp5/OY489VjVty5YtsYynZcuWVYfm5eTksHPnznqfc8ghh9QY227XXXcdBQUFVVvP7s7o0aO54YYbqsa8+wSgVq1aNWi/efv27SkvL2fevHnce++9zJw5kzvuuCPj5+9Wffm7O3fddRdnnnlmg3NEDkTaQt+Lr776irZt25KXl8fGjRt59tlnqx5r165dVWn379+fV155hdWrVwOp3RW1tyTz8vJo37591ZbtI488UrW13hinnHIKM2bMAODRRx9l4MCBe53/L3/5C88//zx33nln1bQhQ4Ywa9asqv3yn332Ge+///4ezx04cCCzZ89m+/btbNu2jaeeemqP1/v000/ZtWsX559/PlOmTGHp0qW0a9eOgoICnn76aQC+/vprtm/fzsCBA3n88ceprKzkk08+4aWXXqJv3757vO6ZZ57JtGnT+PbbbwF455132LZtWwOWksiBpVlvoe/+JD3J0933plevXvTu3Zvvfe97dOzYkQEDBlQ9NnbsWIYOHVq1L/3hhx9m5MiRfP311wBMmjSJoqKiGnnTp0/nsssuY/v27XTt2pWHHnqo0WO76667uOSSS7jllls46qij6s267bbbqKioqCrOYcOGMXnyZKZMmcIZZ5zBrl27yMnJYdq0aXTq1KnGc4uKiigpKal67pgxY+jdu3eNeSoqKrjkkkvYtWsXQNVW/yOPPMKYMWO44YYbaNmyJU888QTDhw/n1VdfpVevXpgZN998M0cffTRvvfVWjcwxY8awbt06ioqKcHeOOuooZs+e3ehlJhI6q/4n+b5UXFzsixcvrjFt1apVnHjiiXvMq+uXJJ+bZHbSuXWtN41VWlrKoEGDYsvbF9lx5NZ12OKty/fc7ovjsMXmvCz2ZW5Ds81sibsXp3tMu1xERALRrHe5iEj2q+uM73QnLemEpaZpdlvo+2sXkGQnrS8i/6NZFXqrVq3YvHmz3qSSEXdn8+bNtGrVan8PRaRZaFa7XAoKCli/fj2ffPJJjek7duxI7E2bVHa25SaZnWTuYYcdRkFBQezZItmoWRV6y5Yt037zTGlp6R6HycUlqexsy00yO9tyRbJVs9rlIiIijZdRoZvZUDN728xWm9nENI9/18wWmNkbZrbMzM6Of6giIrI39Ra6meUA9wBnAd2AkWbWrdZs1wAz3b03cAHw73EPVERE9i6TLfS+wGp3
X+Pu3wAzgPNqzePAodHtPOCj+IYoIiKZqPfUfzMbAQx19zHR/YuAfu4+rto8xwDzgfZAW+CH7r4kTdZYYCxAfn5+n90Xl6rP1q1byc3NzWjehkoqO9tyk8xWbvLZceQur/hyj2n5rWHjP/act0eHvCbl1pXdkNy6NOdlHEf24MGD6zz1P66jXEYCD7v7rWZ2MvCImXV3913VZ3L3+4H7IXUtl0yvXdBcrqEQcm6S2cpNPjuO3HRfNVfntVxGZf5a6XLrym5Ibl2a8zJOOjuTXS4VQMdq9wuiadX9HJgJ4O6vAq2AI5s8OhERyVgmhb4ION7MupjZwaQ+9JxTa54PgCEAZnYiqUL/BBER2WfqLXR33wmMA+YBq0gdzbLCzCab2bBotgnApWZWDjwGlLjO3xcR2acy2ofu7nOBubWmXVvt9kpgQO3niYjIvqMzRUVEAqFCFxEJhApdRCQQKnQRkUCo0EVEAqFCFxEJhApdRCQQKnQRkUCo0EVEAqFCFxEJhApdRCQQKnQRkUCo0EVEAqFCFxEJhApdRCQQKnQRkUCo0EVEAqFCFxEJREZfQSd16zzxmbTTJ/TYSUmtx9bdeM6+GJKIHKC0hS4iEggVuohIIFToIiKBUKGLiARChS4iEggVuohIIFToIiKB0HHozVi6Y9zTHd8OOsZdRLSFLiISDBW6iEggVOgiIoFQoYuIBEKFLiISCBW6iEggdNjiASipS/7qUsIi+5cKXSQB+uUm+4N2uYiIBEKFLiISCBW6iEggMip0MxtqZm+b2Wozm1jHPP/LzFaa2Qoz+3O8wxQRkfrU+6GomeUA9wCnA+uBRWY2x91XVpvneODXwAB3/9zMvpPUgEVEJL1MttD7AqvdfY27fwPMAM6rNc+lwD3u/jmAu2+Kd5giIlIfc/e9z2A2Ahjq7mOi+xcB/dx9XLV5ZgPvAAOAHOA6d38uTdZYYCxAfn5+nxkzZmQ0yK1bt5Kbm5vRvA3V1OzlFV+mnZ7fGjb+o+a0Hh3ympydLreh2UmNOcllkU5S60Ucudm4LLJtfatLc14v4sgePHjwEncvTvdYXMehtwCOBwYBBcBLZtbD3b+oPpO73w/cD1BcXOyDBg3KKLy0tJRM522opmanuzY5pI43vnV5zcW7blTDXidddrrchmYnNeYkl0U6Sa0XceRm47LItvWtLs15vUg6O5NdLhVAx2r3C6Jp1a0H5rj7t+6+ltTW+vFNHp2IiGQsk0JfBBxvZl3M7GDgAmBOrXlmk9o6x8yOBP4ZWBPfMEVEpD71Frq77wTGAfOAVcBMd19hZpPNbFg02zxgs5mtBBYAV7v75qQGLSIie8poH7q7zwXm1pp2bbXbDlwV/YiIyH6gM0VFRAKhQhcRCYQKXUQkECp0EZFAqNBFRAKhQhcRCYQKXUQkECp0EZFAqNBFRAKhQhcRCYQKXUQkECp0EZFAqNBFRAKhQhcRCYQKXUQkECp0EZFAqNBFRAKhQhcRCYQKXUQkECp0EZFAqNBFRAKhQhcRCYQKXUQkECp0EZFAqNBFRAKhQhcRCYQKXUQkECp0EZFAqNBFRAKhQhcRCYQKXUQkECp0EZFAqNBFRAKhQhcRCYQKXUQkECp0EZFAqNBFRAKhQhcRCURGhW5mQ83sbTNbbWYT9zLf+WbmZlYc3xBFRCQT9Ra6meUA9wBnAd2AkWbWLc187YArgNfjHqSIiNQvky30vsBqd1/j7t8AM4Dz0sz3W+AmYEeM4xMRkQxlUugdgA+r3V8fTatiZkVAR3d/JsaxiYhIA5i7730GsxHAUHcfE92/COjn7uOi+wcBfwNK3H2dmZUCv3L3xWmyxgJjAfLz8/vMmDEjo0Fu3bqV3NzcjP+nGqKp2csrvkw7Pb81bPxHzWk9OuQ1OTtdbkOzkxpzkssinaTWizhys3FZZNv6VpfmvF7EkT148OAl7p72c8pMCv1k4Dp3PzO6/2sAd78hup8HvAdsjZ5yNPAZMCxdqe9W
XFzsixfX+XANpaWlDBo0KKN5G6qp2Z0npv+jZEKPndy6vEWNaetuPKfJ2elyG5qd1JiTXBbpJLVexJGbjcsi29a3ujTn9SKObDOrs9Az2eWyCDjezLqY2cHABcCc3Q+6+5fufqS7d3b3zsBr1FPmIiISv3oL3d13AuOAecAqYKa7rzCzyWY2LOkBiohIZvb8WyoNd58LzK017do65h3U9GGJiEhD6UxREZFAqNBFRAKhQhcRCYQKXUQkECp0EZFAqNBFRAKhQhcRCYQKXUQkECp0EZFAqNBFRAKhQhcRCYQKXUQkECp0EZFAqNBFRAKhQhcRCYQKXUQkECp0EZFAqNBFRAKhQhcRCURG3ykqItLcdJ74TNrpE3rspCTNY+tuPCfpIe13KnQRkVrS/bLIhl8U2uUiIhIIFbqISCBU6CIigVChi4gEQoUuIhIIFbqISCBU6CIigVChi4gEQoUuIhIIFbqISCBU6CIigVChi4gEQoUuIhIIFbqISCBU6CIigVChi4gEQoUuIhIIFbqISCAyKnQzG2pmb5vZajObmObxq8xspZktM7MXzKxT/EMVEZG9qbfQzSwHuAc4C+gGjDSzbrVmewModveewCzg5rgHKiIie5fJFnpfYLW7r3H3b4AZwHnVZ3D3Be6+Pbr7GlAQ7zBFRKQ+5u57n8FsBDDU3cdE9y8C+rn7uDrmvxv42N2npHlsLDAWID8/v8+MGTMyGuTWrVvJzc3NaN6Gamr28oov007Pbw0b/1FzWo8OeU3OTpfb0OykxpzkskgnqfUijtxsXBYhr29xZMeRW5eG/PsNHjx4ibsXp3usRZNHUo2ZXQgUA6ele9zd7wfuByguLvZBgwZllFtaWkqm8zZUU7NLJj6TdvqEHju5dXnNxbtuVMNeJ112utyGZic15iSXRTpJrRdx5Gbjsgh5fYsjO47cusS1LmdS6BVAx2r3C6JpNZjZD4FJwGnu/nWTRyYiIg2SyT70RcDxZtbFzA4GLgDmVJ/BzHoD9wHD3H1T/MMUEZH61Fvo7r4TGAfMA1YBM919hZlNNrNh0Wy3ALnAE2ZWZmZz6ogTEZGEZLQP3d3nAnNrTbu22u0fxjwuERFpoFg/FG3OOu/lA5TaH4Csu/GcfTEkEZFYNbtCT1e86UoXVLwiItXpWi4iIoFodlvoIunoLzeR+mkLXUQkENpClwOatvwlJNpCFxEJhApdRCQQKnQRkUCo0EVEAqFCFxEJhI5yERHZR5K+BIm20EVEAqFCFxEJhApdRCQQKnQRkUCo0EVEAqFCFxEJhApdRCQQKnQRkUCo0EVEAqFCFxEJhApdRCQQKnQRkUCo0EVEAqFCFxEJhApdRCQQKnQRkUCo0EVEAqFCFxEJhApdRCQQKnQRkUCo0EVEAqFCFxEJhApdRCQQKnQRkUCo0EVEAqFCFxEJREaFbmZDzextM1ttZhPTPH6ImT0ePf66mXWOfaQiIrJX9Ra6meUA9wBnAd2AkWbWrdZsPwc+d/d/Am4Hbop7oCIisneZbKH3BVa7+xp3/waYAZxXa57zgOnR7VnAEDOz+IYpIiL1MXff+wxmI4Ch7j4mun8R0M/dx1Wb581onvXR/feieT6tlTUWGBvdPQF4O8NxHgl8Wu9cjZNUdrblJpmt3OSzsy03yexsy21odid3PyrdAy3iG0/93P1+4P6GPs/MFrt7cQJDSiw723KTzFZu8tnZlptkdrblxpmdyS6XCqBjtfsF0bS085hZCyAP2NzUwYmISOYyKfRFwPFm1sXMDgYuAObUmmcOMDq6PQL4m9e3L0dERGJV7y4Xd99pZuOAeUAO8Ad3X2Fmk4HF7j4HeBB4xMxWA5+RKv04NXg3TTPIzrbcJLOVm3x2tuUmmZ1tubFl1/uhqIiIZAedKSoiEggVuohIIJp1oZvZH8xsU3Sce5y5Hc1sgZmtNLMVZnZFTLmtzOy/zaw8yr0+jtxar5FjZm+Y2dMxZq4zs+VmVmZmi2PM
PczMZpnZW2a2ysxOjin3hGisu3++MrN/iyn7yujf7k0ze8zMWsWUe0WUuaKpY033vjCzw83sr2b2bvTf9jHl/jQa8y4za/RhdXVk3xKtG8vM7CkzOyym3N9GmWVmNt/Mjo0jt9pjE8zMzezImMZ7nZlVVFufz25obhV3b7Y/wKlAEfBmzLnHAEXR7XbAO0C3GHINyI1utwReB/rHPPargD8DT8eYuQ44MoF/v+nAmOj2wcBhCbxGDvAxqZMtmprVAVgLtI7uzwRKYsjtDrwJtCF1IMLzwD81IW+P9wVwMzAxuj0RuCmm3BNJnQRYChTHPOYzgBbR7ZtiHPOh1W6PB+6NIzea3pHUASLvN+Y9U8d4rwN+1dT1zN2b9xa6u79E6qiZuHM3uPvS6PYWYBWpN3NTc93dt0Z3W0Y/sX3qbGYFwDnAf8SVmRQzyyO18j4I4O7fuPsXCbzUEOA9d38/prwWQOvofIo2wEcxZJ4IvO7u2919J/Ai8JPGhtXxvqh++Y3pwI/jyHX3Ve6e6RndDc2eHy0PgNdIneMSR+5X1e62pRHvwb10z+3A/21MZj25sWjWhb4vRFeG7E1qazqOvBwzKwM2AX9191hyI1NJrUy7YsyE1Mo538yWRJdniEMX4BPgoWgX0X+YWduYsqu7AHgsjiB3rwB+D3wAbAC+dPf5MUS/CQw0syPMrA1wNjVP1otDvrtviG5/DOTHnJ+0/w08G1eYmf3OzD4ERgHXxpR5HlDh7uVx5NUyLtpN9IfG7C7b7YAudDPLBf4T+Ldav9Ubzd0r3b2Q1NZGXzPrHkeumZ0LbHL3JXHk1fIv7l5E6oqa/8fMTo0hswWpPy2nuXtvYBupXQGxiU50GwY8EVNee1Jbul2AY4G2ZnZhU3PdfRWpXQrzgeeAMqCyqbl7eT0nxr8Mk2Zmk4CdwKNxZbr7JHfvGGWOq2/++kS/iP8fMf1yqGUacBxQSGpD4tbGBh2whW5mLUmV+aPu/mTc+dHuhQXA0JgiBwDDzGwdqSte/sDM/hRHcLRlirtvAp4idYXNploPrK/2F8osUgUfp7OApe6+Maa8HwJr3f0Td/8WeBI4JY5gd3/Q3fu4+6nA56Q+t4nTRjM7BiD676aY8xNhZiXAucCo6BdR3B4Fzo8h5zhSv+jLo/dgAbDUzI5uarC7b4w2BHcBD9CE998BWehmZqT27a5y99tizD1q9yf1ZtYaOB14K45sd/+1uxe4e2dSuxn+5u5N3no0s7Zm1m73bVIfVDX5qCJ3/xj40MxOiCYNAVY2NbeWkcS0uyXyAdDfzNpE68gQUp+vNJmZfSf673dJ7T//cxy51VS//MZo4L9izo+dmQ0ltQtxmLtvjzH3+Gp3zyOG96C7L3f377h75+g9uJ7UgRUfNzV79y/iyHCa8v6L45PVpH5IvVk3AN+SWoA/jyn3X0j9SbqM1J+/ZcDZMeT2BN6Ict8Erk1ouQwipqNcgK5AefSzApgU4zgLgcXR8pgNtI8xuy2pC8DlxbxsrydVAG8CjwCHxJT7MqlfaOXAkCZm7fG+AI4AXgDeJXUUzeEx5Q6Pbn8NbATmxTjm1cCH1d6DjTkaJV3uf0b/fsuAvwAd4sit9fg6GneUS7rxPgIsj8Y7BzimseuGTv0XEQnEAbnLRUQkRCp0EZFAqNBFRAKhQhcRCYQKXUQkECp0CY6ZjY+u7tigMw/NrLOZ/SypcYkkTYUuIfolcLq7j2rg8zoDDS50M8tp6HNEkqBCl6CY2b2kTpZ61swmRRc7+u/oAmHnRfN0NrOXzWxp9LP79P4bSV1Eqyy6LnqJmd1dLftpMxsU3d5qZreaWTlwspldGL1OmZndp5KX/UGFLkFx98tIXfJ2MKmzSf/m7n2j+7dElzfYRGoLvgj4V+DO6OkTgZfdvdDdb6/npdqSuiRuL1JnrP4rMMBTF2arJHWVP5F9qsX+HoBIgs4gdUGzX0X3WwHfJVX4d5tZIany/edGZFeSOsUc
Utd86QMsSl0ChtZkycWxJCwqdAmZAed7rS9oMLPrSF2XpBepv1J31PH8ndT8K7b619HtcPfdl8A1YLq7/zqOQYs0lna5SMjmAZdHV07EzHpH0/OADZ66XOlFpL7GDmALqa8k3G0dUGhmB5lZR+q+rOkLwIhqV1Q83Mw6xfp/IpIBFbqE7LekvgZwmZmtiO4D/DswOvpA83ukvnwDUle7q7TUl3xfCbxC6jtGV5Laz7403Yu4+0rgGlLf+rQM+Cup760V2ad0tUURkUBoC11EJBAqdBGRQKjQRUQCoUIXEQmECl1EJBAqdBGRQKjQRUQC8f8BgwnM14Siz+4AAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Share of rows with a non-zero score per feature; mean() of the boolean mask avoids a hard-coded row count.\n", "df = pd.DataFrame({'feature':list(range(1, feature.shape[1] + 1)), 'rate of nonzero score':(feature > 0).mean(axis=0).values})\n", "ax = df.plot.bar(x='feature', y='rate of nonzero score', rot=0, grid=True)" ] }, { "cell_type": "code", "execution_count": 108, "id": "quick-watson", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
123456789101112131415
021.59961020.68296616.61670016.7380700.0000000.0000000.0000000.00000023.0662380.018.57168616.56506714.09811014.8341250.000000
10.0000000.00000017.82968117.9550600.0000000.0000000.0000000.00000023.2442550.020.54407519.7959020.0000000.0000000.000000
20.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.00.0000000.0000000.0000000.00000013.733603
30.0000000.00000013.58403013.6913939.7828379.7828370.0000000.00000021.3102150.017.36810117.4810640.0000000.00000014.262909
40.0000000.00000014.19413014.26631410.03650910.03650914.34054214.65528723.0263000.012.06278012.2862950.0000000.0000000.000000
................................................
87780.0000000.0000000.0000000.0000005.4721885.4721880.0000000.0000000.0000000.010.71478710.6219510.0000000.00000011.901220
87790.0000000.0000008.8625788.5129569.2095099.2095098.9340179.08581913.4066170.011.30706511.2444680.0000000.0000000.000000
87800.0000000.0000000.0000000.0000007.2522797.2522790.0000000.0000000.0000000.010.1667059.7031000.0000000.0000000.000000
878111.45362110.80798212.64771512.3279560.0000000.0000000.0000000.00000014.8472950.011.63413911.38257513.58971213.6593420.000000
87820.0000000.0000000.0000000.0000008.4493778.4493770.0000000.0000000.0000000.010.39974210.2227920.0000000.0000000.000000
\n", "

8783 rows × 15 columns

\n", "
" ], "text/plain": [ " 1 2 3 4 5 6 \\\n", "0 21.599610 20.682966 16.616700 16.738070 0.000000 0.000000 \n", "1 0.000000 0.000000 17.829681 17.955060 0.000000 0.000000 \n", "2 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "3 0.000000 0.000000 13.584030 13.691393 9.782837 9.782837 \n", "4 0.000000 0.000000 14.194130 14.266314 10.036509 10.036509 \n", "... ... ... ... ... ... ... \n", "8778 0.000000 0.000000 0.000000 0.000000 5.472188 5.472188 \n", "8779 0.000000 0.000000 8.862578 8.512956 9.209509 9.209509 \n", "8780 0.000000 0.000000 0.000000 0.000000 7.252279 7.252279 \n", "8781 11.453621 10.807982 12.647715 12.327956 0.000000 0.000000 \n", "8782 0.000000 0.000000 0.000000 0.000000 8.449377 8.449377 \n", "\n", " 7 8 9 10 11 12 13 \\\n", "0 0.000000 0.000000 23.066238 0.0 18.571686 16.565067 14.098110 \n", "1 0.000000 0.000000 23.244255 0.0 20.544075 19.795902 0.000000 \n", "2 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.000000 \n", "3 0.000000 0.000000 21.310215 0.0 17.368101 17.481064 0.000000 \n", "4 14.340542 14.655287 23.026300 0.0 12.062780 12.286295 0.000000 \n", "... ... ... ... ... ... ... ... \n", "8778 0.000000 0.000000 0.000000 0.0 10.714787 10.621951 0.000000 \n", "8779 8.934017 9.085819 13.406617 0.0 11.307065 11.244468 0.000000 \n", "8780 0.000000 0.000000 0.000000 0.0 10.166705 9.703100 0.000000 \n", "8781 0.000000 0.000000 14.847295 0.0 11.634139 11.382575 13.589712 \n", "8782 0.000000 0.000000 0.000000 0.0 10.399742 10.222792 0.000000 \n", "\n", " 14 15 \n", "0 14.834125 0.000000 \n", "1 0.000000 0.000000 \n", "2 0.000000 13.733603 \n", "3 0.000000 14.262909 \n", "4 0.000000 0.000000 \n", "... ... ... 
\n", "8778 0.000000 11.901220 \n", "8779 0.000000 0.000000 \n", "8780 0.000000 0.000000 \n", "8781 13.659342 0.000000 \n", "8782 0.000000 0.000000 \n", "\n", "[8783 rows x 15 columns]" ] }, "execution_count": 108, "metadata": {}, "output_type": "execute_result" } ], "source": [ "feature" ] }, { "cell_type": "code", "execution_count": 118, "id": "verbal-cleaning", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
123456789101112131415
021.5996120.68296616.61670016.7380700.0000000.0000000.0000000.00000023.0662380.018.57168616.56506714.0981114.8341250.000000
10.000000.00000017.82968117.9550600.0000000.0000000.0000000.00000023.2442550.020.54407519.7959020.000000.0000000.000000
20.000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.00.0000000.0000000.000000.00000013.733603
30.000000.00000013.58403013.6913939.7828379.7828370.0000000.00000021.3102150.017.36810117.4810640.000000.00000014.262909
40.000000.00000014.19413014.26631410.03650910.03650914.34054214.65528723.0263000.012.06278012.2862950.000000.0000000.000000
................................................
87770.000000.0000000.0000000.0000009.2357439.2357430.0000000.0000000.0000000.010.92448010.9955900.000000.00000011.798944
87780.000000.0000000.0000000.0000005.4721885.4721880.0000000.0000000.0000000.010.71478710.6219510.000000.00000011.901220
87790.000000.0000008.8625788.5129569.2095099.2095098.9340179.08581913.4066170.011.30706511.2444680.000000.0000000.000000
87800.000000.0000000.0000000.0000007.2522797.2522790.0000000.0000000.0000000.010.1667059.7031000.000000.0000000.000000
87820.000000.0000000.0000000.0000008.4493778.4493770.0000000.0000000.0000000.010.39974210.2227920.000000.0000000.000000
\n", "

7459 rows × 15 columns

\n", "
" ], "text/plain": [ " 1 2 3 4 5 6 \\\n", "0 21.59961 20.682966 16.616700 16.738070 0.000000 0.000000 \n", "1 0.00000 0.000000 17.829681 17.955060 0.000000 0.000000 \n", "2 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "3 0.00000 0.000000 13.584030 13.691393 9.782837 9.782837 \n", "4 0.00000 0.000000 14.194130 14.266314 10.036509 10.036509 \n", "... ... ... ... ... ... ... \n", "8777 0.00000 0.000000 0.000000 0.000000 9.235743 9.235743 \n", "8778 0.00000 0.000000 0.000000 0.000000 5.472188 5.472188 \n", "8779 0.00000 0.000000 8.862578 8.512956 9.209509 9.209509 \n", "8780 0.00000 0.000000 0.000000 0.000000 7.252279 7.252279 \n", "8782 0.00000 0.000000 0.000000 0.000000 8.449377 8.449377 \n", "\n", " 7 8 9 10 11 12 13 \\\n", "0 0.000000 0.000000 23.066238 0.0 18.571686 16.565067 14.09811 \n", "1 0.000000 0.000000 23.244255 0.0 20.544075 19.795902 0.00000 \n", "2 0.000000 0.000000 0.000000 0.0 0.000000 0.000000 0.00000 \n", "3 0.000000 0.000000 21.310215 0.0 17.368101 17.481064 0.00000 \n", "4 14.340542 14.655287 23.026300 0.0 12.062780 12.286295 0.00000 \n", "... ... ... ... ... ... ... ... \n", "8777 0.000000 0.000000 0.000000 0.0 10.924480 10.995590 0.00000 \n", "8778 0.000000 0.000000 0.000000 0.0 10.714787 10.621951 0.00000 \n", "8779 8.934017 9.085819 13.406617 0.0 11.307065 11.244468 0.00000 \n", "8780 0.000000 0.000000 0.000000 0.0 10.166705 9.703100 0.00000 \n", "8782 0.000000 0.000000 0.000000 0.0 10.399742 10.222792 0.00000 \n", "\n", " 14 15 \n", "0 14.834125 0.000000 \n", "1 0.000000 0.000000 \n", "2 0.000000 13.733603 \n", "3 0.000000 14.262909 \n", "4 0.000000 0.000000 \n", "... ... ... 
\n", "8777 0.000000 11.798944 \n", "8778 0.000000 11.901220 \n", "8779 0.000000 0.000000 \n", "8780 0.000000 0.000000 \n", "8782 0.000000 0.000000 \n", "\n", "[7459 rows x 15 columns]" ] }, "execution_count": 118, "metadata": {}, "output_type": "execute_result" } ], "source": [ "feature.iloc[rating[rating != 0].index]" ] }, { "cell_type": "code", "execution_count": 119, "id": "hidden-nitrogen", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 1\n", "1 1\n", "2 1\n", "3 1\n", "4 -1\n", " ..\n", "8777 -1\n", "8778 1\n", "8779 -1\n", "8780 1\n", "8782 -1\n", "Name: 0, Length: 7459, dtype: int64" ] }, "execution_count": 119, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rating[rating != 0]" ] }, { "cell_type": "code", "execution_count": 115, "id": "medical-athens", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,\n", " ...\n", " 8772, 8773, 8774, 8775, 8776, 8777, 8778, 8779, 8780, 8782],\n", " dtype='int64', length=7459)" ] }, "execution_count": 115, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rating[rating != 0].index" ] }, { "cell_type": "code", "execution_count": 72, "id": "contemporary-pasta", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.42344681012562146" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(feature > 0).sum(axis=0).sum() / (feature.shape[0]*feature.shape[1])" ] }, { "cell_type": "code", "execution_count": 19, "id": "received-running", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3907 1324 3552\n" ] } ], "source": [ "print((rating == 1).sum(), (rating == 0).sum(), (rating == -1).sum())" ] }, { "cell_type": "code", "execution_count": 123, "id": "supported-hardware", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.15074575885232835" ] }, "execution_count": 123, "metadata": {}, "output_type": "execute_result" } ], "source": [ "1324 / 8783" 
] }, { "cell_type": "code", "execution_count": 20, "id": "abroad-tiger", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3907 4876\n" ] } ], "source": [ "numbinrating = rating.apply(lambda x: 1 if x > 0 else 0)\n", "print((numbinrating == 1).sum(), (numbinrating == 0).sum())" ] }, { "cell_type": "code", "execution_count": 120, "id": "leading-backing", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 1\n", "1 1\n", "2 1\n", "3 1\n", "4 0\n", " ..\n", "8777 0\n", "8778 1\n", "8779 0\n", "8780 1\n", "8782 0\n", "Name: 0, Length: 7459, dtype: int64" ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# rating[...].index holds index labels, so select with .loc (label-based), not .iloc (positional)\n", "numbinrating.loc[rating[rating != 0].index]" ] }, { "cell_type": "code", "execution_count": 56, "id": "careful-louis", "metadata": {}, "outputs": [], "source": [ "from sklearn.linear_model import LogisticRegression\n", "from sklearn.model_selection import train_test_split\n", "from sklearn import metrics, preprocessing\n", "import math" ] }, { "cell_type": "code", "execution_count": 121, "id": "refined-ethiopia", "metadata": {}, "outputs": [], "source": [ "balanced_accuracy = []\n", "avg_precision_score = []\n", "brier_loss = []\n", "f1_score = []\n", "coefs = []\n", "intercepts = []\n", "\n", "for rs in range(10):\n", " # keep only rows with rating != 0; .loc because rating[...].index holds index labels, not positions\n", " X_train,X_test,y_train,y_test = train_test_split(feature.loc[rating[rating != 0].index], \n", " numbinrating.loc[rating[rating != 0].index],\n", " test_size=0.2,random_state=rs)\n", " # standardization using min-max scaler\n", " #min_max_scaler = preprocessing.MinMaxScaler()\n", " #X_train_minmax = min_max_scaler.fit_transform(X_train)\n", " #X_test_minmax = min_max_scaler.transform(X_test)\n", " # logistic regression\n", " logit=LogisticRegression(fit_intercept=True,solver='liblinear')\n", " fitted=logit.fit(X_train,y_train)\n", " y_pred=logit.predict(X_test)\n", " balanced_accuracy.append(metrics.balanced_accuracy_score(y_test, y_pred))\n", " 
y_pred_p=logit.predict_proba(X_test)\n", " avg_precision_score.append(metrics.average_precision_score(y_test, y_pred_p.T[1]))\n", " brier_loss.append(metrics.brier_score_loss(y_test, y_pred_p.T[1]))\n", " # F1 of the hard class predictions (average_precision_score was previously stored under this name)\n", " f1_score.append(metrics.f1_score(y_test, y_pred))\n", " coefs.append(fitted.coef_[0])\n", " intercepts.append(fitted.intercept_[0])\n", " #print('balanced accuracy: {:.4f}'.format(metrics.balanced_accuracy_score(y_test, y_pred)))\n", " #print('average precision score: {:.4f}'.format(metrics.average_precision_score(y_test, y_pred_p.T[1])))\n", " #print('brier score loss: {:.4f}'.format(metrics.brier_score_loss(y_test, y_pred_p.T[1]))) # The smaller, the better\n", " #print('f1 score: {:.4f}'.format(metrics.f1_score(y_test, y_pred)))" ] }, { "cell_type": "code", "execution_count": 122, "id": "friendly-gnome", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "balanced accuracy: 0.6624\n", "average precision score: 0.7551\n", "brier score loss: 0.2073\n", "f1 score: 0.6293\n" ] } ], "source": [ "# no standardization\n", "print('balanced accuracy: {:.4f}'.format(np.mean(balanced_accuracy)))\n", "print('average precision score: {:.4f}'.format(np.mean(avg_precision_score)))\n", "print('brier score loss: {:.4f}'.format(np.mean(brier_loss))) # The smaller, the better\n", "print('f1 score: {:.4f}'.format(np.mean(f1_score)))" ] }, { "cell_type": "code", "execution_count": 131, "id": "printable-singer", "metadata": {}, "outputs": [], "source": [ "balanced_accuracy = []\n", "avg_precision_score = []\n", "brier_loss = []\n", "f1_score = []\n", "coefs = []\n", "intercepts = []\n", "\n", "for rs in range(10):\n", " X_train,X_test,y_train,y_test = train_test_split(feature,numbinrating,test_size=0.2,random_state=rs)\n", " # standardization using min-max scaler\n", " #min_max_scaler = preprocessing.MinMaxScaler()\n", " #X_train_minmax = min_max_scaler.fit_transform(X_train)\n", " #X_test_minmax = 
min_max_scaler.transform(X_test)\n", " # logistic regression\n", " logit=LogisticRegression(fit_intercept=True,solver='liblinear')\n", " fitted=logit.fit(X_train,y_train)\n", " y_pred=logit.predict(X_test)\n", " balanced_accuracy.append(metrics.balanced_accuracy_score(y_test, y_pred))\n", " y_pred_p=logit.predict_proba(X_test)\n", " avg_precision_score.append(metrics.average_precision_score(y_test, y_pred_p.T[1]))\n", " brier_loss.append(metrics.brier_score_loss(y_test, y_pred_p.T[1]))\n", " # F1 of the hard class predictions (average_precision_score was previously stored under this name)\n", " f1_score.append(metrics.f1_score(y_test, y_pred))\n", " coefs.append(fitted.coef_[0])\n", " intercepts.append(fitted.intercept_[0])\n", " #print('balanced accuracy: {:.4f}'.format(metrics.balanced_accuracy_score(y_test, y_pred)))\n", " #print('average precision score: {:.4f}'.format(metrics.average_precision_score(y_test, y_pred_p.T[1])))\n", " #print('brier score loss: {:.4f}'.format(metrics.brier_score_loss(y_test, y_pred_p.T[1]))) # The smaller, the better\n", " #print('f1 score: {:.4f}'.format(metrics.f1_score(y_test, y_pred)))" ] }, { "cell_type": "code", "execution_count": 103, "id": "thorough-shopper", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "balanced accuracy: 0.6306\n", "average precision score: 0.6506\n", "brier score loss: 0.2153\n", "f1 score: 0.5318\n" ] } ], "source": [ "# no standardization\n", "print('balanced accuracy: {:.4f}'.format(np.mean(balanced_accuracy)))\n", "print('average precision score: {:.4f}'.format(np.mean(avg_precision_score)))\n", "print('brier score loss: {:.4f}'.format(np.mean(brier_loss))) # The smaller, the better\n", "print('f1 score: {:.4f}'.format(np.mean(f1_score)))" ] }, { "cell_type": "code", "execution_count": null, "id": "literary-conversation", "metadata": {}, "outputs": [], "source": [ "# best\n", "# balanced accuracy: 0.6342\n", "# average precision score: 0.6821\n", "# brier score loss: 0.2137\n", "# f1 score: 0.5533" ] }, { "cell_type": "code", "execution_count": 
124, "id": "joined-destination", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEGCAYAAABo25JHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAgzUlEQVR4nO3df5xV9X3n8de7gEIYFhXIVMF23GraooMio8QfMTMiijUJaYKpBlnshrI8NmysyiMPXBu1hvahjUo3NqmySKQmdfzR/GADKYpktq5VCyjKL3+gmTVQAwqGOImog5/9455h79y5I3fmngMznPfz8ZgH53zP977Pdy733s+cc+45RxGBmZnl128d6gGYmdmh5UJgZpZzLgRmZjnnQmBmlnMuBGZmOTfwUA+gN0aOHBl1dXUV9f31r3/N0KFDUx9DVrlZZjs3++z+lptldn/LzTK7r+SuW7fuzYgY1WVBRPS7nwkTJkSlfvrTn1bctyeyys0y27nZZ/e33Cyz+1tultl9JRdYG2U+U71ryMws51wIzMxyzoXAzCzn+uXB4nLef/99tm3bxt69ezu1Dx8+nC1btqS+vqxys8xOM3fw4MGMGTOGQYMGpZJnZofOYVMItm3bxrBhw6irq0PS/va3336bYcOGpb6+rHKzzE4rNyLYtWsX27Zt44QTTkhhZGZ2KB02u4b27t3LiBEjOhUBy4YkRowY0WXry8z6p8OmEAAuAgeRn2uzw8dhVQjMzKznDptjBKXq5i9PNa/1lktSzSvn3Xff5ZJLLmHnzp1cf/31HHfcccyZM4dBgwaxfPlyrrrqKh5++OFuHz9r1iyuueYaxo4d2+N1t7S0cMQRR3D22WdX8yuYWUbKfaZdW9/OlWXae/p5ddgWgv7o2WefBeCJJ55g2LBhzJkzh+uuu44rrrgC4EOLAMDixYt7ve6WlhZqampcCMxyyLuGUvQP//APjBs3jlNPPZUZM2bQ2trK+eefz7hx45g0aRKvvfYaAG+88Qaf//znOeOMMzjjjDN44okn2LlzJ1dccQVr1qzhnHPO4e677+bBBx/ka1/7GtOnT6e1tZVTTjkFgH379jFv3jxOOeUUxo0bx5133glAY2Mja9euBeCRRx7hrLPO4vTTT+fSSy+lra0NgLq6Om688UZOP/106uvreeGFF2htbeWuu+5i4cKFnHbaaTz++OM89NBDnHLKKZx66qmcd955h+DZNLODxVsEKdm0aRMLFizgX//1Xxk5ciS7d+9m5syZ+3+WLFnCV77yFX74wx9y1VVXcfXVV3Puuefy2muvcdFFF7FlyxYWL17Mbbfdxv3338+wYcN48skn+dSnPsW0adNobW3dv65FixbR2trK+vXrGThwILt37+40ljfffJMFCxawatUqhg4dyq233sodd9zB1VdfDcDIkSN55pln+Pa3v81tt93G4sWLmTNnDjU1NcybNw+A+vp6Vq5cyejRo/nlL395sJ5GMzsEXAhSsnr1ai699FJGjhwJwDHHHMOTTz7J97//fQBmzJjBV7/6VQBWrVrF5s2b9z/2V7/61f6/2CuxatUq5syZw8CBA/evq9hTTz3F5s2bOeeccwB47733OOuss/Yv/9znPgfAhAkT9o+v1DnnnMOVV17JF77whf39zezw5EJwCHzwwQc89dRTDB48OJP8iGDy5Mncf//9ndrffvttAI488kgABgwYQHt7e9mMu+66i6effprly5czYcIE1q1bx4gRIzIZr5kdWj5GkJLzzz+fhx56iF27dgGwe/duzj77bJqbmwH43ve+xyc+8QkALrzwwv379QHWr1/fo3VNnjyZu+++e/+HeOmuoY9//OM88cQTbN26FShcs/yll1760Mxhw4btLxQAr7zyChMnTuTmm29m1KhR/Pzn
P+/RGM2s/0hli0DSFOB/AAOAxRFxS8ny84C/BcYBl0XEw0XLZgJ/kcwuiIilaYyp4+tTWV4KotjJJ5/M9ddfzyc/+UkGDBjA+PHjufPOO/nTP/1TvvGNbzBq1Ci+853vAPDNb36TL3/5y4wbN4729nbOO+887rrrrorXNWvWLF566SXGjRvHoEGD+LM/+zPmzp27f/moUaO49957ufzyy3n33XcBWLBgAccee2y3mZ/+9KeZNm0aP/rRj7jzzjtZuHAhL7/8MhHBpEmTOPXUU3v5zJhZX1d1IZA0APgWMBnYBqyRtCwiNhd1ew24EphX8thjgBuBBiCAdclj36p2XIdCx4HhYqtXr+7Sb+TIkTzwwANd2hsbG2lsbNz/l/m99967f1ldXR0bN24EYODAgdxxxx3ccccdnR7f0tKyf/r8889nzZo1nZa//fbbnQ46NzQ07H/Mxz72MZ5//vn9yzq2Xszs8JfGrqEzga0R8WpEvAc0A1OLO0REa0Q8D3xQ8tiLgEcjYnfy4f8oMCWFMZmZWYVUuHtZFQHSNGBKRMxK5mcAEyNibpm+9wI/7tg1JGkeMDgiFiTzXwPeiYjbyjx2NjAboLa2dkLHvvcOw4cP58QTT+wyvn379jFgwICqfsdyssrNMjvt3K1bt7Jnzx7a2tqoqalJLbdDVrlZZve33Cyz+1tultlp5G7YvqdLW+0Q2PFO1771o4eXzWhqaloXEQ2l7f3mW0MRsQhYBNDQ0BCNjY2dlm/ZsoWampouF0Pr65d0PpjZaeZGBIMHD2b8+PG0tLRQ+v+Rhqxys8zub7lZZve33Cyz08gtdymJa+vbuX1D14/x1uk9W1cau4a2A8cXzY9J2rJ+bCeDBw9m165dVLuFYwfWcT+CrL7+amYHVxpbBGuAkySdQOFD/DLgixU+diXw15KOTuYvBK7rzSDGjBnDtm3beOONNzq17927N5MPrKxys8xOM7fjDmVm1v9VXQgiol3SXAof6gOAJRGxSdLNwNqIWCbpDOAHwNHApyX9ZUScHBG7JX2dQjEBuDkidpdd0QEMGjSo7N2yWlpaGD9+fG8iP1RWuVlmZzlmM+u/UjlGEBErgBUlbTcUTa+hsNun3GOXAEvSGIeZmfWczyw2M8s5FwIzs5xzITAzyzkXAjOznHMhMDPLORcCM7OccyEwM8s5FwIzs5xzITAzyzkXAjOznHMhMDPLORcCM7OccyEwM8s5FwIzs5xzITAzyzkXAjOznHMhMDPLuVQKgaQpkl6UtFXS/DLLj5T0QLL8aUl1SfsgSUslbZC0RVKv7ldsZma9V3UhkDQA+BZwMTAWuFzS2JJuXwLeiogTgYXArUn7pcCREVEPTAD+S0eRMDOzgyONLYIzga0R8WpEvAc0A1NL+kwFlibTDwOTJAkIYKikgcAQ4D3gVymMyczMKqSIqC5AmgZMiYhZyfwMYGJEzC3qszHpsy2ZfwWYCOwB7gMmAR8Bro6IRd2sZzYwG6C2tnZCc3NzReNra2ujpqaml7/dwc/NMtu52Wf3t9wss/tbbpbZaeRu2L6nS1vtENjxTte+9aOHl81oampaFxENpe0DqxpZ9c4E9gHHAUcDj0taFRGvlnZMCsQigIaGhmhsbKxoBS0tLVTatyeyys0y27nZZ/e33Cyz+1tultlp5F45f3mXtmvr27l9Q9eP8dbpPVtXGruGtgPHF82PSdrK9kl2Aw0HdgFfBP45It6PiJ3AE0CXamVmZtlJoxCsAU6SdIKkI4DLgGUlfZYBM5PpacDqKOyTeg04H0DSUODjwAspjMnMzCpUdSGIiHZgLrAS2AI8GBGbJN0s6TNJt3uAEZK2AtcAHV8x/RZQI2kThYLynYh4vtoxmZlZ5VI5RhARK4AVJW03FE3vpfBV0dLHtZVrNzOzg8dnFpuZ5ZwLgZlZzrkQmJnlnAuBmVnOuRCYmeWcC4GZWc65EJiZ5ZwLgZlZzrkQmJnlnAuBmVnOuRCYmeWc
C4GZWc65EJiZ5ZwLgZlZzrkQmJnlnAuBmVnOpVIIJE2R9KKkrZLml1l+pKQHkuVPS6orWjZO0pOSNknaIGlwGmMyM7PKVF0IJA2gcMvJi4GxwOWSxpZ0+xLwVkScCCwEbk0eOxD4LjAnIk4GGoH3qx2TmZlVLo0tgjOBrRHxakS8BzQDU0v6TAWWJtMPA5MkCbgQeD4ingOIiF0RsS+FMZmZWYUUEdUFSNOAKRExK5mfAUyMiLlFfTYmfbYl868AE4ErgAnAR4FRQHNE/E0365kNzAaora2d0NzcXNH42traqKmp6eVvd/Bzs8x2bvbZ/S03y+z+lptldhq5G7bv6dJWOwR2vNO1b/3o4WUzmpqa1kVEQ2l7Kjevr8JA4FzgDOA3wGOS1kXEY6UdI2IRsAigoaEhGhsbK1pBS0sLlfbtiaxys8x2bvbZ/S03y+z+lptldhq5V85f3qXt2vp2bt/Q9WO8dXrP1pXGrqHtwPFF82OStrJ9kuMCw4FdwDbgXyLizYj4DbACOD2FMZmZWYXSKARrgJMknSDpCOAyYFlJn2XAzGR6GrA6CvukVgL1kj6SFIhPAptTGJOZmVWo6l1DEdEuaS6FD/UBwJKI2CTpZmBtRCwD7gHuk7QV2E2hWBARb0m6g0IxCWBFRHTd/jEzs8ykcowgIlZQ2K1T3HZD0fRe4NJuHvtdCl8hNTOzQ8BnFpuZ5ZwLgZlZzrkQmJnlnAuBmVnOHeoTynKtrswJIlA4SaT05JHWWy45GEMysxzyFoGZWc65EJiZ5Zx3DVWg3C6ccrtvwLtwzKz/8RaBmVnOuRCYmeWcC4GZWc75GMFhyF9LNbOe8BaBmVnOuRCYmeWcC4GZWc65EJiZ5ZwLgZlZzqVSCCRNkfSipK2S5pdZfqSkB5LlT0uqK1n+O5LaJM1LYzxmZla5qguBpAHAt4CLgbHA5ZLGlnT7EvBWRJwILARuLVl+B/CTasdiZmY9l8YWwZnA1oh4NSLeA5qBqSV9pgJLk+mHgUmSBCDps8DPgE0pjMXMzHpIEVFdgDQNmBIRs5L5GcDEiJhb1Gdj0mdbMv8KMBHYCzwKTAbmAW0RcVs365kNzAaora2d0NzcXNH42traqKmp6eVvV7Bh+54ubbVDYMc7XfvWjx5eVW532X0htztpPMcHMzfL7P6Wm2V2f8vNMruvfA41NTWti4iG0vZDfWbxTcDCiGhLNhC6FRGLgEUADQ0N0djYWNEKWlpaqLRvd8pdZfTa+nZu39D16WudXvm6yuV2l90XcruTxnN8MHOzzO5vuVlm97fcNLK7P6t/H7f/n193ae/Jmf1ZfQ5BOoVgO3B80fyYpK1cn22SBgLDgV0UtgqmSfob4CjgA0l7I+LvUhiXmZlVII1CsAY4SdIJFD7wLwO+WNJnGTATeBKYBqyOwj6pT3R0kHQThV1DLgJmZgdR1YUgItolzQVWAgOAJRGxSdLNwNqIWAbcA9wnaSuwm0KxMDOzPiCVYwQRsQJYUdJ2Q9H0XuDSA2TclMZYzMysZ3xmsZlZzrkQmJnlnAuBmVnOuRCYmeWcC4GZWc65EJiZ5ZwLgZlZzrkQmJnlnAuBmVnOuRCYmeWcC4GZWc65EJiZ5dyhvjGN9TPlbrxxbX172Ztm9OSmG1nlmtmBeYvAzCznXAjMzHLOhcDMLOdSKQSSpkh6UdJWSfPLLD9S0gPJ8qcl1SXtkyWtk7Qh+ff8NMZjZmaVq7oQSBoAfAu4GBgLXC5pbEm3LwFvRcSJwELg1qT9TeDTEVFP4Z7G91U7HjMz65k0tgjOBLZGxKsR8R7QDEwt6TMVWJpMPwxMkqSIeDYi/j1p3wQMkXRkCmMyM7MKKSKqC5CmAVMiYlYyPwOYGBFzi/psTPpsS+ZfSfq8WZIzJyIu6GY9s4HZALW1tROam5srGl9bWxs1NTW9+t06bNi+p0tb7RDY8U7XvvWjh1eV2112X8jtLjur5yKN3O6k8bo4HHKzzO7L773u
VDvmnrz34OC/R5qamtZFRENpe584j0DSyRR2F13YXZ+IWAQsAmhoaIjGxsaKsltaWqi0b3fKfZf92vp2bt/Q9elrnV75usrldpfdF3K7y87quUgjtztpvC4Oh9wss/vye6871Y65J+896DvvkTR2DW0Hji+aH5O0le0jaSAwHNiVzI8BfgD8p4h4JYXxmJlZD6RRCNYAJ0k6QdIRwGXAspI+yygcDAaYBqyOiJB0FLAcmB8RT6QwFjMz66GqC0FEtANzgZXAFuDBiNgk6WZJn0m63QOMkLQVuAbo+IrpXOBE4AZJ65Ofj1Y7JjMzq1wqxwgiYgWwoqTthqLpvcClZR63AFiQxhjMzKx3fGaxmVnOuRCYmeWcC4GZWc65EJiZ5VyfOKHMrD/yzXTscOEtAjOznHMhMDPLORcCM7OccyEwM8s5FwIzs5xzITAzyzkXAjOznPN5BHZYK/ddfyj/fX9/19/yylsEZmY550JgZpZzLgRmZjnnQmBmlnOpFAJJUyS9KGmrpPlllh8p6YFk+dOS6oqWXZe0vyjpojTGY2Zmlau6EEgaAHwLuBgYC1wuaWxJty8Bb0XEicBC4NbksWMp3Oz+ZGAK8O0kz8zMDpI0tgjOBLZGxKsR8R7QDEwt6TMVWJpMPwxMkqSkvTki3o2InwFbkzwzMztIFBHVBUjTgCkRMSuZnwFMjIi5RX02Jn22JfOvABOBm4CnIuK7Sfs9wE8i4uEy65kNzAaora2d0Nzc3Gn5hu17yo6vdgjseKdzW/3o4b34TTtra2ujpqam6pyDme3c7LPTyC33Wi73Ooa+8VruyXsPqh9zVs8x9L/Pi57mNjU1rYuIhtL2fnNCWUQsAhYBNDQ0RGNjY6fl5W4GAoUTh27f0PnXbJ3eWLZvT7S0tFA6hrRkle3c7LPTyC33Wi73Ooa+8VruyXsPqh9zVs8x9L/Pi7Ry09g1tB04vmh+TNJWto+kgcBwYFeFjzUzswylUQjWACdJOkHSERQO/i4r6bMMmJlMTwNWR2Gf1DLgsuRbRScAJwH/lsKYzMysQlXvGoqIdklzgZXAAGBJRGySdDOwNiKWAfcA90naCuymUCxI+j0IbAbagS9HxL5qx2RmZpVL5RhBRKwAVpS03VA0vRe4tJvH/hXwV2mMw8zMes5nFpuZ5ZwLgZlZzrkQmJnlnAuBmVnOuRCYmeWcC4GZWc65EJiZ5ZwLgZlZzrkQmJnlnAuBmVnOuRCYmeWcC4GZWc65EJiZ5ZwLgZlZzrkQmJnlnAuBmVnOVVUIJB0j6VFJLyf/Ht1Nv5lJn5clzUzaPiJpuaQXJG2SdEs1YzEzs96pdotgPvBYRJwEPJbMdyLpGOBGYCJwJnBjUcG4LSL+ABgPnCPp4irHY2ZmPVRtIZgKLE2mlwKfLdPnIuDRiNgdEW8BjwJTIuI3EfFTgIh4D3gGGFPleMzMrIcUEb1/sPTLiDgqmRbwVsd8UZ95wOCIWJDMfw14JyJuK+pzFIVCcEFEvNrNumYDswFqa2snNDc3d1q+YfuesmOsHQI73uncVj96eIW/Yffa2tqoqampOudgZjs3++w0csu9lsu9jqFvvJZ78t6D6sec1XMM/e/zoqe5TU1N6yKiobT9gDevl7QK+O0yi64vnomIkNTjqiJpIHA/8M3uikCSvwhYBNDQ0BCNjY2dll85f3nZx11b387tGzr/mq3TG8v27YmWlhZKx5CWrLKdm312GrnlHt3S0sIX+uhz0ZP3HlT//kvjOT5cPi/Syj1gIYiIC7pbJmmHpGMj4nVJxwI7y3TbTufX9higpWh+EfByRPxtJQPuTustl5Rtb2lpSeU/0szscHXAQnAAy4CZwC3Jvz8q02cl8NdFB4gvBK4DkLQAGA7MqnIcZnaI+I+w/q/ag8W3AJMlvQxckMwjqUHSYoCI2A18HViT/NwcEbsljaGwe2ks8Iyk9ZJcEMzMDrKqtggiYhcwqUz7Wor+yo+IJcCSkj7bAFWz
fjMzq57PLDYzyzkXAjOznHMhMDPLORcCM7OccyEwM8s5FwIzs5xzITAzyzkXAjOznHMhMDPLORcCM7OccyEwM8s5FwIzs5xzITAzyzkXAjOznHMhMDPLORcCM7OccyEwM8u5qgqBpGMkPSrp5eTfo7vpNzPp87KkmWWWL5O0sZqxmJlZ71S7RTAfeCwiTgIeS+Y7kXQMcCMwETgTuLG4YEj6HNBW5TjMzKyXqi0EU4GlyfRS4LNl+lwEPBoRuyPiLeBRYAqApBrgGmBBleMwM7NeUkT0/sHSLyPiqGRawFsd80V95gGDI2JBMv814J2IuE3SQuBfgGeBH0fEKR+yrtnAbIDa2toJzc3NFY2xra2Nmpqanv5qhyw3y2znZp/d33KzzO7LuRu27ynbXjsEdrzTua1+9PCq1gV957loampaFxENpe0DD/RASauA3y6z6PrimYgISRVXFUmnAb8XEVdLqjtQ/4hYBCwCaGhoiMbGxorW09LSQqV9eyKr3CyznZt9dn/LzTK7L+deOX952fZr69u5fUPnj8XW6dWtC/r2cwEVFIKIuKC7ZZJ2SDo2Il6XdCyws0y37UBj0fwYoAU4C2iQ1JqM46OSWiKiETMzO2iqPUawDOj4FtBM4Edl+qwELpR0dHKQ+EJgZUT8fUQcFxF1wLnASy4CZmYHX7WF4BZgsqSXgQuSeSQ1SFoMEBG7ga8Da5Kfm5M2MzPrAw64a+jDRMQuYFKZ9rXArKL5JcCSD8lpBbo9UGxmZtnxmcVmZjnnQmBmlnMuBGZmOVfVMQIzs/6o9ZZLyra3tLSkct5Af+MtAjOznHMhMDPLORcCM7OccyEwM8s5FwIzs5xzITAzyzkXAjOznHMhMDPLORcCM7Ocq+pWlYeKpDeA/1th95HAmxkMI6vcLLOdm312f8vNMru/5WaZ3VdyfzciRpU29stC0BOS1pa7R2dfzc0y27nZZ/e33Cyz+1tultl9Pde7hszMcs6FwMws5/JQCBb1s9wss52bfXZ/y80yu7/lZpndp3MP+2MEZmb24fKwRWBmZh/ChcDMLOcO20IgaYmknZI2ppx7vKSfStosaZOkq1LKHSzp3yQ9l+T+ZRq5RfkDJD0r6ccp57ZK2iBpvaS1KeYeJelhSS9I2iLprBQyfz8ZZ8fPryT9eQrD7ci/Ovm/2yjpfkmDU8q9KsncVO14y70vJB0j6VFJLyf/Hp1S7qXJmD+Q1KuvOHaT+43kdfG8pB9IOirF7K8nueslPSLpuDRyi5ZdKykkjUxpvDdJ2l70mv6jnuYCEBGH5Q9wHnA6sDHl3GOB05PpYcBLwNgUcgXUJNODgKeBj6c47muAfwR+nPLz0QqMzOD/bykwK5k+Ajgq5fwBwC8onGCTRt5o4GfAkGT+QeDKFHJPATYCH6Fwa9lVwIlV5HV5XwB/A8xPpucDt6aU+4fA7wMtQEOK470QGJhM39qb8X5I9n8omv4KcFcauUn78cBKCifD9vg90814bwLmVfs6O2y3CCLiX4DdGeS+HhHPJNNvA1sofAhUmxsR0ZbMDkp+UjmSL2kMcAmwOI28rEkaTuFFfw9ARLwXEb9MeTWTgFciotIz1CsxEBgiaSCFD+5/TyHzD4GnI+I3EdEO/G/gc70N6+Z9MZVC4SX597Np5EbEloh4sRfDPFDuI8lzAfAUMCbF7F8VzQ6lF+/BD/nsWQh8tTeZB8it2mFbCA4GSXXAeAp/vaeRN0DSemAn8GhEpJIL/C2FF+AHKeUVC+ARSeskzU4p8wTgDeA7ye6sxZKGppTd4TLg/rTCImI7cBvwGvA6sCciHkkheiPwCUkjJH0E+CMKf1mmqTYiXk+mfwHUppyfpf8M/CTNQEl/JennwHTghpQypwLbI+K5NPJKzE12Zy3pzW49cCHoNUk1wD8Bf17yV0SvRcS+iDiNwl84Z0o6pdpMSZ8CdkbEumqzunFuRJwOXAx8WdJ5KWQOpLAJ/PcRMR74NYVdFqmQ
dATwGeChFDOPpvCX9QnAccBQSVdUmxsRWyjs/ngE+GdgPbCv2twPWV+Q0pZo1iRdD7QD30szNyKuj4jjk9y51eYlBfy/k1JRKfH3wO8Bp1H4A+T23oS4EPSCpEEUisD3IuL7aecnu0F+CkxJIe4c4DOSWoFm4HxJ300hF9j/lzARsRP4AXBmCrHbgG1FW0QPUygMabkYeCYidqSYeQHws4h4IyLeB74PnJ1GcETcExETIuI84C0Kx6XStEPSsQDJvztTzk+dpCuBTwHTk+KVhe8Bn08h5/co/IHwXPI+HAM8I+m3qw2OiB3JH5AfAP+TXr7/XAh6SJIo7LveEhF3pJg7quPbD5KGAJOBF6rNjYjrImJMRNRR2B2yOiKq/ksVQNJQScM6pikcxKv6W1oR8Qvg55J+P2maBGyuNrfI5aS4WyjxGvBxSR9JXiOTKBw/qpqkjyb//g6F4wP/mEZukWXAzGR6JvCjlPNTJWkKhV2dn4mI36ScfVLR7FTSeQ9uiIiPRkRd8j7cRuELJ7+oNrujgCf+mN6+/6o92txXfyi80V8H3qfwxH8ppdxzKWw6P09hM3098Ecp5I4Dnk1yNwI3ZPCcNJLit4aA/wg8l/xsAq5PMfs0YG3yfPwQODql3KHALmB4Bs/vX1L44NgI3AccmVLu4xQK4XPApCqzurwvgBHAY8DLFL6VdExKuX+cTL8L7ABWppS7Ffh50fuvx9/s+ZDsf0r+/54H/hcwOo3ckuWt9O5bQ+XGex+wIRnvMuDY3jwXvsSEmVnOedeQmVnOuRCYmeWcC4GZWc65EJiZ5ZwLgZlZzrkQmCUkfSW50mmPzlSVVCfpi1mNyyxrLgRm/99/BSZHxPQePq4O6HEhkDSgp48xy4ILgRkg6S4KJ8j9RNL1yQW8/i256N3UpE+dpMclPZP8dFxC4hYKF4Zbn9yT4EpJf1eU/WNJjcl0m6TbJT0HnCXpimQ96yXd7eJgh4ILgRkQEXMoXDa6icLZx6sj4sxk/hvJJTR2UthiOB34E+CbycPnA49HxGkRsfAAqxpK4bLSp1I4w/lPgHOicLHBfRSueGl2UA081AMw64MupHChvnnJ/GDgdygUir+TdBqFD+2P9SJ7H4XLGEDhekQTgDWFyxMxhH5wwTc7/LgQmHUl4PNRclMVSTdRuGbOqRS2pvd28/h2Om9tF9+ycm9EdFxGWsDSiLgujUGb9ZZ3DZl1tRL4b8lVRJE0PmkfDrwehUv+zqBwu0uAtynctrRDK3CapN+SdDzdXxr4MWBa0dVFj5H0u6n+JmYVcCEw6+rrFG4V+rykTck8wLeBmcmB3j+gcMMcKFz5cZ+k5yRdDTxB4f7FmykcR3im3EoiYjPwFxTu8PY88CiFe2KbHVS++qiZWc55i8DMLOdcCMzMcs6FwMws51wIzMxyzoXAzCznXAjMzHLOhcDMLOf+H/JZvG7O7uy1AAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# no standardization\n", "df = pd.DataFrame({'feature':list(range(1,16)), 'coefficients':np.stack(coefs).mean(axis=0)})\n", "ax = df.plot(x='feature', y='coefficients', rot=0, kind='bar', grid=True)" ] }, { "cell_type": "code", "execution_count": 125, "id": "broken-taylor", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([-0.02159655, 0.04977869, 0.05276279, 0.04668993, 0.02615154,\n", " 0.02615154, 0.00636762, 0.01282327, -0.02016103, 0. ,\n", " -0.02192281, 0.02003289, -0.04702737, 0.03902806, 0.10792615])" ] }, "execution_count": 125, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.stack(coefs).mean(axis=0)" ] }, { "cell_type": "code", "execution_count": 126, "id": "blond-middle", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.02422864, 0.0258807 , 0.00526667, 0.00627771, 0.00087469,\n", " 0.00087469, 0.01432958, 0.01491875, 0.00288218, 0. 
,\n", " 0.0042263 , 0.00431502, 0.00625123, 0.00565562, 0.00302476])" ] }, "execution_count": 126, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.stack(coefs).std(axis=0)" ] }, { "cell_type": "code", "execution_count": 130, "id": "stylish-prague", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-1.172818154019393" ] }, "execution_count": 130, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.mean(intercepts)" ] }, { "cell_type": "code", "execution_count": 100, "id": "anticipated-bibliography", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEGCAYAAABo25JHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAdOUlEQVR4nO3dfZRU9Z3n8fdnaBSkWVQgPQpm2h1NZtDGB1pRMaaBaDAmIZNAolEWsxKWs2FjVDYHl41xDDlHE4XZqBll0WiMY/sweWCCGRSxZzxEDaIoTz6g6dVmjSgYpRNR0e/+cS9s0VRrd9e9TXffz+ucPtS99atP/broqk/dW1W3FBGYmVlx/cW+noCZme1bLgIzs4JzEZiZFZyLwMys4FwEZmYFV7WvJ9AVw4YNi9ra2g6N/dOf/sSgQYMyn0NeuXlmOzf/7N6Wm2d2b8vNM7un5K5evfq1iBi+1xkR0et+xowZEx314IMPdnhsZ+SVm2e2c/PP7m25eWb3ttw8s3tKLvBYlHlM9a4hM7OCcxGYmRWci8DMrOB65YvF5bz77ru0tLSwY8eOPdYPGTKEjRs3Zn59eeXmmZ1l7oABAxg5ciT9+/fPJM/M9p0+UwQtLS0MHjyY2tpaJO1ev337dgYPHpz59eWVm2d2VrkRwdatW2lpaeHwww/PYGZmti/1mV1DO3bsYOjQoXuUgOVDEkOHDt1r68vMeqc+UwSAS6Ab+bY26zv6VBGYmVnn9ZnXCNqqnbs007zmK8/KNK+ct99+m7POOostW7Ywb948Dj30UGbNmkX//v1ZunQpF154Iffcc0+7l58xYwYXX3wxo0aN6vR1NzU1sd9++3HKKadU8iuYWU7KPaZdUreT88us7+zjVZ8tgt7oiSeeAGDlypUMHjyYWbNmcemll3LeeecBfGAJACxevLjL193U1ER1dbWLwKyAvGsoQz/96U8ZPXo0xxxzDNOmTaO5uZkJEyYwevRoJk6cyIsvvgjAq6++ype+9CVOOOEETjjhBFauXMmWLVs477zzWLVqFePGjePGG2/krrvu4jvf+Q7nnnsuzc3NHH300QC89957zJkzh6OPPprRo0dz7bXXAtDQ0MBjjz0GwH333cfJJ5/M8ccfz9SpU2ltbQWgtraW7373uxx//PHU1dXx9NNP09zczA033MDChQs59thjeeihh7j77rs5+uijOeaYYzjttNP2wa1pZt3FWwQZWb9+PfPnz+e3v/0tw4YNY9u2bUyfPn33z80338w3v/lNfvnLX3LhhRdy0UUXceqpp/Liiy/y6U9/mo0bN7J48WKuvvpq7rjjDgYPHszDDz/MZz/7WaZMmUJzc/Pu61q0aBHNzc2sWbOGqqoqtm3btsdcXnvtNebPn8/y5cs
ZNGgQV111FQsWLOCiiy4CYNiwYTz++OP8+Mc/5uqrr2bx4sXMmjWL6upq5syZA0BdXR3Lli1jxIgR/PGPf+yum9HM9gEXQUZWrFjB1KlTGTZsGAAHH3wwDz/8MD//+c8BmDZtGt/+9rcBWL58ORs2bNh92TfffHP3M/aOWL58ObNmzaKqqmr3dZV65JFH2LBhA+PGjQPgnXfe4eSTT959/he/+EUAxowZs3t+bY0bN47zzz+fL3/5y7vHm1nf5CLYB95//30eeeQRBgwYkEt+RHD66adzxx137LF++/btAOy///4A9OvXj507d5bNuOGGG3j00UdZunQpY8aMYfXq1QwdOjSX+ZrZvuXXCDIyYcIE7r77brZu3QrAtm3bOOWUU2hsbATg9ttv5xOf+AQAZ5xxxu79+gBr1qzp1HWdfvrp3HjjjbsfxNvuGjrppJNYuXIlmzZtApJjlj/77LMfmDl48ODdRQHw/PPPM3bsWK644gqGDx/OSy+91Kk5mlnv0We3CHa9fSrPQ0GUOuqoo5g3bx6f/OQn6devH8cddxzXXnstX/va1/jhD3/I8OHD+clPfgLAj370I77xjW8wevRodu7cyWmnncYNN9zQ4euaMWMGzz77LKNHj6Z///58/etfZ/bs2bvPHz58OLfccgvnnHMOb7/9NgDz58/nkEMOaTfzc5/7HFOmTOFXv/oV1157LQsXLuS5554jIpg4cSLHHHNMF28ZM+vp+mwR7Au7XhgutWLFir3GDRs2jDvvvHOv9Q0NDTQ0NOx+Zn7LLbfsPq+2tpZ169YBUFVVxYIFC1iwYMEel29qatp9esKECaxatWqP87dv377Hi8719fW7L/Oxj32Mp556avd5u7ZezKzv864hM7OCcxGYmRVcnyqC5Cs5rTv4tjbrO/pMEQwYMICtW7f6Aaob7Po+grze/mpm3avPvFg8cuRIWlpaePXVV/dYv2PHjlwesPLKzTM7y9xd31BmZr1fnymC/v37l/22rKamJo477rjMry+v3Dyz85yzmfVefWbXkJmZdY2LwMys4DIpAkmTJD0jaZOkuWXO31/Snen5j0qqTdf3l3SrpLWSNkq6NIv5mJlZx1VcBJL6AdcDZwKjgHMktf2KrAuA1yPiCGAhcFW6fiqwf0TUAWOA/7KrJMzMrHtksUVwIrApIl6IiHeARmBymzGTgVvT0/cAE5V8+3kAgyRVAQOBd4A3M5iTmZl1kCp9372kKcCkiJiRLk8DxkbE7JIx69IxLeny88BY4A3gNmAicABwUUQsaud6ZgIzAWpqasbsOqrnh2ltbaW6urqLv1335+aZ7dz8s3tbbp7ZvS03z+wsctdufmOvdTUD4ZW39h5bN2JI2Yzx48evjoj6tuv39dtHTwTeAw4FDgIekrQ8Il5oOzAtiEUA9fX10dDQ0KEraGpqoqNjOyOv3DyznZt/dm/LzTO7t+XmmZ1Fbrkvqb+kbifXrN37Ybz53M5dVxa7hjYDh5Usj0zXlR2T7gYaAmwFvgr8a0S8GxFbgJXAXm1lZmb5yaIIVgFHSjpc0n7A2cCSNmOWALuOzzwFWBHJPqkXgQkAkgYBJwFPZzAnMzProIqLICJ2ArOBZcBG4K6IWC/pCkmfT4fdBAyVtAm4GNj1FtPrgWpJ60kK5ScR8RRmZtZtMnmNICLuBe5ts+6yktM7SN4q2vZyreXWm5lZ9/Eni83MCs5FYGZWcC4CM7OCcxGYmRWci8DMrOBcBGZmBeciMDMrOBeBmVnBuQjMzArORWBmVnAuAjOzgnMRmJkVnIvAzKzgXARmZgXnIjAzKzgXgZlZwbkIzMwKzkVgZlZwLgIzs4JzEZiZFZyLwMys4FwEZmYF5yIwMyu4TIpA0iRJz0jaJGlumfP3l3Rnev6jkmpLzhst6WFJ6yWtlTQgizmZmVnHVFwEkvoB1wNnAqOAcySNajPsAuD1iDgCWAhclV62CvgZMCsijgIagHcrnZOZmXVcFls
EJwKbIuKFiHgHaAQmtxkzGbg1PX0PMFGSgDOApyLiSYCI2BoR72UwJzMz6yBFRGUB0hRgUkTMSJenAWMjYnbJmHXpmJZ0+XlgLHAeMAb4CDAcaIyIH7RzPTOBmQA1NTVjGhsbOzS/1tZWqquru/jbdX9untnOzT+7t+Xmmd3bcvPMziJ37eY39lpXMxBeeWvvsXUjhpTNGD9+/OqIqG+7vqqimVWuCjgVOAH4M/CApNUR8UDbgRGxCFgEUF9fHw0NDR26gqamJjo6tjPyys0z27n5Z/e23Dyze1tuntlZ5J4/d+le6y6p28k1a/d+GG8+t3PXlcWuoc3AYSXLI9N1ZcekrwsMAbYCLcC/R8RrEfFn4F7g+AzmZGZmHZRFEawCjpR0uKT9gLOBJW3GLAGmp6enACsi2Se1DKiTdEBaEJ8ENmQwJzMz66CKdw1FxE5Js0ke1PsBN0fEeklXAI9FxBLgJuA2SZuAbSRlQUS8LmkBSZkEcG9E7L39Y2ZmucnkNYKIuJdkt07pustKTu8AprZz2Z+RvIXUzMz2AX+y2Mys4FwEZmYF5yIwMys4F4GZWcG5CMzMCs5FYGZWcC4CM7OCcxGYmRWci8DMrOBcBGZmBeciMDMrOBeBmVnBuQjMzArORWBmVnAuAjOzgnMRmJkVnIvAzKzgXARmZgXnIjAzKzgXgZlZwbkIzMwKzkVgZlZwLgIzs4LLpAgkTZL0jKRNkuaWOX9/SXem5z8qqbbN+R+V1CppThbzMTOzjqu4CCT1A64HzgRGAedIGtVm2AXA6xFxBLAQuKrN+QuA31Q6FzMz67yqDDJOBDZFxAsAkhqBycCGkjGTgcvT0/cA10lSRISkLwC/B/6UwVx6ldq5S8uuv6RuJ+e3Oa/5yrO6Y0pmVkCKiMoCpCnApIiYkS5PA8ZGxOySMevSMS3p8vPAWGAHcD9wOjAHaI2Iq9u5npnATICampoxjY2NHZpfa2sr1dXVXfzt8s1du/mNsutrBsIrb+25rm7EkIquC3r2bdGduXlm97bcPLN7W26e2Xk9XpR7rID2Hy/Gjx+/OiLq267PYougEpcDCyOiVdIHDoyIRcAigPr6+mhoaOjQFTQ1NdHRsZ2RRW7bZ/27XFK3k2vW7vlf03xuZdcFPfu26M7cPLN7W26e2b0tN8/svB4vyj1WQOcfL7Iogs3AYSXLI9N15ca0SKoChgBbSbYKpkj6AXAg8L6kHRFxXQbzMjOzDsiiCFYBR0o6nOQB/2zgq23GLAGmAw8DU4AVkeyT+sSuAZIuJ9k15BIwM+tGFRdBROyUNBtYBvQDbo6I9ZKuAB6LiCXATcBtkjYB20jKwszMeoBMXiOIiHuBe9usu6zk9A5g6odkXJ7FXMzMrHP8yWIzs4Lb1+8ashz48wlm1hneIjAzKzgXgZlZwbkIzMwKzkVgZlZwLgIzs4JzEZiZFZyLwMys4FwEZmYF5yIwMys4F4GZWcG5CMzMCs5FYGZWcD7onJlZRjpzwEfoOQd99BaBmVnBuQjMzArORWBmVnAuAjOzgnMRmJkVnIvAzKzgXARmZgXnIjAzK7hMPlAmaRLwv4B+wOKIuLLN+fsDPwXGAFuBr0REs6TTgSuB/YB3gP8eESuymFOWyn1IpKd/QMTMrKMq3iKQ1A+4HjgTGAWcI2lUm2EXAK9HxBHAQuCqdP1rwOciog6YDtxW6XzMzKxzstgiOBHYFBEvAEhqBCYDG0rGTAYuT0/fA1wnSRHxRMmY9cBASftHxNsZzMtykNfWkbe6zPYdRURlAdIUYFJEzEiXpwFjI2J2yZh16ZiWdPn5dMxrbXJmRcSn2rmemcBMgJqamjGNjY0dml9rayvV1dVd+t12Wbv5jb3W1QyEV97ae2zdiCEV5baX3RNy28vO67bIIrc9Wfxd9IXcPLN7W24W2Z2570H330fGjx+/OiLq267vEQedk3QUye6iM9obExGLgEUA9fX10dDQ0KHspqYmOjq2PeW
elV5St5Nr1u598zWf2/HrKpfbXnZPyG0vO6/bIovc9mTxd9EXcvPM7m25WWR35r4HPec+ksW7hjYDh5Usj0zXlR0jqQoYQvKiMZJGAr8A/lNEPJ/BfMzMrBOyKIJVwJGSDpe0H3A2sKTNmCUkLwYDTAFWRERIOhBYCsyNiJUZzMXMzDqp4iKIiJ3AbGAZsBG4KyLWS7pC0ufTYTcBQyVtAi4G5qbrZwNHAJdJWpP+fKTSOZmZWcdl8hpBRNwL3Ntm3WUlp3cAU8tcbj4wP4s5mJXTmS8K8buRrKj8yWIzs4JzEZiZFZyLwMys4FwEZmYF1yM+UGZmVo4PPdI9vEVgZlZwLgIzs4JzEZiZFZyLwMys4FwEZmYF5yIwMys4F4GZWcG5CMzMCs5FYGZWcC4CM7OCcxGYmRWci8DMrOBcBGZmBeciMDMrOBeBmVnBuQjMzArORWBmVnAuAjOzgsukCCRNkvSMpE2S5pY5f39Jd6bnPyqptuS8S9P1z0j6dBbzMTOzjqu4CCT1A64HzgRGAedIGtVm2AXA6xFxBLAQuCq97CjgbOAoYBLw4zTPzMy6SRZfXn8isCkiXgCQ1AhMBjaUjJkMXJ6evge4TpLS9Y0R8Tbwe0mb0ryHM5iXWa78xerWVygiKguQpgCTImJGujwNGBsRs0vGrEvHtKTLzwNjScrhkYj4Wbr+JuA3EXFPmeuZCcwEqKmpGdPY2LjH+Ws3v1F2fjUD4ZW39lxXN2JIF37TPbW2tlJdXV1xTndmOzf/7Cxyy/0tl/s7hp7xt9yZ+x5UPue8bmPofY8Xnc0dP3786oiob7s+iy2CbhERi4BFAPX19dHQ0LDH+eWehUHyDO2atXv+ms3nNpQd2xlNTU20nUNW8sp2bv7ZWeSW+1su93cMPeNvuTP3Pah8znndxtD7Hi+yys3ixeLNwGElyyPTdWXHSKoChgBbO3hZMzPLURZFsAo4UtLhkvYjefF3SZsxS4Dp6ekpwIpI9kktAc5O31V0OHAk8LsM5mRmZh1U8a6hiNgpaTawDOgH3BwR6yVdATwWEUuAm4Db0heDt5GUBem4u0heWN4JfCMi3qt0TmZm1nGZvEYQEfcC97ZZd1nJ6R3A1HYu+33g+1nMw8zMOs+fLDYzKzgXgZlZwbkIzMwKzkVgZlZwLgIzs4LrNZ8s/jDtHculqakpk08Gmpn1Vd4iMDMrOBeBmVnBuQjMzArORWBmVnAuAjOzgnMRmJkVXJ95+6hZX1HurdA9+W3Qfut27+ctAjOzgnMRmJkVnIvAzKzgXARmZgXnIjAzKzgXgZlZwbkIzMwKzkVgZlZwLgIzs4JzEZiZFVxFRSDpYEn3S3ou/fegdsZNT8c8J2l6uu4ASUslPS1pvaQrK5mLmZl1TaVbBHOBByLiSOCBdHkPkg4GvguMBU4EvltSGFdHxN8AxwHjJJ1Z4XzMzKyTKi2CycCt6elbgS+UGfNp4P6I2BYRrwP3A5Mi4s8R8SBARLwDPA6MrHA+ZmbWSYqIrl9Y+mNEHJieFvD6ruWSMXOAARExP13+DvBWRFxdMuZAkiL4VES80M51zQRmAtTU1IxpbGzs0BxbW1uprq7u3C+2D3PzzHZu/tm9LTfP7J6cu3bzG2XX1wyEV97ac13diCEVXRf0nNti/PjxqyOivu36Dz0MtaTlwF+WOWte6UJEhKROt4qkKuAO4EftlUCavwhYBFBfXx8NDQ0dym9qaqKjYzsjr9w8s52bf3Zvy80zuyfnnj93adn1l9Tt5Jq1ez4sZnEo7Z58W0AHiiAiPtXeeZJekXRIRLws6RBgS5lhm4GGkuWRQFPJ8iLguYj4h45M2MzMslXpawRLgOnp6enAr8qMWQacIemg9EXiM9J1SJoPDAG+VeE8zMysiyotgiuB0yU9B3wqXUZSvaTFABGxDfgesCr9uSIitkkaSbJ7aRTwuKQ1kmZUOB8zM+ukir6qMiK2AhPLrH8MmFGyfDNwc5sxLYA
quX4zM6ucP1lsZlZwLgIzs4JzEZiZFZyLwMys4FwEZmYF5yIwMys4F4GZWcG5CMzMCs5FYGZWcC4CM7OCcxGYmRWci8DMrOAqOuicmVlv1HzlWWXXNzU1ZfJFNL2NtwjMzArORWBmVnAuAjOzgnMRmJkVnIvAzKzgXARmZgXnIjAzKzgXgZlZwbkIzMwKThGxr+fQaZJeBf5PB4cPA17LYRp55eaZ7dz8s3tbbp7ZvS03z+yekvtXETG87cpeWQSdIemxiKjvLbl5Zjs3/+zelptndm/LzTO7p+d615CZWcG5CMzMCq4IRbCol+Xmme3c/LN7W26e2b0tN8/sHp3b518jMDOzD1aELQIzM/sALgIzs4Lrs0Ug6WZJWyStyzj3MEkPStogab2kCzPKHSDpd5KeTHP/Povckvx+kp6Q9OuMc5slrZW0RtJjGeYeKOkeSU9L2ijp5AwyP57Oc9fPm5K+lcF0d+VflP7frZN0h6QBGeVemGaur3S+5e4Xkg6WdL+k59J/D8ood2o65/cldektju3k/jD9u3hK0i8kHZhh9vfS3DWS7pN0aBa5JeddIikkDctovpdL2lzyN/2ZzuYCEBF98gc4DTgeWJdx7iHA8enpwcCzwKgMcgVUp6f7A48CJ2U474uBfwJ+nfHt0QwMy+H/71ZgRnp6P+DAjPP7AX8g+YBNFnkjgN8DA9Plu4DzM8g9GlgHHEDy1bLLgSMqyNvrfgH8AJibnp4LXJVR7t8CHweagPoM53sGUJWevqor8/2A7P9QcvqbwA1Z5KbrDwOWkXwYttP3mXbmezkwp9K/sz67RRAR/w5syyH35Yh4PD29HdhI8iBQaW5ERGu62D/9yeSVfEkjgbOAxVnk5U3SEJI/+psAIuKdiPhjxlczEXg+Ijr6CfWOqAIGSqoieeD+vxlk/i3waET8OSJ2Av8GfLGrYe3cLyaTFC/pv1/IIjciNkbEM12Y5ofl3pfeFgCPACMzzH6zZHEQXbgPfsBjz0Lg213J/JDcivXZIugOkmqB40ievWeR10/SGmALcH9EZJIL/APJH+D7GeWVCuA+Saslzcwo83DgVeAn6e6sxZIGZZS9y9nAHVmFRcRm4GrgReBl4I2IuC+D6HXAJyQNlXQA8BmSZ5ZZqomIl9PTfwBqMs7P038GfpNloKTvS3oJOBe4LKPMycDmiHgyi7w2Zqe7s27uym49cBF0maRq4J+Bb7V5FtFlEfFeRBxL8gznRElHV5op6bPAlohYXWlWO06NiOOBM4FvSDotg8wqkk3gf4yI44A/keyyyISk/YDPA3dnmHkQyTPrw4FDgUGSzqs0NyI2kuz+uA/4V2AN8F6luR9wfUFGW6J5kzQP2AncnmVuRMyLiMPS3NmV5qUF/j/IqFTa+Efgr4FjSZ6AXNOVEBdBF0jqT1ICt0fEz7POT3eDPAhMyiBuHPB5Sc1AIzBB0s8yyAV2PxMmIrYAvwBOzCC2BWgp2SK6h6QYsnIm8HhEvJJh5qeA30fEqxHxLvBz4JQsgiPipogYExGnAa+TvC6VpVckHQKQ/rsl4/zMSTof+Cxwblpeebgd+FIGOX9N8gThyfR+OBJ4XNJfVhocEa+kTyDfB/43Xbz/uQg6SZJI9l1vjIgFGeYO3/XuB0kDgdOBpyvNjYhLI2JkRNSS7A5ZEREVP1MFkDRI0uBdp0lexKv4XVoR8QfgJUkfT1dNBDZUmlviHDLcLZR6EThJ0gHp38hEktePKibpI+m/HyV5feCfssgtsQSYnp6eDvwq4/xMSZpEsqvz8xHx54yzjyxZnEw298G1EfGRiKhN74ctJG84+UOl2bsKPPV3dPX+V+mrzT31h+SO/jLwLskNf0FGuaeSbDo/RbKZvgb4TAa5o4En0tx1wGU53CYNZPiuIeA/Ak+mP+uBeRlmHws8lt4evwQOyih3ELAVGJLD7fv3JA8c64DbgP0zyn2IpAifBCZWmLXX/QIYCjwAPEfyrqSDM8r
9u/T028ArwLKMcjcBL5Xc/zr9zp4PyP7n9P/vKeBfgBFZ5LY5v5muvWuo3HxvA9am810CHNKV28KHmDAzKzjvGjIzKzgXgZlZwbkIzMwKzkVgZlZwLgIzs4JzEZilJH0zPdJppz6pKqlW0lfzmpdZ3lwEZv/ffwVOj4hzO3m5WqDTRSCpX2cvY5YHF4EZIOkGkg/I/UbSvPQAXr9LD3o3OR1TK+khSY+nP7sOIXElyYHh1qTfSXC+pOtKsn8tqSE93SrpGklPAidLOi+9njWSbnQ52L7gIjADImIWyWGjx5N8+nhFRJyYLv8wPYTGFpIthuOBrwA/Si8+F3goIo6NiIUfclWDSA4rfQzJJ5y/AoyL5GCD75Ec8dKsW1Xt6wmY9UBnkByob066PAD4KElRXCfpWJIH7Y91Ifs9ksMYQHI8ojHAquTwRAykFxzwzfoeF4HZ3gR8Kdp8qYqky0mOmXMMydb0jnYuv5M9t7ZLv7JyR0TsOoy0gFsj4tIsJm3WVd41ZLa3ZcB/S48iiqTj0vVDgJcjOeTvNJKvuwTYTvK1pbs0A8dK+gtJh9H+oYEfAKaUHF30YEl/lelvYtYBLgKzvX2P5KtCn5K0Pl0G+DEwPX2h929IvjAHkiM/vifpSUkXAStJvr94A8nrCI+Xu5KI2AD8T5JveHsKuJ/kO7HNupWPPmpmVnDeIjAzKzgXgZlZwbkIzMwKzkVgZlZwLgIzs4JzEZiZFZyLwMys4P4f4GzDUVEwRwgAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# no standardization\n", "df = pd.DataFrame({'feature':list(range(1,16)), 'coefficients':np.stack(coefs).mean(axis=0)})\n", "ax = df.plot(x='feature', y='coefficients', rot=0, kind='bar', grid=True)" ] }, { "cell_type": "code", "execution_count": 99, "id": "filled-intervention", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([-0.00722761, 0.021936 , 0.04056063, 0.03559395, 0.02052407,\n", " 0.02052407, 0.01702212, 0.00057635, -0.01129512, 0. ,\n", " -0.01127191, 0.0138696 , -0.03131791, 0.02770065, 0.09263194])" ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.stack(coefs).mean(axis=0)" ] }, { "cell_type": "code", "execution_count": 97, "id": "international-reward", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.00777849, 0.00867834, 0.00309854, 0.00398819, 0.00066269,\n", " 0.00066269, 0.01262642, 0.01286337, 0.00246544, 0. ,\n", " 0.00341984, 0.00401186, 0.00823876, 0.00863262, 0.00237907])" ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.stack(coefs).std(axis=0)" ] }, { "cell_type": "code", "execution_count": 132, "id": "precious-reader", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-1.3638519776835112" ] }, "execution_count": 132, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.mean(intercepts)" ] }, { "cell_type": "code", "execution_count": null, "id": "isolated-implementation", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "venv", "language": "python", "name": "venv" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.7" } }, "nbformat": 4, "nbformat_minor": 5 }