{ "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "lHecaO_7JG6L" }, "source": [ "# Checking Baseline with AutoML" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "version: 0.1.38+40.geb12c43.dirty\n", "is notebook? True\n" ] } ], "source": [ "%config InlineBackend.figure_format='retina'\n", "from ekorpkit import eKonf\n", "\n", "eKonf.setLogger(\"WARNING\")\n", "print(\"version:\", eKonf.__version__)\n", "print(\"is notebook?\", eKonf.is_notebook())" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "data_dir = \"../data/fomc\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load a feature set" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:ekorpkit.base:Loaded .env from /workspace/projects/ekorpkit-book/config/.env\n", "INFO:ekorpkit.base:setting environment variable CACHED_PATH_CACHE_ROOT to /workspace/.cache/cached_path\n", "INFO:ekorpkit.base:setting environment variable KMP_DUPLICATE_LIB_OK to TRUE\n" ] } ], "source": [ "fs_cfg = eKonf.compose(config_group=\"dataset=feature\")\n", "fs_cfg.name = \"fomc_features_small\"\n", "fs_cfg.data_dir = data_dir\n", "fs_fomc = eKonf.instantiate(fs_cfg)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## AutoML" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "scrolled": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:ekorpkit.base:No method defined to call\n" ] } ], "source": [ "model_cfg = eKonf.compose(config_group='model/automl=classification')\n", "model_cfg.dataset = fs_cfg\n", "model_cfg.config.time_budget = 60\n", "model_cfg.verbose = False\n", "model = eKonf.instantiate(model_cfg)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": false }, "outputs": [ { 
"name": "stdout", "output_type": "stream", "text": [ "Best ML leaner: lgbm\n", "Best hyperparmeter config: {'n_estimators': 10, 'num_leaves': 4, 'min_child_samples': 18, 'learning_rate': 0.2293009676418639, 'log_max_bin': 9, 'colsample_bytree': 0.9086551727646448, 'reg_alpha': 0.0015561782752413472, 'reg_lambda': 0.33127416269768944}\n", "Best accuracy on validation data: 0.672\n", "Training duration of best run: 0.03197 s\n" ] } ], "source": [ "model.fit()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
LGBMClassifier(colsample_bytree=0.9086551727646448,\n", " learning_rate=0.2293009676418639, max_bin=511,\n", " min_child_samples=18, n_estimators=10, num_leaves=4,\n", " reg_alpha=0.0015561782752413472, reg_lambda=0.33127416269768944,\n", " verbose=-1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LGBMClassifier(colsample_bytree=0.9086551727646448,\n", " learning_rate=0.2293009676418639, max_bin=511,\n", " min_child_samples=18, n_estimators=10, num_leaves=4,\n", " reg_alpha=0.0015561782752413472, reg_lambda=0.33127416269768944,\n", " verbose=-1)
\n", " | columns | \n", "importances | \n", "
---|---|---|
2 | \n", "PMI | \n", "28 | \n", "
1 | \n", "GDP_diff_prev | \n", "17 | \n", "
3 | \n", "EMP_diff_prev | \n", "16 | \n", "
5 | \n", "UNEMP_diff_prev | \n", "12 | \n", "
0 | \n", "prev_decision | \n", "7 | \n", "
4 | \n", "RSALES_diff_year | \n", "5 | \n", "
6 | \n", "HSALES_diff_year | \n", "5 | \n", "
7 | \n", "Inertia_diff | \n", "0 | \n", "
8 | \n", "Balanced_diff | \n", "0 | \n", "