From 06947b42262c2cdce814654bba247a0ee10a8fab Mon Sep 17 00:00:00 2001 From: Heitor Date: Tue, 12 Sep 2023 18:46:59 +1200 Subject: [PATCH] update the names of the notebooks and the MOA_API_Examples.ipynb --- MOA_API_Examples.ipynb | 416 +++ ...r_Comparison.ipynb => MOA_Comparison.ipynb | 0 MOA_River_Example.ipynb | 2544 ----------------- 3 files changed, 416 insertions(+), 2544 deletions(-) create mode 100644 MOA_API_Examples.ipynb rename MOA_River_Comparison.ipynb => MOA_Comparison.ipynb (100%) delete mode 100644 MOA_River_Example.ipynb diff --git a/MOA_API_Examples.ipynb b/MOA_API_Examples.ipynb new file mode 100644 index 00000000..36e9133b --- /dev/null +++ b/MOA_API_Examples.ipynb @@ -0,0 +1,416 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "be91302b-3d48-449a-9eeb-4444d86d8e7c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MOA jar path location (config.ini): /Users/gomeshe/Dropbox/ciencia_computacao/dev/Using-MOA-API/moa.jar\n", + "JVM Location (system): \n", + "/Users/gomeshe/Library/Java/JavaVirtualMachines/openjdk-20.0.1/Contents/Home\n", + "Sucessfully started the JVM and added MOA jar to the class path\n" + ] + } + ], + "source": [ + "from prepare_jpype import start_jpype\n", + "start_jpype()\n", + "\n", + "from evaluation import test_train_loop_MOA, test_train_loop_RIVER" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "60bb1d0d-66e3-45f1-b7cc-15ae08cbe81a", + "metadata": {}, + "outputs": [], + "source": [ + "## Datasets paths\n", + "csv_elec_tiny_path = '/Users/gomeshe/Desktop/data/electricity_tiny.csv'\n", + "arff_elec_tiny_path = '/Users/gomeshe/Desktop/data/electricity_tiny.arff'" + ] + }, + { + "cell_type": "markdown", + "id": "517c68a0-3951-4202-b3eb-0b648f0e33ba", + "metadata": {}, + "source": [ + "## Basic classification examples\n", + "\n", + "* Using the ```test_train_loop_MOA``` for MOA and ```test_train_loop_RIVER``` helper functions (see module test_train_loop)\n", + "\n", + "* These are just examples on how to run several algorithms, it is kind of repetitive, but might be useful to have an example. \n" + ] + }, + { + "cell_type": "markdown", + "id": "c7835c1a-35d0-42bb-9a4d-d990773cc0f6", + "metadata": {}, + "source": [ + "## Examples using River" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3d366b78-feeb-497a-8a05-578437600343", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "# Read the file to a dataframe, used by all examples\n", + "river_elec_tiny = pd.read_csv(csv_elec_tiny_path).to_numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d56e186b-a288-4953-9d95-74f25cfeff5e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ARFClassifier, 0.8670, 0.3444, 0.3445\n" + ] + } + ], + "source": [ + "from river.forest import ARFClassifier\n", + "\n", + "river_arf5 = ARFClassifier(\n", + " n_models=5,\n", + " max_features=0.60\n", + ")\n", + "\n", + "acc, wallclock, cpu_time, df = test_train_loop_RIVER(river_arf5, river_elec_tiny)\n", + "print(f\"{river_arf5}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0d9c9d33-52b2-4442-a577-396de0bf31e7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HoeffdingTreeClassifier, 0.8230, 0.0372, 0.0379\n" + ] + } + ], + "source": [ + "from river.tree import HoeffdingTreeClassifier\n", + "\n", + "river_ht = HoeffdingTreeClassifier()\n", + "\n", + "acc, wallclock, cpu_time, df = test_train_loop_RIVER(river_ht, river_elec_tiny)\n", + "print(f\"{river_ht}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "84f29ddd-2e97-4c31-945a-1129e8ca1833", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "KNNClassifier, 0.7560, 1.9004, 1.8937\n" + ] + } + ], + "source": [ + "from river.neighbors import KNNClassifier\n", + "\n", + "river_knn = KNNClassifier()\n", + "\n", + "acc, wallclock, cpu_time, df = test_train_loop_RIVER(river_knn, river_elec_tiny)\n", + "print(f\"{river_knn}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "47e22fbe-c42a-4cf0-9f40-0c408fc8185b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GaussianNB, 0.8490, 0.0285, 0.0285\n" + ] + } + ], + "source": [ + "from river.naive_bayes import GaussianNB\n", + "\n", + "river_gnb = GaussianNB()\n", + "\n", + "acc, wallclock, cpu_time, df = test_train_loop_RIVER(river_gnb, river_elec_tiny)\n", + "print(f\"{river_gnb}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7f59847d-b33b-431e-8ddb-ee0a3a5375b7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SRPClassifier(HoeffdingTreeClassifier), 0.8940, 0.7320, 0.7318\n" + ] + } + ], + "source": [ + "from river.ensemble.streaming_random_patches import SRPClassifier\n", + "\n", + "river_srp = SRPClassifier(\n", + " n_models=5,\n", + " subspace_size=0.6\n", + ")\n", + "\n", + "acc, wallclock, cpu_time, df = test_train_loop_RIVER(river_srp, river_elec_tiny)\n", + "print(f\"{river_srp}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e25a96f4-7612-445b-b210-838d474fba2d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ExtremelyFastDecisionTreeClassifier, 0.8230, 0.2255, 0.2264\n" + ] + } + ], + "source": [ + "from river.tree import ExtremelyFastDecisionTreeClassifier\n", + "\n", + "river_EFDT = ExtremelyFastDecisionTreeClassifier()\n", + "\n", + "acc, wallclock, cpu_time, df = test_train_loop_RIVER(river_EFDT, river_elec_tiny)\n", + "print(f\"{river_EFDT}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" + ] + }, + { + "cell_type": "markdown", + "id": "15079978-5afa-4876-a0cc-ab0fcf3ab6ce", + "metadata": {}, + "source": [ + "## Examples using MOA" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "389f00c6-22e0-4ed6-a73a-7c18d01c3fb8", + "metadata": {}, + "outputs": [], + "source": [ + "from moa.streams import ArffFileStream\n", + "\n", + "elec_tiny_arff = ArffFileStream(arff_elec_tiny_path, -1)\n", + "elec_tiny_arff.prepareForUse()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "9cf0778d-942f-44e2-bd79-e754e6c7735e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "moa.classifiers.meta.AdaptiveRandomForest, 89.4500, 0.3876, 1.1303\n" + ] + } + ], + "source": [ + "from moa.classifiers.meta import AdaptiveRandomForest\n", + "\n", + "arf10 = AdaptiveRandomForest()\n", + "arf10.getOptions().setViaCLIString(\"-s 5\")\n", + "arf10.prepareForUse()\n", + "\n", + "elec_tiny_arff.restart()\n", + "\n", + "acc, wallclock, cpu_time, df = test_train_loop_MOA(elec_tiny_arff, arf10, maxInstances=2000, sampleFrequency=2000)\n", + "print(f\"{arf10.getClass().getName()}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "1270cf7b-3bf1-4092-9498-f5e6f91aea64", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "moa.classifiers.trees.HoeffdingTree, 86.0500, 0.0282, 0.0883\n" + ] + } + ], + "source": [ + "from moa.classifiers.trees import HoeffdingTree\n", + "\n", + "ht_moa = HoeffdingTree()\n", + "ht_moa.prepareForUse()\n", + "\n", + "elec_tiny_arff.restart()\n", + "\n", + "acc, wallclock, cpu_time, df = test_train_loop_MOA(elec_tiny_arff, ht_moa, maxInstances=2000, sampleFrequency=2000)\n", + "print(f\"{ht_moa.getClass().getName()}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "c8ac0b21-a3bf-488b-b5cb-ce6289298ee1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "moa.classifiers.lazy.kNN, 84.2167, 0.1976, 0.5912\n" + ] + } + ], + "source": [ + "from moa.classifiers.lazy import kNN\n", + "\n", + "knn_moa = kNN()\n", + "knn_moa.prepareForUse()\n", + "\n", + "elec_tiny_arff.restart()\n", + "\n", + "acc, wallclock, cpu_time, df = test_train_loop_MOA(elec_tiny_arff, knn_moa, maxInstances=2000, sampleFrequency=2000)\n", + "print(f\"{knn_moa.getClass().getName()}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "15ccd3c4-f0d6-479d-b773-df230cfdf2aa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "moa.classifiers.trees.EFDT, 83.8375, 0.0246, 0.0775\n" + ] + } + ], + "source": [ + "from moa.classifiers.trees import EFDT\n", + "\n", + "EFDT_moa = EFDT()\n", + "EFDT_moa.prepareForUse()\n", + "\n", + "elec_tiny_arff.restart()\n", + "\n", + "acc, wallclock, cpu_time, df = test_train_loop_MOA(elec_tiny_arff, EFDT_moa, maxInstances=2000, sampleFrequency=2000)\n", + "print(f\"{EFDT_moa.getClass().getName()}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "e264098a-588a-456b-ae11-dced5b5ceb7f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "moa.classifiers.bayes.NaiveBayes, 83.8700, 0.0181, 0.0508\n" + ] + } + ], + "source": [ + "from moa.classifiers.bayes import NaiveBayes\n", + "\n", + "NaiveBayes_moa = NaiveBayes()\n", + "NaiveBayes_moa.prepareForUse()\n", + "\n", + "elec_tiny_arff.restart()\n", + "\n", + "acc, wallclock, cpu_time, df = test_train_loop_MOA(elec_tiny_arff, NaiveBayes_moa, maxInstances=2000, sampleFrequency=2000)\n", + "print(f\"{NaiveBayes_moa.getClass().getName()}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6fa952e8-08e3-4e6f-88ad-dac1aaea94ef", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "moa.classifiers.meta.StreamingRandomPatches, 84.9417, 0.4210, 1.1560\n" + ] + } + ], + "source": [ + "from moa.classifiers.meta import StreamingRandomPatches\n", + "\n", + "SRP_moa = StreamingRandomPatches()\n", + "SRP_moa.getOptions().setViaCLIString(\"-s 10\")\n", + "SRP_moa.prepareForUse()\n", + "\n", + "elec_tiny_arff.restart()\n", + "\n", + "acc, wallclock, cpu_time, df = test_train_loop_MOA(elec_tiny_arff, SRP_moa, maxInstances=2000, sampleFrequency=2000)\n", + "print(f\"{SRP_moa.getClass().getName()}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/MOA_River_Comparison.ipynb b/MOA_Comparison.ipynb similarity index 100% rename from MOA_River_Comparison.ipynb rename to MOA_Comparison.ipynb diff --git a/MOA_River_Example.ipynb b/MOA_River_Example.ipynb deleted file mode 100644 index c3ed147a..00000000 --- a/MOA_River_Example.ipynb +++ /dev/null @@ -1,2544 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "id": "be91302b-3d48-449a-9eeb-4444d86d8e7c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "MOA jar path location (config.ini): /Users/gomeshe/Dropbox/ciencia_computacao/dev/Using-MOA-API/moa.jar\n", - "JVM Location (system): \n", - "/Users/gomeshe/Library/Java/JavaVirtualMachines/openjdk-20.0.1/Contents/Home\n", - "JVM already started\n" - ] - } - ], - "source": [ - "from prepare_jpype import start_jpype\n", - "start_jpype()\n", - "\n", - "from evaluation import test_train_loop_MOA, test_train_loop_RIVER" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "60bb1d0d-66e3-45f1-b7cc-15ae08cbe81a", - "metadata": {}, - "outputs": [], - "source": [ - "## Datasets paths\n", - "csv_elec_tiny_path = '/Users/gomeshe/Desktop/data/electricity_tiny.csv'\n", - "arff_elec_tiny_path = '/Users/gomeshe/Desktop/data/electricity_tiny.arff'" - ] - }, - { - "cell_type": "markdown", - "id": "517c68a0-3951-4202-b3eb-0b648f0e33ba", - "metadata": {}, - "source": [ - "## Basic classification examples\n", - "\n", - "* Using the ```test_train_loop_MOA``` for MOA and ```test_train_loop_RIVER``` helper functions (see module test_train_loop)\n", - "\n", - "* These are just examples on how to run several algorithms, it is kind of repetitive, but might be useful to have an example. \n" - ] - }, - { - "cell_type": "markdown", - "id": "c7835c1a-35d0-42bb-9a4d-d990773cc0f6", - "metadata": {}, - "source": [ - "## Examples using River" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "3d366b78-feeb-497a-8a05-578437600343", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "# Read the file to a dataframe, used by all examples\n", - "river_elec_tiny = pd.read_csv(csv_elec_tiny_path).to_numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "d56e186b-a288-4953-9d95-74f25cfeff5e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ARFClassifier, 0.8720, 0.3055, 0.3054\n" - ] - } - ], - "source": [ - "from river.forest import ARFClassifier\n", - "\n", - "river_arf5 = ARFClassifier(\n", - " n_models=5,\n", - " max_features=0.60\n", - ")\n", - "\n", - "acc, wallclock, cpu_time, df = test_train_loop_RIVER(river_arf5, river_elec_tiny)\n", - "print(f\"{river_arf5}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "0d9c9d33-52b2-4442-a577-396de0bf31e7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "HoeffdingTreeClassifier, 0.8230, 0.0388, 0.0391\n" - ] - } - ], - "source": [ - "from river.tree import HoeffdingTreeClassifier\n", - "\n", - "river_ht = HoeffdingTreeClassifier()\n", - "\n", - "acc, wallclock, cpu_time, df = test_train_loop_RIVER(river_ht, river_elec_tiny)\n", - "print(f\"{river_ht}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "84f29ddd-2e97-4c31-945a-1129e8ca1833", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "KNNClassifier, 0.7550, 2.1145, 2.1065\n" - ] - } - ], - "source": [ - "from river.neighbors import KNNClassifier\n", - "\n", - "river_knn = KNNClassifier()\n", - "\n", - "acc, wallclock, cpu_time, df = test_train_loop_RIVER(river_knn, river_elec_tiny)\n", - "print(f\"{river_knn}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "47e22fbe-c42a-4cf0-9f40-0c408fc8185b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GaussianNB, 0.8490, 0.0294, 0.0294\n" - ] - } - ], - "source": [ - "from river.naive_bayes import GaussianNB\n", - "\n", - "river_gnb = GaussianNB()\n", - "\n", - "acc, wallclock, cpu_time, df = test_train_loop_RIVER(river_gnb, river_elec_tiny)\n", - "print(f\"{river_gnb}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "7f59847d-b33b-431e-8ddb-ee0a3a5375b7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "SRPClassifier(HoeffdingTreeClassifier), 0.8850, 0.7937, 0.7863\n" - ] - } - ], - "source": [ - "from river.ensemble.streaming_random_patches import SRPClassifier\n", - "\n", - "river_srp = SRPClassifier(\n", - " n_models=5,\n", - " subspace_size=0.6\n", - ")\n", - "\n", - "acc, wallclock, cpu_time, df = test_train_loop_RIVER(river_srp, river_elec_tiny)\n", - "print(f\"{river_srp}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "e25a96f4-7612-445b-b210-838d474fba2d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ExtremelyFastDecisionTreeClassifier, 0.8230, 0.2274, 0.2280\n" - ] - } - ], - "source": [ - "from river.tree import ExtremelyFastDecisionTreeClassifier\n", - "\n", - "river_EFDT = ExtremelyFastDecisionTreeClassifier()\n", - "\n", - "acc, wallclock, cpu_time, df = test_train_loop_RIVER(river_EFDT, river_elec_tiny)\n", - "print(f\"{river_EFDT}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" - ] - }, - { - "cell_type": "markdown", - "id": "15079978-5afa-4876-a0cc-ab0fcf3ab6ce", - "metadata": {}, - "source": [ - "## Examples using MOA" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "389f00c6-22e0-4ed6-a73a-7c18d01c3fb8", - "metadata": {}, - "outputs": [], - "source": [ - "from moa.streams import ArffFileStream\n", - "\n", - "elec_tiny_arff = ArffFileStream(arff_elec_tiny_path, -1)\n", - "elec_tiny_arff.prepareForUse()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "9cf0778d-942f-44e2-bd79-e754e6c7735e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "moa.classifiers.meta.AdaptiveRandomForest, 89.4500, 0.4188, 0.8300\n" - ] - } - ], - "source": [ - "from moa.classifiers.meta import AdaptiveRandomForest\n", - "\n", - "arf10 = AdaptiveRandomForest()\n", - "arf10.getOptions().setViaCLIString(\"-s 5\")\n", - "arf10.prepareForUse()\n", - "\n", - "elec_tiny_arff.restart()\n", - "\n", - "acc, wallclock, cpu_time, df = test_train_loop_MOA(elec_tiny_arff, arf10, maxInstances=2000, sampleFrequency=2000)\n", - "print(f\"{arf10.getClass().getName()}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "1270cf7b-3bf1-4092-9498-f5e6f91aea64", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "moa.classifiers.trees.HoeffdingTree, 86.0500, 0.0264, 0.0536\n" - ] - } - ], - "source": [ - "from moa.classifiers.trees import HoeffdingTree\n", - "\n", - "ht_moa = HoeffdingTree()\n", - "ht_moa.prepareForUse()\n", - "\n", - "elec_tiny_arff.restart()\n", - "\n", - "acc, wallclock, cpu_time, df = test_train_loop_MOA(elec_tiny_arff, ht_moa, maxInstances=2000, sampleFrequency=2000)\n", - "print(f\"{ht_moa.getClass().getName()}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "c8ac0b21-a3bf-488b-b5cb-ce6289298ee1", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "moa.classifiers.lazy.kNN, 84.2167, 0.2200, 0.4583\n" - ] - } - ], - "source": [ - "from moa.classifiers.lazy import kNN\n", - "\n", - "knn_moa = kNN()\n", - "knn_moa.prepareForUse()\n", - "\n", - "elec_tiny_arff.restart()\n", - "\n", - "acc, wallclock, cpu_time, df = test_train_loop_MOA(elec_tiny_arff, knn_moa, maxInstances=2000, sampleFrequency=2000)\n", - "print(f\"{knn_moa.getClass().getName()}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "15ccd3c4-f0d6-479d-b773-df230cfdf2aa", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "moa.classifiers.trees.EFDT, 83.8375, 0.0261, 0.0578\n" - ] - } - ], - "source": [ - "from moa.classifiers.trees import EFDT\n", - "\n", - "EFDT_moa = EFDT()\n", - "EFDT_moa.prepareForUse()\n", - "\n", - "elec_tiny_arff.restart()\n", - "\n", - "acc, wallclock, cpu_time, df = test_train_loop_MOA(elec_tiny_arff, EFDT_moa, maxInstances=2000, sampleFrequency=2000)\n", - "print(f\"{EFDT_moa.getClass().getName()}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "e264098a-588a-456b-ae11-dced5b5ceb7f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "moa.classifiers.bayes.NaiveBayes, 83.8700, 0.0193, 0.0365\n" - ] - } - ], - "source": [ - "from moa.classifiers.bayes import NaiveBayes\n", - "\n", - "NaiveBayes_moa = NaiveBayes()\n", - "NaiveBayes_moa.prepareForUse()\n", - "\n", - "elec_tiny_arff.restart()\n", - "\n", - "acc, wallclock, cpu_time, df = test_train_loop_MOA(elec_tiny_arff, NaiveBayes_moa, maxInstances=2000, sampleFrequency=2000)\n", - "print(f\"{NaiveBayes_moa.getClass().getName()}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "6fa952e8-08e3-4e6f-88ad-dac1aaea94ef", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "moa.classifiers.meta.StreamingRandomPatches, 84.9417, 0.5412, 1.0869\n" - ] - } - ], - "source": [ - "from moa.classifiers.meta import StreamingRandomPatches\n", - "\n", - "SRP_moa = StreamingRandomPatches()\n", - "SRP_moa.getOptions().setViaCLIString(\"-s 10\")\n", - "SRP_moa.prepareForUse()\n", - "\n", - "elec_tiny_arff.restart()\n", - "\n", - "acc, wallclock, cpu_time, df = test_train_loop_MOA(elec_tiny_arff, SRP_moa, maxInstances=2000, sampleFrequency=2000)\n", - "print(f\"{SRP_moa.getClass().getName()}, {acc:.4f}, {wallclock:.4f}, {cpu_time:.4f}\")" - ] - }, - { - "cell_type": "markdown", - "id": "c0886e57-bcd3-46be-9d56-d04888831add", - "metadata": {}, - "source": [ - "# Experiments MOA vs River using ARF\n", - "\n", - "* These tests were using the old test_train_loop() and run_experiment() methods. \n", - "\n", - "* They are safe to delete or update (but the river ones take too long to execute).\n", - "\n", - "* **SAVED THE RESULTS BECAUSE RIVER TAKES TOO LONG TO EXECUTE! Especially for ARF100!**" - ] - }, - { - "cell_type": "markdown", - "id": "9f744b99-2661-462a-bde4-a53b4de3a694", - "metadata": {}, - "source": [ - "## MOA ARF10" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "94bafd08-94cd-4577-b9b6-e8f39d24ae97", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 50.8 s, sys: 367 ms, total: 51.1 s\n", - "Wall time: 50.2 s\n" - ] - }, - { - "data": { - "text/plain": [ - "(83.88189655172414,\n", - " 50.219346046447754,\n", - " 51.13412499999999,\n", - " classified instances classifications correct (percent) \\\n", - " 0 17000.0 86.317647 \n", - " 1 18000.0 86.577778 \n", - " 2 19000.0 86.794737 \n", - " 3 20000.0 87.030000 \n", - " 4 21000.0 87.266667 \n", - " .. ... ... \n", - " 95 112000.0 84.234821 \n", - " 96 113000.0 84.133628 \n", - " 97 114000.0 84.052632 \n", - " 98 115000.0 83.971304 \n", - " 99 116000.0 83.881897 \n", - " \n", - " Kappa Statistic (percent) Kappa Temporal Statistic (percent) \\\n", - " 0 71.151605 7.293743 \n", - " 1 71.304855 13.929462 \n", - " 2 71.387396 18.855110 \n", - " 3 71.509604 22.935235 \n", - " 4 71.645880 26.538462 \n", - " .. ... ... \n", - " 95 76.055835 66.396422 \n", - " 96 75.870843 66.291269 \n", - " 97 75.714576 66.224501 \n", - " 98 75.552421 66.136973 \n", - " 99 75.376769 66.039415 \n", - " \n", - " Kappa M Statistic (percent) \n", - " 0 64.187837 \n", - " 1 63.772680 \n", - " 2 63.388297 \n", - " 3 63.053696 \n", - " 4 62.799110 \n", - " .. ... \n", - " 95 68.073411 \n", - " 96 67.855350 \n", - " 97 67.669079 \n", - " 98 67.474238 \n", - " 99 67.257412 \n", - " \n", - " [100 rows x 5 columns])" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%time\n", - "\n", - "from moa.classifiers.meta import AdaptiveRandomForest\n", - "from moa.core import Example\n", - "from moa.evaluation import BasicClassificationPerformanceEvaluator\n", - "from moa.streams import ArffFileStream\n", - "\n", - "arf10 = AdaptiveRandomForest()\n", - "arf10.getOptions().setViaCLIString(\"-s 10\")\n", - "arf10.setRandomSeed(1)\n", - "arf10.prepareForUse()\n", - "\n", - "rtg_2abrupt = ArffFileStream(\"/Users/gomeshe/Desktop/data/RTG_2abrupt.arff\", -1)\n", - "rtg_2abrupt.prepareForUse()\n", - "\n", - "# Updated this one. \n", - "test_train_loop_MOA(rtg_2abrupt, arf10, maxInstances=100000, sampleFrequency=1000)" - ] - }, - { - "cell_type": "markdown", - "id": "46f4d235-220d-44b9-abf9-07269030c175", - "metadata": {}, - "source": [ - "CPU times: user 48.2 s, sys: 193 ms, total: 48.4 s\n", - "Wall time: 47.4 s\n", - "\n", - "83.527000" - ] - }, - { - "cell_type": "markdown", - "id": "63df8448-9053-448e-83e2-43456e5b0451", - "metadata": {}, - "source": [ - "## RIVER ARF10" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "e3dda62f-23b1-40f4-8dc8-913215fded24", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Learner: ARFClassifier Result: (0.79017, 379)\n", - "CPU times: user 6min 17s, sys: 8.05 s, total: 6min 25s\n", - "Wall time: 6min 21s\n" - ] - } - ], - "source": [ - "%%time\n", - "from river.forest import ARFClassifier\n", - "\n", - "rtg_2abrupt_path = '/Users/gomeshe/Desktop/data/RTG_2abrupt.csv'\n", - "\n", - "river_arf10 = ARFClassifier(\n", - " n_models=10,\n", - " max_features=0.60\n", - ")\n", - "\n", - "river_rtg_2abrupt = pd.read_csv(rtg_2abrupt_path).to_numpy()\n", - "\n", - "result = run_experiment(river_arf10, river_rtg_2abrupt)\n", - "print('Learner: ', river_arf10, ' Result: ', result)" - ] - }, - { - "cell_type": "markdown", - "id": "83a8b293-8ad5-4b8b-9f7f-7b930eac1f4a", - "metadata": {}, - "source": [ - "CPU times: user 6min 5s, sys: 3.75 s, total: 6min 8s\n", - "Wall time: 6min 9s\n", - "\n", - "Learner: ARFClassifier Result: (0.80196, 368)" - ] - }, - { - "cell_type": "markdown", - "id": "172377ea-923e-455f-b3f1-7808ab327fc6", - "metadata": {}, - "source": [ - "## MOA ARF30" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "c0aba450-349a-4bb1-ac46-daf3dda0238b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 2min 40s, sys: 508 ms, total: 2min 40s\n", - "Wall time: 2min 36s\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
classified instancesclassifications correct (percent)Kappa Statistic (percent)Kappa Temporal Statistic (percent)Kappa M Statistic (percent)Recall for class 0 (percent)Recall for class 1 (percent)Recall for class 2 (percent)Recall for class 3 (percent)Recall for class 4 (percent)
01000.088.90000047.85696858.42696631.90184098.68578338.650307NaNNaNNaN
12000.089.55000054.06734763.00885037.98219698.49669345.400593NaNNaNNaN
23000.089.50000055.65087662.94117639.53934798.30576847.600768NaNNaNNaN
34000.090.05000058.07143364.59074742.23512398.39927549.927431NaNNaNNaN
45000.090.16000058.28879664.80686742.52336498.45559850.000000NaNNaNNaN
.................................
9596000.084.03437575.05210269.53064468.70316396.97833270.71553658.35486617.52516390.082503
9697000.083.92164974.85276469.38719468.45724596.98881370.62087758.02574217.51592489.952536
9798000.083.83571474.69307569.29086568.25387397.00629970.59006157.83942817.36288789.764364
9899000.083.75454574.53510569.18493368.05125197.02425070.55127057.61830517.16355589.620224
99100000.083.66600074.36258169.07200967.82809197.03426870.48658557.33992217.14441789.467704
\n", - "

100 rows × 10 columns

\n", - "
" - ], - "text/plain": [ - " classified instances classifications correct (percent) \\\n", - "0 1000.0 88.900000 \n", - "1 2000.0 89.550000 \n", - "2 3000.0 89.500000 \n", - "3 4000.0 90.050000 \n", - "4 5000.0 90.160000 \n", - ".. ... ... \n", - "95 96000.0 84.034375 \n", - "96 97000.0 83.921649 \n", - "97 98000.0 83.835714 \n", - "98 99000.0 83.754545 \n", - "99 100000.0 83.666000 \n", - "\n", - " Kappa Statistic (percent) Kappa Temporal Statistic (percent) \\\n", - "0 47.856968 58.426966 \n", - "1 54.067347 63.008850 \n", - "2 55.650876 62.941176 \n", - "3 58.071433 64.590747 \n", - "4 58.288796 64.806867 \n", - ".. ... ... \n", - "95 75.052102 69.530644 \n", - "96 74.852764 69.387194 \n", - "97 74.693075 69.290865 \n", - "98 74.535105 69.184933 \n", - "99 74.362581 69.072009 \n", - "\n", - " Kappa M Statistic (percent) Recall for class 0 (percent) \\\n", - "0 31.901840 98.685783 \n", - "1 37.982196 98.496693 \n", - "2 39.539347 98.305768 \n", - "3 42.235123 98.399275 \n", - "4 42.523364 98.455598 \n", - ".. ... ... \n", - "95 68.703163 96.978332 \n", - "96 68.457245 96.988813 \n", - "97 68.253873 97.006299 \n", - "98 68.051251 97.024250 \n", - "99 67.828091 97.034268 \n", - "\n", - " Recall for class 1 (percent) Recall for class 2 (percent) \\\n", - "0 38.650307 NaN \n", - "1 45.400593 NaN \n", - "2 47.600768 NaN \n", - "3 49.927431 NaN \n", - "4 50.000000 NaN \n", - ".. ... ... \n", - "95 70.715536 58.354866 \n", - "96 70.620877 58.025742 \n", - "97 70.590061 57.839428 \n", - "98 70.551270 57.618305 \n", - "99 70.486585 57.339922 \n", - "\n", - " Recall for class 3 (percent) Recall for class 4 (percent) \n", - "0 NaN NaN \n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - ".. ... ... \n", - "95 17.525163 90.082503 \n", - "96 17.515924 89.952536 \n", - "97 17.362887 89.764364 \n", - "98 17.163555 89.620224 \n", - "99 17.144417 89.467704 \n", - "\n", - "[100 rows x 10 columns]" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%time\n", - "\n", - "from moa.classifiers.meta import AdaptiveRandomForest\n", - "from moa.core import Example\n", - "from moa.evaluation import BasicClassificationPerformanceEvaluator\n", - "from moa.streams import ArffFileStream\n", - "\n", - "arf30 = AdaptiveRandomForest()\n", - "arf30.getOptions().setViaCLIString(\"-s 30\")\n", - "arf30.setRandomSeed(1)\n", - "arf30.prepareForUse()\n", - "\n", - "rtg_2abrupt = ArffFileStream(\"/Users/gomeshe/Desktop/data/RTG_2abrupt.arff\", -1)\n", - "rtg_2abrupt.prepareForUse()\n", - "\n", - "evaluator = BasicClassificationPerformanceEvaluator()\n", - "evaluator.recallPerClassOption.set()\n", - "evaluator.prepareForUse()\n", - "\n", - "test_train_loop(rtg_2abrupt, arf30, evaluator, maxInstances=100000, sampleFrequency=1000)" - ] - }, - { - "cell_type": "markdown", - "id": "1640e8c9-0353-4b04-b448-856073ef6d0f", - "metadata": {}, - "source": [ - "CPU times: user 2min 40s, sys: 508 ms, total: 2min 40s\n", - "Wall time: 2min 36s\n", - "\n", - "83.666000" - ] - }, - { - "cell_type": "markdown", - "id": "906b04c6-c5a4-4035-ba5d-31fba17e1118", - "metadata": {}, - "source": [ - "## RIVER ARF30" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "0665dae0-d21d-4a1d-9a46-0cbfdc356ed9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Learner: ARFClassifier Result: (0.80162, 1145)\n", - "CPU times: user 18min 56s, sys: 11.7 s, total: 19min 7s\n", - "Wall time: 19min 6s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "river_arf30 = ARFClassifier(\n", - " n_models=30,\n", - " max_features=0.60\n", - ")\n", - "\n", - "river_rtg_2abrupt = pd.read_csv(rtg_2abrupt_path).to_numpy()\n", - "\n", - "result = run_experiment(river_arf30, river_rtg_2abrupt)\n", - "print('Learner: ', river_arf30, ' Result: ', result)" - ] - }, - { - "cell_type": "markdown", - "id": "2d9588fa-d162-48e6-9ffd-e6833bbb9562", - "metadata": {}, - "source": [ - "CPU times: user 18min 56s, sys: 11.7 s, total: 19min 7s\n", - "Wall time: 19min 6s\n", - "\n", - "Learner: ARFClassifier Result: (0.80162, 1145)" - ] - }, - { - "cell_type": "markdown", - "id": "9436f4b5-209d-4661-8fa1-9875932e6b2b", - "metadata": {}, - "source": [ - "## ARF100" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "fce1ac7e-d62f-4196-87bc-22dfb5a44c74", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 11min 15s, sys: 2.25 s, total: 11min 17s\n", - "Wall time: 10min 28s\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
classified instancesclassifications correct (percent)Kappa Statistic (percent)Kappa Temporal Statistic (percent)Kappa M Statistic (percent)Recall for class 0 (percent)Recall for class 1 (percent)Recall for class 2 (percent)Recall for class 3 (percent)Recall for class 4 (percent)
01000.088.90000048.18507758.42696631.90184098.56630839.263804NaNNaNNaN
12000.089.60000054.08641963.18584138.27893298.61695745.103858NaNNaNNaN
23000.089.46666755.13908362.82352939.34740998.46712446.641075NaNNaNNaN
34000.089.87500056.82717063.96797241.21915898.55028748.185776NaNNaNNaN
45000.090.12000057.74229664.66380542.28972098.62451748.948598NaNNaNNaN
.................................
9596000.083.96666774.93669069.40142768.57043796.93792970.65188658.93626216.07460089.859058
9697000.083.86288774.75084469.27531168.34196396.96147770.54235258.64269816.18413489.718076
9798000.083.78163374.59857869.18812068.14765896.98343170.49697558.44892816.19778389.530439
9899000.083.69697074.43445969.07572067.93802197.00369970.42865258.16952716.16049089.392473
99100000.083.61600074.27422568.97733567.72960997.01598770.33500157.97817616.27081689.234790
\n", - "

100 rows × 10 columns

\n", - "
" - ], - "text/plain": [ - " classified instances classifications correct (percent) \\\n", - "0 1000.0 88.900000 \n", - "1 2000.0 89.600000 \n", - "2 3000.0 89.466667 \n", - "3 4000.0 89.875000 \n", - "4 5000.0 90.120000 \n", - ".. ... ... \n", - "95 96000.0 83.966667 \n", - "96 97000.0 83.862887 \n", - "97 98000.0 83.781633 \n", - "98 99000.0 83.696970 \n", - "99 100000.0 83.616000 \n", - "\n", - " Kappa Statistic (percent) Kappa Temporal Statistic (percent) \\\n", - "0 48.185077 58.426966 \n", - "1 54.086419 63.185841 \n", - "2 55.139083 62.823529 \n", - "3 56.827170 63.967972 \n", - "4 57.742296 64.663805 \n", - ".. ... ... \n", - "95 74.936690 69.401427 \n", - "96 74.750844 69.275311 \n", - "97 74.598578 69.188120 \n", - "98 74.434459 69.075720 \n", - "99 74.274225 68.977335 \n", - "\n", - " Kappa M Statistic (percent) Recall for class 0 (percent) \\\n", - "0 31.901840 98.566308 \n", - "1 38.278932 98.616957 \n", - "2 39.347409 98.467124 \n", - "3 41.219158 98.550287 \n", - "4 42.289720 98.624517 \n", - ".. ... ... \n", - "95 68.570437 96.937929 \n", - "96 68.341963 96.961477 \n", - "97 68.147658 96.983431 \n", - "98 67.938021 97.003699 \n", - "99 67.729609 97.015987 \n", - "\n", - " Recall for class 1 (percent) Recall for class 2 (percent) \\\n", - "0 39.263804 NaN \n", - "1 45.103858 NaN \n", - "2 46.641075 NaN \n", - "3 48.185776 NaN \n", - "4 48.948598 NaN \n", - ".. ... ... \n", - "95 70.651886 58.936262 \n", - "96 70.542352 58.642698 \n", - "97 70.496975 58.448928 \n", - "98 70.428652 58.169527 \n", - "99 70.335001 57.978176 \n", - "\n", - " Recall for class 3 (percent) Recall for class 4 (percent) \n", - "0 NaN NaN \n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - ".. ... ... \n", - "95 16.074600 89.859058 \n", - "96 16.184134 89.718076 \n", - "97 16.197783 89.530439 \n", - "98 16.160490 89.392473 \n", - "99 16.270816 89.234790 \n", - "\n", - "[100 rows x 10 columns]" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%time\n", - "\n", - "from moa.classifiers.meta import AdaptiveRandomForest\n", - "from moa.core import Example\n", - "from moa.evaluation import BasicClassificationPerformanceEvaluator\n", - "from moa.streams import ArffFileStream\n", - "\n", - "arf100 = AdaptiveRandomForest()\n", - "arf100.getOptions().setViaCLIString(\"-s 100\")\n", - "arf100.setRandomSeed(1)\n", - "arf100.prepareForUse()\n", - "\n", - "rtg_2abrupt = ArffFileStream(\"/Users/gomeshe/Desktop/data/RTG_2abrupt.arff\", -1)\n", - "rtg_2abrupt.prepareForUse()\n", - "\n", - "evaluator = BasicClassificationPerformanceEvaluator()\n", - "evaluator.recallPerClassOption.set()\n", - "evaluator.prepareForUse()\n", - "\n", - "test_train_loop(rtg_2abrupt, arf100, evaluator, maxInstances=100000, sampleFrequency=1000)" - ] - }, - { - "cell_type": "markdown", - "id": "02a8856b-6664-407e-83c2-3d78c48d9343", - "metadata": {}, - "source": [ - "CPU times: user 11min 15s, sys: 2.25 s, total: 11min 17s\n", - "Wall time: 10min 28s\n", - "\n", - "83.616000" - ] - }, - { - "cell_type": "markdown", - "id": "d568b5b6-5836-4fe9-8cb0-eca28b8e9082", - "metadata": {}, - "source": [ - "## RIVER ARF100" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "647edb8e-fcad-401d-acc3-33aeb34b71c9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Learner: ARFClassifier Result: (0.80145, 4384)\n", - "CPU times: user 1h 13min 34s, sys: 42.7 s, total: 1h 14min 17s\n", - "Wall time: 2h 5s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "river_arf100 = ARFClassifier(\n", - " n_models=100,\n", - " max_features=0.60\n", - ")\n", - "\n", - "river_rtg_2abrupt = pd.read_csv(rtg_2abrupt_path).to_numpy()\n", - "\n", - "result = run_experiment(river_arf100, river_rtg_2abrupt)\n", - "print('Learner: ', river_arf100, ' Result: ', result)" - ] - }, - { - "cell_type": "markdown", - "id": "4dd276b8-9bb2-4feb-b59d-60aff8f21900", - "metadata": {}, - "source": [ - "CPU times: user 1h 13min 34s, sys: 42.7 s, total: 1h 14min 17s\n", - "Wall time: 2h 5s\n", - "\n", - "Learner: ARFClassifier Result: (0.80145, 4384)" - ] - }, - { - "cell_type": "markdown", - "id": "fa9a71e2-5730-4309-96ed-a7c6aa4e8509", - "metadata": {}, - "source": [ - "## ARF100 -j 4" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "28c4e3b0-a0f9-4755-862e-ee4f3c8b7683", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 8min 25s, sys: 8.7 s, total: 8min 34s\n", - "Wall time: 3min 45s\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
classified instancesclassifications correct (percent)Kappa Statistic (percent)Kappa Temporal Statistic (percent)Kappa M Statistic (percent)Recall for class 0 (percent)Recall for class 1 (percent)Recall for class 2 (percent)Recall for class 3 (percent)Recall for class 4 (percent)
01000.088.90000048.18507758.42696631.90184098.56630839.263804NaNNaNNaN
12000.089.60000054.08641963.18584138.27893298.61695745.103858NaNNaNNaN
23000.089.46666755.13908362.82352939.34740998.46712446.641075NaNNaNNaN
34000.089.87500056.82717063.96797241.21915898.55028748.185776NaNNaNNaN
45000.090.12000057.74229664.66380542.28972098.62451748.948598NaNNaNNaN
.................................
9596000.083.96666774.93669069.40142768.57043796.93792970.65188658.93626216.07460089.859058
9697000.083.86288774.75084469.27531168.34196396.96147770.54235258.64269816.18413489.718076
9798000.083.78163374.59857869.18812068.14765896.98343170.49697558.44892816.19778389.530439
9899000.083.69697074.43445969.07572067.93802197.00369970.42865258.16952716.16049089.392473
99100000.083.61600074.27422568.97733567.72960997.01598770.33500157.97817616.27081689.234790
\n", - "

100 rows × 10 columns

\n", - "
" - ], - "text/plain": [ - " classified instances classifications correct (percent) \\\n", - "0 1000.0 88.900000 \n", - "1 2000.0 89.600000 \n", - "2 3000.0 89.466667 \n", - "3 4000.0 89.875000 \n", - "4 5000.0 90.120000 \n", - ".. ... ... \n", - "95 96000.0 83.966667 \n", - "96 97000.0 83.862887 \n", - "97 98000.0 83.781633 \n", - "98 99000.0 83.696970 \n", - "99 100000.0 83.616000 \n", - "\n", - " Kappa Statistic (percent) Kappa Temporal Statistic (percent) \\\n", - "0 48.185077 58.426966 \n", - "1 54.086419 63.185841 \n", - "2 55.139083 62.823529 \n", - "3 56.827170 63.967972 \n", - "4 57.742296 64.663805 \n", - ".. ... ... \n", - "95 74.936690 69.401427 \n", - "96 74.750844 69.275311 \n", - "97 74.598578 69.188120 \n", - "98 74.434459 69.075720 \n", - "99 74.274225 68.977335 \n", - "\n", - " Kappa M Statistic (percent) Recall for class 0 (percent) \\\n", - "0 31.901840 98.566308 \n", - "1 38.278932 98.616957 \n", - "2 39.347409 98.467124 \n", - "3 41.219158 98.550287 \n", - "4 42.289720 98.624517 \n", - ".. ... ... \n", - "95 68.570437 96.937929 \n", - "96 68.341963 96.961477 \n", - "97 68.147658 96.983431 \n", - "98 67.938021 97.003699 \n", - "99 67.729609 97.015987 \n", - "\n", - " Recall for class 1 (percent) Recall for class 2 (percent) \\\n", - "0 39.263804 NaN \n", - "1 45.103858 NaN \n", - "2 46.641075 NaN \n", - "3 48.185776 NaN \n", - "4 48.948598 NaN \n", - ".. ... ... \n", - "95 70.651886 58.936262 \n", - "96 70.542352 58.642698 \n", - "97 70.496975 58.448928 \n", - "98 70.428652 58.169527 \n", - "99 70.335001 57.978176 \n", - "\n", - " Recall for class 3 (percent) Recall for class 4 (percent) \n", - "0 NaN NaN \n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - ".. ... ... \n", - "95 16.074600 89.859058 \n", - "96 16.184134 89.718076 \n", - "97 16.197783 89.530439 \n", - "98 16.160490 89.392473 \n", - "99 16.270816 89.234790 \n", - "\n", - "[100 rows x 10 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%time\n", - "\n", - "from moa.classifiers.meta import AdaptiveRandomForest\n", - "from moa.core import Example\n", - "from moa.evaluation import BasicClassificationPerformanceEvaluator\n", - "from moa.streams import ArffFileStream\n", - "\n", - "arf100 = AdaptiveRandomForest()\n", - "arf100.getOptions().setViaCLIString(\"-s 100 -j 4\")\n", - "arf100.setRandomSeed(1)\n", - "arf100.prepareForUse()\n", - "\n", - "rtg_2abrupt = ArffFileStream(\"/Users/gomeshe/Desktop/data/RTG_2abrupt.arff\", -1)\n", - "rtg_2abrupt.prepareForUse()\n", - "\n", - "evaluator = BasicClassificationPerformanceEvaluator()\n", - "evaluator.recallPerClassOption.set()\n", - "evaluator.prepareForUse()\n", - "\n", - "test_train_loop(rtg_2abrupt, arf100, evaluator, maxInstances=100000, sampleFrequency=1000)" - ] - }, - { - "cell_type": "markdown", - "id": "f459bd5e-c70d-4065-a20e-5c04e24eac9a", - "metadata": {}, - "source": [ - "## ARF100 -j 8" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "fb87016c-9df1-4d9f-aa9f-08decb704a83", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 9min 43s, sys: 12.7 s, total: 9min 55s\n", - "Wall time: 4min 18s\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
classified instancesclassifications correct (percent)Kappa Statistic (percent)Kappa Temporal Statistic (percent)Kappa M Statistic (percent)Recall for class 0 (percent)Recall for class 1 (percent)Recall for class 2 (percent)Recall for class 3 (percent)Recall for class 4 (percent)
01000.088.90000048.18507758.42696631.90184098.56630839.263804NaNNaNNaN
12000.089.60000054.08641963.18584138.27893298.61695745.103858NaNNaNNaN
23000.089.46666755.13908362.82352939.34740998.46712446.641075NaNNaNNaN
34000.089.87500056.82717063.96797241.21915898.55028748.185776NaNNaNNaN
45000.090.12000057.74229664.66380542.28972098.62451748.948598NaNNaNNaN
.................................
9596000.083.96666774.93669069.40142768.57043796.93792970.65188658.93626216.07460089.859058
9697000.083.86288774.75084469.27531168.34196396.96147770.54235258.64269816.18413489.718076
9798000.083.78163374.59857869.18812068.14765896.98343170.49697558.44892816.19778389.530439
9899000.083.69697074.43445969.07572067.93802197.00369970.42865258.16952716.16049089.392473
99100000.083.61600074.27422568.97733567.72960997.01598770.33500157.97817616.27081689.234790
\n", - "

100 rows × 10 columns

\n", - "
" - ], - "text/plain": [ - " classified instances classifications correct (percent) \\\n", - "0 1000.0 88.900000 \n", - "1 2000.0 89.600000 \n", - "2 3000.0 89.466667 \n", - "3 4000.0 89.875000 \n", - "4 5000.0 90.120000 \n", - ".. ... ... \n", - "95 96000.0 83.966667 \n", - "96 97000.0 83.862887 \n", - "97 98000.0 83.781633 \n", - "98 99000.0 83.696970 \n", - "99 100000.0 83.616000 \n", - "\n", - " Kappa Statistic (percent) Kappa Temporal Statistic (percent) \\\n", - "0 48.185077 58.426966 \n", - "1 54.086419 63.185841 \n", - "2 55.139083 62.823529 \n", - "3 56.827170 63.967972 \n", - "4 57.742296 64.663805 \n", - ".. ... ... \n", - "95 74.936690 69.401427 \n", - "96 74.750844 69.275311 \n", - "97 74.598578 69.188120 \n", - "98 74.434459 69.075720 \n", - "99 74.274225 68.977335 \n", - "\n", - " Kappa M Statistic (percent) Recall for class 0 (percent) \\\n", - "0 31.901840 98.566308 \n", - "1 38.278932 98.616957 \n", - "2 39.347409 98.467124 \n", - "3 41.219158 98.550287 \n", - "4 42.289720 98.624517 \n", - ".. ... ... \n", - "95 68.570437 96.937929 \n", - "96 68.341963 96.961477 \n", - "97 68.147658 96.983431 \n", - "98 67.938021 97.003699 \n", - "99 67.729609 97.015987 \n", - "\n", - " Recall for class 1 (percent) Recall for class 2 (percent) \\\n", - "0 39.263804 NaN \n", - "1 45.103858 NaN \n", - "2 46.641075 NaN \n", - "3 48.185776 NaN \n", - "4 48.948598 NaN \n", - ".. ... ... \n", - "95 70.651886 58.936262 \n", - "96 70.542352 58.642698 \n", - "97 70.496975 58.448928 \n", - "98 70.428652 58.169527 \n", - "99 70.335001 57.978176 \n", - "\n", - " Recall for class 3 (percent) Recall for class 4 (percent) \n", - "0 NaN NaN \n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - ".. ... ... \n", - "95 16.074600 89.859058 \n", - "96 16.184134 89.718076 \n", - "97 16.197783 89.530439 \n", - "98 16.160490 89.392473 \n", - "99 16.270816 89.234790 \n", - "\n", - "[100 rows x 10 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%time\n", - "\n", - "from moa.classifiers.meta import AdaptiveRandomForest\n", - "from moa.core import Example\n", - "from moa.evaluation import BasicClassificationPerformanceEvaluator\n", - "from moa.streams import ArffFileStream\n", - "\n", - "arf100 = AdaptiveRandomForest()\n", - "arf100.getOptions().setViaCLIString(\"-s 100 -j 8\")\n", - "arf100.setRandomSeed(1)\n", - "arf100.prepareForUse()\n", - "\n", - "rtg_2abrupt = ArffFileStream(\"/Users/gomeshe/Desktop/data/RTG_2abrupt.arff\", -1)\n", - "rtg_2abrupt.prepareForUse()\n", - "\n", - "evaluator = BasicClassificationPerformanceEvaluator()\n", - "evaluator.recallPerClassOption.set()\n", - "evaluator.prepareForUse()\n", - "\n", - "test_train_loop(rtg_2abrupt, arf100, evaluator, maxInstances=100000, sampleFrequency=1000)" - ] - }, - { - "cell_type": "markdown", - "id": "6114fd77-9d53-492d-8906-f2e0c6fb40a1", - "metadata": {}, - "source": [ - "CPU times: user 9min 43s, sys: 12.7 s, total: 9min 55s\n", - "Wall time: 4min 18s\n", - "\n", - "83.616000\t" - ] - }, - { - "cell_type": "markdown", - "id": "8cbee199-c4e2-4907-b110-7b64a66458d0", - "metadata": {}, - "source": [ - "## ARF100 -j 2" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "68a0b070-0ba8-4122-b33f-132b366a4fe3", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 9min 42s, sys: 8.26 s, total: 9min 50s\n", - "Wall time: 6min 20s\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
classified instancesclassifications correct (percent)Kappa Statistic (percent)Kappa Temporal Statistic (percent)Kappa M Statistic (percent)Recall for class 0 (percent)Recall for class 1 (percent)Recall for class 2 (percent)Recall for class 3 (percent)Recall for class 4 (percent)
01000.088.90000048.18507758.42696631.90184098.56630839.263804NaNNaNNaN
12000.089.60000054.08641963.18584138.27893298.61695745.103858NaNNaNNaN
23000.089.46666755.13908362.82352939.34740998.46712446.641075NaNNaNNaN
34000.089.87500056.82717063.96797241.21915898.55028748.185776NaNNaNNaN
45000.090.12000057.74229664.66380542.28972098.62451748.948598NaNNaNNaN
.................................
9596000.083.96666774.93669069.40142768.57043796.93792970.65188658.93626216.07460089.859058
9697000.083.86288774.75084469.27531168.34196396.96147770.54235258.64269816.18413489.718076
9798000.083.78163374.59857869.18812068.14765896.98343170.49697558.44892816.19778389.530439
9899000.083.69697074.43445969.07572067.93802197.00369970.42865258.16952716.16049089.392473
99100000.083.61600074.27422568.97733567.72960997.01598770.33500157.97817616.27081689.234790
\n", - "

100 rows × 10 columns

\n", - "
" - ], - "text/plain": [ - " classified instances classifications correct (percent) \\\n", - "0 1000.0 88.900000 \n", - "1 2000.0 89.600000 \n", - "2 3000.0 89.466667 \n", - "3 4000.0 89.875000 \n", - "4 5000.0 90.120000 \n", - ".. ... ... \n", - "95 96000.0 83.966667 \n", - "96 97000.0 83.862887 \n", - "97 98000.0 83.781633 \n", - "98 99000.0 83.696970 \n", - "99 100000.0 83.616000 \n", - "\n", - " Kappa Statistic (percent) Kappa Temporal Statistic (percent) \\\n", - "0 48.185077 58.426966 \n", - "1 54.086419 63.185841 \n", - "2 55.139083 62.823529 \n", - "3 56.827170 63.967972 \n", - "4 57.742296 64.663805 \n", - ".. ... ... \n", - "95 74.936690 69.401427 \n", - "96 74.750844 69.275311 \n", - "97 74.598578 69.188120 \n", - "98 74.434459 69.075720 \n", - "99 74.274225 68.977335 \n", - "\n", - " Kappa M Statistic (percent) Recall for class 0 (percent) \\\n", - "0 31.901840 98.566308 \n", - "1 38.278932 98.616957 \n", - "2 39.347409 98.467124 \n", - "3 41.219158 98.550287 \n", - "4 42.289720 98.624517 \n", - ".. ... ... \n", - "95 68.570437 96.937929 \n", - "96 68.341963 96.961477 \n", - "97 68.147658 96.983431 \n", - "98 67.938021 97.003699 \n", - "99 67.729609 97.015987 \n", - "\n", - " Recall for class 1 (percent) Recall for class 2 (percent) \\\n", - "0 39.263804 NaN \n", - "1 45.103858 NaN \n", - "2 46.641075 NaN \n", - "3 48.185776 NaN \n", - "4 48.948598 NaN \n", - ".. ... ... \n", - "95 70.651886 58.936262 \n", - "96 70.542352 58.642698 \n", - "97 70.496975 58.448928 \n", - "98 70.428652 58.169527 \n", - "99 70.335001 57.978176 \n", - "\n", - " Recall for class 3 (percent) Recall for class 4 (percent) \n", - "0 NaN NaN \n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - ".. ... ... \n", - "95 16.074600 89.859058 \n", - "96 16.184134 89.718076 \n", - "97 16.197783 89.530439 \n", - "98 16.160490 89.392473 \n", - "99 16.270816 89.234790 \n", - "\n", - "[100 rows x 10 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%time\n", - "\n", - "from moa.classifiers.meta import AdaptiveRandomForest\n", - "from moa.core import Example\n", - "from moa.evaluation import BasicClassificationPerformanceEvaluator\n", - "from moa.streams import ArffFileStream\n", - "\n", - "arf100 = AdaptiveRandomForest()\n", - "arf100.getOptions().setViaCLIString(\"-s 100 -j 2\")\n", - "arf100.setRandomSeed(1)\n", - "arf100.prepareForUse()\n", - "\n", - "rtg_2abrupt = ArffFileStream(\"/Users/gomeshe/Desktop/data/RTG_2abrupt.arff\", -1)\n", - "rtg_2abrupt.prepareForUse()\n", - "\n", - "evaluator = BasicClassificationPerformanceEvaluator()\n", - "evaluator.recallPerClassOption.set()\n", - "evaluator.prepareForUse()\n", - "\n", - "test_train_loop(rtg_2abrupt, arf100, evaluator, maxInstances=100000, sampleFrequency=1000)" - ] - }, - { - "cell_type": "markdown", - "id": "17ed7480-582f-479b-ab4b-80445b147841", - "metadata": {}, - "source": [ - "CPU times: user 9min 42s, sys: 8.26 s, total: 9min 50s\n", - "Wall time: 6min 20s\n", - "\n", - "83.616000" - ] - }, - { - "cell_type": "markdown", - "id": "ed024403-f9a3-4d1f-8f07-f39adcb034ab", - "metadata": {}, - "source": [ - "## ARF100 -j 5" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "75c69c1e-c379-4850-805e-ea9d94caef84", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 9min 39s, sys: 8.91 s, total: 9min 48s\n", - "Wall time: 4min 37s\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
classified instancesclassifications correct (percent)Kappa Statistic (percent)Kappa Temporal Statistic (percent)Kappa M Statistic (percent)Recall for class 0 (percent)Recall for class 1 (percent)Recall for class 2 (percent)Recall for class 3 (percent)Recall for class 4 (percent)
01000.088.90000048.18507758.42696631.90184098.56630839.263804NaNNaNNaN
12000.089.60000054.08641963.18584138.27893298.61695745.103858NaNNaNNaN
23000.089.46666755.13908362.82352939.34740998.46712446.641075NaNNaNNaN
34000.089.87500056.82717063.96797241.21915898.55028748.185776NaNNaNNaN
45000.090.12000057.74229664.66380542.28972098.62451748.948598NaNNaNNaN
.................................
9596000.083.96666774.93669069.40142768.57043796.93792970.65188658.93626216.07460089.859058
9697000.083.86288774.75084469.27531168.34196396.96147770.54235258.64269816.18413489.718076
9798000.083.78163374.59857869.18812068.14765896.98343170.49697558.44892816.19778389.530439
9899000.083.69697074.43445969.07572067.93802197.00369970.42865258.16952716.16049089.392473
99100000.083.61600074.27422568.97733567.72960997.01598770.33500157.97817616.27081689.234790
\n", - "

100 rows × 10 columns

\n", - "
" - ], - "text/plain": [ - " classified instances classifications correct (percent) \\\n", - "0 1000.0 88.900000 \n", - "1 2000.0 89.600000 \n", - "2 3000.0 89.466667 \n", - "3 4000.0 89.875000 \n", - "4 5000.0 90.120000 \n", - ".. ... ... \n", - "95 96000.0 83.966667 \n", - "96 97000.0 83.862887 \n", - "97 98000.0 83.781633 \n", - "98 99000.0 83.696970 \n", - "99 100000.0 83.616000 \n", - "\n", - " Kappa Statistic (percent) Kappa Temporal Statistic (percent) \\\n", - "0 48.185077 58.426966 \n", - "1 54.086419 63.185841 \n", - "2 55.139083 62.823529 \n", - "3 56.827170 63.967972 \n", - "4 57.742296 64.663805 \n", - ".. ... ... \n", - "95 74.936690 69.401427 \n", - "96 74.750844 69.275311 \n", - "97 74.598578 69.188120 \n", - "98 74.434459 69.075720 \n", - "99 74.274225 68.977335 \n", - "\n", - " Kappa M Statistic (percent) Recall for class 0 (percent) \\\n", - "0 31.901840 98.566308 \n", - "1 38.278932 98.616957 \n", - "2 39.347409 98.467124 \n", - "3 41.219158 98.550287 \n", - "4 42.289720 98.624517 \n", - ".. ... ... \n", - "95 68.570437 96.937929 \n", - "96 68.341963 96.961477 \n", - "97 68.147658 96.983431 \n", - "98 67.938021 97.003699 \n", - "99 67.729609 97.015987 \n", - "\n", - " Recall for class 1 (percent) Recall for class 2 (percent) \\\n", - "0 39.263804 NaN \n", - "1 45.103858 NaN \n", - "2 46.641075 NaN \n", - "3 48.185776 NaN \n", - "4 48.948598 NaN \n", - ".. ... ... \n", - "95 70.651886 58.936262 \n", - "96 70.542352 58.642698 \n", - "97 70.496975 58.448928 \n", - "98 70.428652 58.169527 \n", - "99 70.335001 57.978176 \n", - "\n", - " Recall for class 3 (percent) Recall for class 4 (percent) \n", - "0 NaN NaN \n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - ".. ... ... \n", - "95 16.074600 89.859058 \n", - "96 16.184134 89.718076 \n", - "97 16.197783 89.530439 \n", - "98 16.160490 89.392473 \n", - "99 16.270816 89.234790 \n", - "\n", - "[100 rows x 10 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%time\n", - "\n", - "from moa.classifiers.meta import AdaptiveRandomForest\n", - "from moa.core import Example\n", - "from moa.evaluation import BasicClassificationPerformanceEvaluator\n", - "from moa.streams import ArffFileStream\n", - "\n", - "arf100 = AdaptiveRandomForest()\n", - "arf100.getOptions().setViaCLIString(\"-s 100 -j 5\")\n", - "arf100.setRandomSeed(1)\n", - "arf100.prepareForUse()\n", - "\n", - "rtg_2abrupt = ArffFileStream(\"/Users/gomeshe/Desktop/data/RTG_2abrupt.arff\", -1)\n", - "rtg_2abrupt.prepareForUse()\n", - "\n", - "evaluator = BasicClassificationPerformanceEvaluator()\n", - "evaluator.recallPerClassOption.set()\n", - "evaluator.prepareForUse()\n", - "\n", - "test_train_loop(rtg_2abrupt, arf100, evaluator, maxInstances=100000, sampleFrequency=1000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8bdc7aae-643e-451f-a62e-3ac0acb9606b", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.17" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}