adap · tanertopal · Mar 23, 2023 · Mar 17, 2023 · Mar 17, 2023 · Mar 19, 2023
@@ -0,0 +1,2 @@
+dataset
+
@@ -11,22 +11,8 @@ This example demonstrates a federated XGBoost using Flower with PyTorch. This is
 
 ## Project Setup
 
-This implementation can be easily run in Google Colab with the following file structure in Google Drive, * denotes folder:
-
-```shell
-—————————————————————————————————————————————————————————————————————
-My Drive
-  XGBoost*
-      |----- code.ipynb
-      dataset*
-          binary_classifications*
-              |----- dataset file 1
-              |----- dataset file 2
-          regression*
-              |----- dataset file 1
-              |----- dataset file 2 
-—————————————————————————————————————————————————————————————————————
-```
+This implementation can be easily run in Google Colab with the button at the top of the README, or as a standalone Jupyter notebook.
+It will automatically download and extract the example data into a `dataset` folder with `binary_classification` and `regression` sub-folders.
 
 ## Datasets
 

@@ -29,32 +29,52 @@
    },
    "outputs": [],
    "source": [
-    "# File structure to run this implementation in Google Colab, * denotes folder:\n",
-    "#—————————————————————————————————————————————————————————————————————\n",
-    "# My Drive\n",
-    "#   XGBoost*\n",
-    "#       |----- code.ipynb (this file)\n",
-    "#       dataset*\n",
-    "#           binary_classifications*\n",
-    "#               |----- dataset file 1\n",
-    "#               |----- dataset file 2\n",
-    "#           regression*\n",
-    "#               |----- dataset file 1\n",
-    "#               |----- dataset file 2 \n",
-    "#—————————————————————————————————————————————————————————————————————\n",
-    "\n",
-    "from google.colab import drive\n",
-    "drive.mount(\"/content/drive\")\n",
-    "\n",
     "import os\n",
-    "import sys\n",
-    "GOOGLE_DRIVE_PATH_AFTER_MYDRIVE = \"FedXGBoost\"\n",
-    "GOOGLE_DRIVE_PATH = os.path.join(\"drive\", \"My Drive\", GOOGLE_DRIVE_PATH_AFTER_MYDRIVE)\n",
-    "print(os.listdir(GOOGLE_DRIVE_PATH))\n",
+    "import urllib.request\n",
+    "import bz2\n",
+    "import shutil\n",
+    "\n",
+    "CLASSIFICATION_PATH = os.path.join(\"dataset\", \"binary_classification\")\n",
+    "REGRESSION_PATH = os.path.join(\"dataset\", \"regression\")\n",
+    "\n",
+    "if not os.path.exists(CLASSIFICATION_PATH):\n",
+    "  os.makedirs(CLASSIFICATION_PATH)\n",
+    "  urllib.request.urlretrieve(\"https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/cod-rna\",\n",
+    "                             f\"{os.path.join(CLASSIFICATION_PATH, 'cod-rna')}\")\n",
+    "  urllib.request.urlretrieve(\"https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/cod-rna.t\",\n",
+    "                             f\"{os.path.join(CLASSIFICATION_PATH, 'cod-rna.t')}\")\n",
+    "  urllib.request.urlretrieve(\"https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/cod-rna.r\",\n",
+    "                             f\"{os.path.join(CLASSIFICATION_PATH, 'cod-rna.r')}\")\n",
+    "  urllib.request.urlretrieve(\"https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/ijcnn1.t.bz2\",\n",
+    "                             f\"{os.path.join(CLASSIFICATION_PATH, 'ijcnn1.t.bz2')}\")\n",
+    "  urllib.request.urlretrieve(\"https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/ijcnn1.tr.bz2\",\n",
+    "                             f\"{os.path.join(CLASSIFICATION_PATH, 'ijcnn1.tr.bz2')}\")\n",
+    "  for filepath in os.listdir(CLASSIFICATION_PATH):\n",
+    "    if filepath[-3:] == \"bz2\":\n",
+    "      abs_filepath = os.path.join(CLASSIFICATION_PATH, filepath)\n",
+    "      with bz2.BZ2File(abs_filepath) as fr, open(abs_filepath[:-4],\"wb\") as fw:\n",
+    "        shutil.copyfileobj(fr,fw)\n",
+    "\n",
+    "if not os.path.exists(REGRESSION_PATH):\n",
+    "  os.makedirs(REGRESSION_PATH)\n",
+    "  urllib.request.urlretrieve(\"https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/eunite2001\",\n",
+    "                              f\"{os.path.join(REGRESSION_PATH, 'eunite2001')}\")\n",
+    "  urllib.request.urlretrieve(\"https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/eunite2001.t\",\n",
+    "                             f\"{os.path.join(REGRESSION_PATH, 'eunite2001.t')}\")\n",
+    "  urllib.request.urlretrieve(\"https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/YearPredictionMSD.bz2\",\n",
+    "                             f\"{os.path.join(REGRESSION_PATH, 'YearPredictionMSD.bz2')}\")\n",
+    "  urllib.request.urlretrieve(\"https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/YearPredictionMSD.t.bz2\",\n",
+    "                             f\"{os.path.join(REGRESSION_PATH, 'YearPredictionMSD.t.bz2')}\")\n",
+    "  for filepath in os.listdir(REGRESSION_PATH):\n",
+    "    if filepath[-3:] == \"bz2\":\n",
+    "      abs_filepath = os.path.join(REGRESSION_PATH, filepath)\n",
+    "      with bz2.BZ2File(abs_filepath) as fr, open(abs_filepath[:-4],\"wb\") as fw:\n",
+    "        shutil.copyfileobj(fr,fw)\n",
+    "\n",
     "\n",
     "# Show the available GPU and install the required packages.\n",
-    "sys.path.append(GOOGLE_DRIVE_PATH)\n",
     "!nvidia-smi\n",
+    "!pip install uvicorn fastapi requests starlette\n",
     "!pip install xgboost==1.7.2\n",
     "!pip install torchmetrics\n",
     "!pip install torchsummary\n",