Migrate sklearn data preparation to use Flower Datasets #2450

Merged
merged 10 commits on Dec 18, 2023
20 changes: 13 additions & 7 deletions examples/sklearn-logreg-mnist/README.md
@@ -1,7 +1,7 @@
# Flower Example using scikit-learn

This example of Flower uses `scikit-learn`'s `LogisticRegression` model to train a federated learning system. It will help you understand how to adapt Flower for use with `scikit-learn`.
Running this example in itself is quite easy.
Running this example in itself is quite easy. This example uses [Flower Datasets](https://flower.dev/docs/datasets/) to download, partition and preprocess the MNIST dataset.
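For orientation, here is a minimal sketch, grounded in the client.py changes further down in this diff, of how Flower Datasets loads one MNIST partition as NumPy arrays:

```python
# Minimal sketch of the Flower Datasets usage introduced by this PR.
from flwr_datasets import FederatedDataset

# Split MNIST's "train" split into 10 IID partitions.
fds = FederatedDataset(dataset="mnist", partitioners={"train": 10})

# Load partition 0 as NumPy arrays and flatten the 28x28 images to 784 features.
partition = fds.load_partition(0, "train").with_format("numpy")
X = partition["image"].reshape((len(partition), -1))
y = partition["label"]
```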

## Project Setup

@@ -57,18 +57,24 @@ Afterwards you are ready to start the Flower server as well as the clients. You
poetry run python3 server.py
```

Now you are ready to start the Flower clients which will participate in the learning. To do so simply open two more terminals and run the following command in each:
Now you are ready to start the Flower clients, which will participate in the learning. To do so, simply open two or more terminals and run the following commands, one per terminal:

Start client 1 in the first terminal:

```shell
poetry run python3 client.py
python3 client.py --node-id 0 # or any integer in {0-9}
```

Alternatively you can run all of it in one shell as follows:
Start client 2 in the second terminal:

```shell
poetry run python3 server.py &
poetry run python3 client.py &
poetry run python3 client.py
python3 client.py --node-id 1 # or any integer in {0-9}
```

Alternatively, you can run all of it in one shell as follows:

```bash
bash run.sh
```

You will see that Flower is starting a federated training.
30 changes: 23 additions & 7 deletions examples/sklearn-logreg-mnist/client.py
@@ -1,19 +1,35 @@
import argparse
import warnings
import flwr as fl
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss

import flwr as fl
import utils
from flwr_datasets import FederatedDataset

if __name__ == "__main__":
# Load MNIST dataset from https://www.openml.org/d/554
(X_train, y_train), (X_test, y_test) = utils.load_mnist()
N_CLIENTS = 10

parser = argparse.ArgumentParser(description="Flower")
parser.add_argument(
"--node-id",
type=int,
choices=range(0, N_CLIENTS),
required=True,
help="Specifies the artificial data partition",
)
args = parser.parse_args()
partition_id = args.node_id

# Load the partition data
fds = FederatedDataset(dataset="mnist", partitioners={"train": N_CLIENTS})

# Split train set into 10 partitions and randomly use one for training.
partition_id = np.random.choice(10)
(X_train, y_train) = utils.partition(X_train, y_train, 10)[partition_id]
dataset = fds.load_partition(partition_id, "train").with_format("numpy")
X, y = dataset["image"].reshape((len(dataset), -1)), dataset["label"]
# Split the on-edge data: 80% train, 20% test
X_train, X_test = X[: int(0.8 * len(X))], X[int(0.8 * len(X)) :]
y_train, y_test = y[: int(0.8 * len(y))], y[int(0.8 * len(y)) :]

# Create LogisticRegression Model
model = LogisticRegression(
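The client.py hunk is truncated right after the model is created. As a rough sketch of how the rest of the file typically continues in this example — the hyperparameters, class name, and server address below are assumptions, not part of the visible hunk:

```python
# Sketch of the truncated remainder of client.py; hyperparameters,
# class name, and server address are assumptions, not shown in the hunk.
model = LogisticRegression(
    penalty="l2",
    max_iter=1,        # one local epoch per federated round
    warm_start=True,   # keep learned weights between rounds
)
utils.set_initial_params(model)

class MnistClient(fl.client.NumPyClient):
    def get_parameters(self, config):
        return utils.get_model_parameters(model)

    def fit(self, parameters, config):
        utils.set_model_params(model, parameters)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # silence convergence warnings
            model.fit(X_train, y_train)
        return utils.get_model_parameters(model), len(X_train), {}

    def evaluate(self, parameters, config):
        utils.set_model_params(model, parameters)
        loss = log_loss(y_test, model.predict_proba(X_test))
        accuracy = model.score(X_test, y_test)
        return loss, len(X_test), {"accuracy": accuracy}

fl.client.start_numpy_client(server_address="127.0.0.1:8080", client=MnistClient())
```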
4 changes: 2 additions & 2 deletions examples/sklearn-logreg-mnist/pyproject.toml
@@ -13,7 +13,7 @@ authors = [

[tool.poetry.dependencies]
python = "^3.8"
flwr = "^1.0.0"
flwr = ">=1.0,<2.0"
# flwr = { path = "../../", develop = true } # Development
flwr-datasets = { extras = ["vision"], version = ">=0.0.2,<1.0.0" }
scikit-learn = "^1.1.1"
openml = "^0.12.2"
4 changes: 2 additions & 2 deletions examples/sklearn-logreg-mnist/requirements.txt
@@ -1,4 +1,4 @@
flwr~=1.4.0
flwr>=1.0, <2.0
flwr-datasets[vision]>=0.0.2, <1.0.0
numpy~=1.21.1
openml~=0.13.1
scikit_learn~=1.2.2
8 changes: 5 additions & 3 deletions examples/sklearn-logreg-mnist/run.sh
@@ -1,15 +1,17 @@
#!/bin/bash
set -e
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/

echo "Starting server"
python server.py &
sleep 3 # Sleep for 3s to give the server enough time to start

for i in `seq 0 1`; do
for i in $(seq 0 1); do
echo "Starting client $i"
python client.py &
python client.py --node-id "${i}" &
done

# This will allow you to use CTRL+C to stop all background processes
trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM
trap 'trap - SIGTERM && kill -- -$$' SIGINT SIGTERM
# Wait for all background processes to complete
wait
6 changes: 5 additions & 1 deletion examples/sklearn-logreg-mnist/server.py
@@ -4,6 +4,8 @@
from sklearn.linear_model import LogisticRegression
from typing import Dict

from flwr_datasets import FederatedDataset


def fit_round(server_round: int) -> Dict:
"""Send round number to client."""
@@ -14,7 +16,9 @@ def get_evaluate_fn(model: LogisticRegression):
"""Return an evaluation function for server-side evaluation."""

# Load test data here to avoid the overhead of doing it in `evaluate` itself
_, (X_test, y_test) = utils.load_mnist()
fds = FederatedDataset(dataset="mnist", partitioners={"train": 10})
dataset = fds.load_full("test").with_format("numpy")
X_test, y_test = dataset["image"].reshape((len(dataset), -1)), dataset["label"]

# The `evaluate` function will be called after every round
def evaluate(server_round, parameters: fl.common.NDArrays, config):
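The body of the evaluate closure is collapsed in this hunk. A sketch of what it typically contains, assuming log_loss is imported from sklearn.metrics at the top of server.py; the metric choice is an assumption:

```python
    # Sketch of the collapsed closure body; the exact metrics are assumptions.
    def evaluate(server_round, parameters: fl.common.NDArrays, config):
        utils.set_model_params(model, parameters)
        loss = log_loss(y_test, model.predict_proba(X_test))
        accuracy = model.score(X_test, y_test)
        return loss, {"accuracy": accuracy}

    return evaluate  # get_evaluate_fn hands this closure to the Flower strategy
```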
44 changes: 4 additions & 40 deletions examples/sklearn-logreg-mnist/utils.py
@@ -1,16 +1,11 @@
from typing import Tuple, Union, List
import numpy as np
from sklearn.linear_model import LogisticRegression
import openml

XY = Tuple[np.ndarray, np.ndarray]
Dataset = Tuple[XY, XY]
LogRegParams = Union[XY, Tuple[np.ndarray]]
XYList = List[XY]
from flwr.common import NDArrays


def get_model_parameters(model: LogisticRegression) -> LogRegParams:
"""Returns the paramters of a sklearn LogisticRegression model."""
def get_model_parameters(model: LogisticRegression) -> NDArrays:
"""Returns the parameters of a sklearn LogisticRegression model."""
if model.fit_intercept:
params = [
model.coef_,
@@ -23,9 +18,7 @@ def get_model_parameters(model: LogisticRegression) -> LogRegParams:
return params


def set_model_params(
model: LogisticRegression, params: LogRegParams
) -> LogisticRegression:
def set_model_params(model: LogisticRegression, params: NDArrays) -> LogisticRegression:
"""Sets the parameters of a sklean LogisticRegression model."""
model.coef_ = params[0]
if model.fit_intercept:
@@ -47,32 +40,3 @@ def set_initial_params(model: LogisticRegression):
model.coef_ = np.zeros((n_classes, n_features))
if model.fit_intercept:
model.intercept_ = np.zeros((n_classes,))
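Only the tail of set_initial_params is visible in this hunk. For context, a sketch of the whole helper; the MNIST-specific constants are assumptions based on the dataset, not on anything shown here:

```python
# Sketch of the full helper; constants reflect MNIST (10 classes, 28x28 pixels).
def set_initial_params(model: LogisticRegression):
    """Zero-initialise parameters so Flower can request them before the first fit."""
    n_classes = 10     # MNIST has 10 digit classes
    n_features = 784   # 28 x 28 flattened pixels
    model.classes_ = np.array([i for i in range(n_classes)])
    model.coef_ = np.zeros((n_classes, n_features))
    if model.fit_intercept:
        model.intercept_ = np.zeros((n_classes,))
```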


def load_mnist() -> Dataset:
"""Loads the MNIST dataset using OpenML.

OpenML dataset link: https://www.openml.org/d/554
"""
mnist_openml = openml.datasets.get_dataset(554)
Xy, _, _, _ = mnist_openml.get_data(dataset_format="array")
X = Xy[:, :-1] # the last column contains labels
y = Xy[:, -1]
# First 60000 samples consist of the train set
x_train, y_train = X[:60000], y[:60000]
x_test, y_test = X[60000:], y[60000:]
return (x_train, y_train), (x_test, y_test)


def shuffle(X: np.ndarray, y: np.ndarray) -> XY:
"""Shuffle X and y."""
rng = np.random.default_rng()
idx = rng.permutation(len(X))
return X[idx], y[idx]


def partition(X: np.ndarray, y: np.ndarray, num_partitions: int) -> XYList:
"""Split X and y into a number of partitions."""
return list(
zip(np.array_split(X, num_partitions), np.array_split(y, num_partitions))
)
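The removed shuffle and partition helpers are superseded by Flower Datasets' partitioners. A hedged sketch of the equivalent setup with an explicit IidPartitioner, assuming the bare integer used in client.py is shorthand for exactly this:

```python
# Sketch: what the removed partition() helper maps to in Flower Datasets.
# Assumes the IidPartitioner API from flwr_datasets.partitioner.
from flwr_datasets import FederatedDataset
from flwr_datasets.partitioner import IidPartitioner

partitioner = IidPartitioner(num_partitions=10)
fds = FederatedDataset(dataset="mnist", partitioners={"train": partitioner})
partition_0 = fds.load_partition(0, "train")  # one of the 10 IID shards
```

Unlike the removed helpers, this also takes care of downloading and caching the dataset, which is why the OpenML loading code can be dropped entirely.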