From 67ad7adef80116c2f3e23eedff0d1931bfc11b8f Mon Sep 17 00:00:00 2001 From: porteratzo Date: Thu, 5 Oct 2023 08:46:33 -0700 Subject: [PATCH 1/4] llm tutorial --- openfl-tutorials/Federated_PyTorch_LLM.ipynb | 474 +++++++++++++++++++ 1 file changed, 474 insertions(+) create mode 100644 openfl-tutorials/Federated_PyTorch_LLM.ipynb diff --git a/openfl-tutorials/Federated_PyTorch_LLM.ipynb b/openfl-tutorials/Federated_PyTorch_LLM.ipynb new file mode 100644 index 0000000000..4ace9f3d30 --- /dev/null +++ b/openfl-tutorials/Federated_PyTorch_LLM.ipynb @@ -0,0 +1,474 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Federated PyTorch LLM Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook is an example of Transfer Learning \n", + "\n", + "Custom DataLoader is used with OpenFL Python API" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Install dependencies if not already installed\n", + "!pip install torch torchvision peft transformers sentencepiece huggingface_hub accelerate datasets evaluate seqeval\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Any, Mapping\n", + "import numpy as np\n", + "import openfl.native as fx\n", + "import torch\n", + "import torch as pt\n", + "from accelerate import Accelerator\n", + "from datasets import Dataset, load_dataset, load_metric\n", + "from openfl.federated import PyTorchTaskRunner, TaskRunner\n", + "from openfl.federated.task.runner_pt import change_tags\n", + "from openfl.utilities import Metric, TensorKey\n", + "from openfl.utilities.data_splitters import EqualNumPyDataSplitter\n", + "from peft import LoraConfig, TaskType, get_peft_model\n", + "from peft.utils import get_peft_model_state_dict, set_peft_model_state_dict\n", + "from torch.nn 
import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss\n", + "from torch.optim import AdamW\n", + "from torch.utils.data import DataLoader\n", + "from tqdm import tqdm\n", + "import torch.nn as nn\n", + "\n", + "from transformers import (AutoConfig, AutoModelForSequenceClassification,\n", + " AutoTokenizer, DataCollatorWithPadding)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After importing the required packages, the next step is setting up our openfl workspace. To do this, simply run the `fx.init()` command as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Setup default workspace, logging, etc.\n", + "fx.init('torch_cnn_mnist')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we are ready to define our dataset and model to perform federated learning on. We use the GLUE MRPC sentence-pair dataset and fine-tune a pretrained `roberta-large` sequence-classification model with LoRA adapters. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Download the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_glue_mrpc_dataset(tokenizer):\n", + " dataset = load_dataset(\"glue\", \"mrpc\")\n", + "\n", + " def tokenize_function(examples):\n", + " # max_length=None => use the model max length (it's actually the default)\n", + " outputs = tokenizer(\n", + " examples[\"sentence1\"],\n", + " examples[\"sentence2\"],\n", + " truncation=True,\n", + " max_length=None,\n", + " )\n", + " return outputs\n", + "\n", + " tokenized_datasets = dataset.map(\n", + " tokenize_function,\n", + " batched=True,\n", + " remove_columns=[\"idx\", \"sentence1\", \"sentence2\"],\n", + " )\n", + " tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\")\n", + " tokenized_datasets.set_format(\"torch\")\n", + " data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding=\"longest\")\n", + " return data_collator, tokenized_datasets\n", + "\n", + "base_model_name = \"roberta-large\"\n", + "padding_side = \"right\"\n", + "tokenizer = AutoTokenizer.from_pretrained(base_model_name, padding_side=padding_side)\n", + "if getattr(tokenizer, \"pad_token_id\") is None:\n", + " tokenizer.pad_token_id = tokenizer.eos_token_id\n", + "data_collator, tokenized_datasets = get_glue_mrpc_dataset(tokenizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Describe the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class GlueMrpc(Dataset):\n", + " \"\"\"\n", + " Has 5.8k pairs of sentences with annotations if the two sentences are equivalent\n", + " \"\"\" \n", + " def get_shape(self):\n", + " \n", + " if not hasattr(self, 'saved_shape'):\n", + " self.saved_shape = max([len(i) for i in self.data['input_ids']])\n", + " return self.saved_shape\n", + "\n", + "train_set 
= GlueMrpc.from_dict(tokenized_datasets['train'].to_dict())\n", + "valid_set = GlueMrpc.from_dict(tokenized_datasets['test'].to_dict())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Implement Federated dataset\n", + "We have to implement `split` method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class GlueMrpcFederatedDataset(DataLoader):\n", + " def __init__(self, train_set, valid_set, batch_size, data_collator=None):\n", + " self.data_splitter = EqualNumPyDataSplitter()\n", + " if isinstance(train_set,Dataset):\n", + " self.train_set = GlueMrpc.from_dict(train_set.to_dict())\n", + " else:\n", + " self.train_set = train_set\n", + " \n", + " if isinstance(valid_set,Dataset):\n", + " self.valid_set = GlueMrpc.from_dict(valid_set.to_dict())\n", + " else:\n", + " self.valid_set = valid_set \n", + " \n", + " self.batch_size = batch_size\n", + " self.data_collator = data_collator\n", + " \n", + " def split(self, num_collaborators):\n", + " train_split = self.data_splitter.split(self.train_set, num_collaborators)\n", + " valid_split = self.data_splitter.split(self.valid_set, num_collaborators)\n", + " return [\n", + " GlueMrpcFederatedDataset(\n", + " self.train_set.select(train_split[i]),\n", + " self.valid_set.select(valid_split[i]),\n", + " self.batch_size\n", + " )\n", + " for i in range(num_collaborators)\n", + " ]\n", + " \n", + " def get_feature_shape(self):\n", + " return self.train_set.get_shape()\n", + " \n", + " def get_train_loader(self, num_batches=None):\n", + " return DataLoader(self.train_set, batch_size=self.batch_size, collate_fn=data_collator)\n", + " \n", + " def get_valid_loader(self):\n", + " return DataLoader(self.valid_set, collate_fn=data_collator)\n", + " \n", + " def get_train_data_size(self):\n", + " return len(self.train_set)\n", + " \n", + " def get_valid_data_size(self):\n", + " return len(self.valid_set)\n", + " \n", + "fl_data = 
GlueMrpcFederatedDataset(train_set, valid_set, batch_size=32)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Define model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class LLMTaskRunner(PyTorchTaskRunner):\n", + " def __init__(self, base_model_name, data_loader, device=None, metric=None, **kwargs):\n", + " kwargs['data_loader'] = data_loader\n", + " super().__init__(device, **kwargs)\n", + " self.base_model_name = base_model_name\n", + " self.metric = metric\n", + " self._init_model()\n", + " self._init_optimizer()\n", + " \n", + " def _init_model(self):\n", + " model = AutoModelForSequenceClassification.from_pretrained(\n", + " self.base_model_name, return_dict=True)\n", + " peft_config = LoraConfig(task_type=TaskType.SEQ_CLS, inference_mode=False, r=16, lora_alpha=16, lora_dropout=0.1, bias=\"all\")\n", + " self.model = get_peft_model(model, peft_config)\n", + " \n", + " def _init_optimizer(self):\n", + " no_decay = [\"bias\", \"LayerNorm.weight\"]\n", + " optimizer_grouped_parameters = [\n", + " {\n", + " \"params\": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],\n", + " \"weight_decay\": 0.01,\n", + " },\n", + " {\n", + " \"params\": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],\n", + " \"weight_decay\": 0.0,\n", + " },\n", + " ]\n", + " self.optimizer = AdamW(optimizer_grouped_parameters, lr=0.01)\n", + " \n", + " self.training_round_completed = False\n", + " self.initialize_tensorkeys_for_functions()\n", + " \n", + " def state_dict(self):\n", + " return get_peft_model_state_dict(self.model)\n", + " \n", + " def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True):\n", + " return set_peft_model_state_dict(\n", + " self.model, state_dict\n", + " )\n", + " \n", + " def validate(self, col_name, round_num, input_tensor_dict,\n", + " use_tqdm=False, **kwargs):\n", + 
" \"\"\"Validate.\n", + "\n", + " Run validation of the model on the local data.\n", + "\n", + " Args:\n", + " col_name: Name of the collaborator\n", + " round_num: What round is it\n", + " input_tensor_dict: Required input tensors (for model)\n", + " use_tqdm (bool): Use tqdm to print a progress bar (Default=True)\n", + "\n", + " Returns:\n", + " global_output_dict: Tensors to send back to the aggregator\n", + " local_output_dict: Tensors to maintain in the local TensorDB\n", + "\n", + " \"\"\"\n", + " self.rebuild_model(round_num, input_tensor_dict, validation=True)\n", + " self.model.eval()\n", + " self.model.to(self.device)\n", + " val_score = 0\n", + " total_samples = 0\n", + "\n", + " loader = self.data_loader.get_valid_loader()\n", + " if use_tqdm:\n", + " loader = tqdm(loader, desc='validate')\n", + "\n", + " with pt.no_grad():\n", + " for sample in loader:\n", + " samples = sample['input_ids'].shape[0]\n", + " total_samples += samples\n", + " output = self.model(**sample)\n", + " # get the index of the max log-probability\n", + " logits = output.logits\n", + " predictions = torch.argmax(logits, dim=-1)\n", + " metric.add_batch(predictions=predictions, references=sample['labels'])\n", + " val_score = metric.compute()['accuracy']\n", + "\n", + " origin = col_name\n", + " suffix = 'validate'\n", + " if kwargs['apply'] == 'local':\n", + " suffix += '_local'\n", + " else:\n", + " suffix += '_agg'\n", + " tags = ('metric',)\n", + " tags = change_tags(tags, add_field=suffix)\n", + " # TODO figure out a better way to pass in metric for this pytorch\n", + " # validate function\n", + " output_tensor_dict = {\n", + " TensorKey('acc', origin, round_num, True, tags):\n", + " np.array(val_score)\n", + " }\n", + "\n", + " # Empty list represents metrics that should only be stored locally\n", + " return output_tensor_dict, {}\n", + "\n", + " def train_epoch(self, batch_generator) -> Metric:\n", + " \"\"\"Train single epoch.\n", + "\n", + " Override this function in order 
to use custom training.\n", + "\n", + " Args:\n", + " batch_generator: Train dataset batch generator. Yields (samples, targets) tuples of\n", + " size = `self.data_loader.batch_size`.\n", + " Returns:\n", + " Metric: An object containing name and np.ndarray value.\n", + " \"\"\"\n", + " losses = []\n", + " for sample in batch_generator:\n", + " self.optimizer.zero_grad()\n", + " output = self.model(**sample)\n", + " loss = output.loss\n", + " loss.backward()\n", + " torch.nn.utils.clip_grad_norm_(self.model.parameters(),1.0)\n", + " self.model.step()\n", + " losses.append(loss.detach().cpu().numpy())\n", + " loss = np.mean(losses)\n", + " if self.model.config.problem_type == \"regression\":\n", + " loss_fct = MSELoss()\n", + " elif self.model.config.problem_type == \"single_label_classification\":\n", + " loss_fct = CrossEntropyLoss()\n", + " elif self.model.config.problem_type == \"multi_label_classification\":\n", + " loss_fct = BCEWithLogitsLoss()\n", + " return Metric(name=loss_fct._get_name(), value=np.array(loss))\n", + " \n", + " \n", + " def save_native(self, filepath, model_state_dict_key='model_state_dict',\n", + " optimizer_state_dict_key='optimizer_state_dict', **kwargs):\n", + " \"\"\"\n", + " Save model and optimizer states in a picked file specified by the \\\n", + " filepath. model_/optimizer_state_dicts are stored in the keys provided. 
\\\n", + " Uses pt.save().\n", + "\n", + " Args:\n", + " filepath (string) : Path to pickle file to be\n", + " created by pt.save().\n", + " model_state_dict_key (string) : key for model state dict\n", + " in pickled file.\n", + " optimizer_state_dict_key (string) : key for optimizer state\n", + " dict in picked file.\n", + " kwargs : unused\n", + "\n", + " Returns:\n", + " None\n", + " \"\"\"\n", + " pickle_dict = {\n", + " model_state_dict_key: get_peft_model_state_dict(self.model),\n", + " optimizer_state_dict_key: self.optimizer.state_dict()\n", + " }\n", + " pt.save(pickle_dict, filepath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_collaborators = 2\n", + "metric = load_metric('glue', \"mrpc\")\n", + "collaborator_models = [\n", + " LLMTaskRunner(\n", + " base_model_name,\n", + " data_loader=data_slice,\n", + " metric=metric\n", + " )\n", + " for data_slice in fl_data.split(num_collaborators)]\n", + "collaborators = {'one':collaborator_models[0],'two':collaborator_models[1]}#, 'three':collaborator_models[2]}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Original TinyImageNet dataset\n", + "print(f'Original training data size: {len(fl_data.train_set)}')\n", + "print(f'Original validation data size: {len(fl_data.valid_set)}\\n')\n", + "\n", + "#Collaborator one's data\n", + "for i, model in enumerate(collaborator_models):\n", + " print(f'Collaborator {i}\\'s training data size: {len(model.data_loader.train_set)}')\n", + " print(f'Collaborator {i}\\'s validation data size: {len(model.data_loader.valid_set)}\\n')\n", + "\n", + "#Collaborator three's data\n", + "#print(f'Collaborator three\\'s training data size: {len(collaborator_models[2].data_loader.X_train)}')\n", + "#print(f'Collaborator three\\'s validation data size: {len(collaborator_models[2].data_loader.X_valid)}')" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Run experiment, return trained FederatedModel\n", + "final_fl_model = fx.run_experiment(collaborators,{'aggregator.settings.rounds_to_train':3})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Save final model\n", + "final_fl_model.save_native('final_model.pth')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "llama-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 600bfc97ce0bfd43121148efc7f485ba5052a8c2 Mon Sep 17 00:00:00 2001 From: porteratzo Date: Thu, 5 Oct 2023 08:50:59 -0700 Subject: [PATCH 2/4] small fix --- openfl-tutorials/Federated_PyTorch_LLM.ipynb | 534 ++++++++++++++++++- 1 file changed, 520 insertions(+), 14 deletions(-) diff --git a/openfl-tutorials/Federated_PyTorch_LLM.ipynb b/openfl-tutorials/Federated_PyTorch_LLM.ipynb index 4ace9f3d30..ca41b3c13d 100644 --- a/openfl-tutorials/Federated_PyTorch_LLM.ipynb +++ b/openfl-tutorials/Federated_PyTorch_LLM.ipynb @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -66,9 +66,134 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating Workspace Directories\n", + "Creating Workspace 
Templates\n", + "Collecting torch==1.13.1 (from -r /home/oamontoy/.local/workspace/requirements.txt (line 1))\n", + " Using cached torch-1.13.1-cp38-cp38-manylinux1_x86_64.whl (887.4 MB)\n", + "Requirement already satisfied: torchvision==0.14.1 in ./llama-env/lib/python3.8/site-packages (from -r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (0.14.1)\n", + "Requirement already satisfied: tensorboard in ./llama-env/lib/python3.8/site-packages (from -r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.14.0)\n", + "Requirement already satisfied: wheel>=0.38.0 in ./llama-env/lib/python3.8/site-packages (from -r /home/oamontoy/.local/workspace/requirements.txt (line 4)) (0.41.2)\n", + "Requirement already satisfied: typing-extensions in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (4.8.0)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu11==11.7.99 in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (11.7.99)\n", + "Requirement already satisfied: nvidia-cudnn-cu11==8.5.0.96 in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (8.5.0.96)\n", + "Requirement already satisfied: nvidia-cublas-cu11==11.10.3.66 in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (11.10.3.66)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.7.99 in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (11.7.99)\n", + "Requirement already satisfied: numpy in ./llama-env/lib/python3.8/site-packages (from torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (1.24.4)\n", + "Requirement already satisfied: requests in ./llama-env/lib/python3.8/site-packages 
(from torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (2.31.0)\n", + "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in ./llama-env/lib/python3.8/site-packages (from torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (10.0.1)\n", + "Requirement already satisfied: setuptools in ./llama-env/lib/python3.8/site-packages (from nvidia-cublas-cu11==11.10.3.66->torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (68.2.2)\n", + "Requirement already satisfied: absl-py>=0.4 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.0.0)\n", + "Requirement already satisfied: grpcio>=1.48.2 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.48.2)\n", + "Requirement already satisfied: google-auth<3,>=1.6.3 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.23.0)\n", + "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.0.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (3.4.4)\n", + "Requirement already satisfied: protobuf>=3.19.6 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (3.19.6)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (0.7.1)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r 
/home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.3.7)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in ./llama-env/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (5.3.1)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in ./llama-env/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (0.3.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in ./llama-env/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (4.9)\n", + "Requirement already satisfied: urllib3<2.0 in ./llama-env/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.26.16)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in ./llama-env/lib/python3.8/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.3.1)\n", + "Requirement already satisfied: six>=1.5.2 in ./llama-env/lib/python3.8/site-packages (from grpcio>=1.48.2->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.16.0)\n", + "Requirement already satisfied: importlib-metadata>=4.4 in ./llama-env/lib/python3.8/site-packages (from markdown>=2.6.8->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (6.8.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in ./llama-env/lib/python3.8/site-packages (from requests->torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in ./llama-env/lib/python3.8/site-packages (from requests->torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (3.4)\n", + "Requirement already satisfied: 
certifi>=2017.4.17 in ./llama-env/lib/python3.8/site-packages (from requests->torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (2023.7.22)\n", + "Requirement already satisfied: MarkupSafe>=2.1.1 in ./llama-env/lib/python3.8/site-packages (from werkzeug>=1.0.1->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.1.3)\n", + "Requirement already satisfied: zipp>=0.5 in ./llama-env/lib/python3.8/site-packages (from importlib-metadata>=4.4->markdown>=2.6.8->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (3.17.0)\n", + "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in ./llama-env/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (0.5.0)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in ./llama-env/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (3.2.2)\n", + "Installing collected packages: torch\n", + " Attempting uninstall: torch\n", + " Found existing installation: torch 2.1.0\n", + " Uninstalling torch-2.1.0:\n", + " Successfully uninstalled torch-2.1.0\n", + "Successfully installed torch-1.13.1\n", + "Successfully installed packages from /home/oamontoy/.local/workspace/requirements.txt.\n", + "\n", + "New workspace directory structure:\n", + "workspace\n", + "├── logs\n", + "│ └── cnn_mnist\n", + "│ ├── events.out.tfevents.1695850586.M50CYP2SBSTD.111429.0\n", + "│ ├── events.out.tfevents.1695942084.M50CYP2SBSTD.4924.0\n", + "│ ├── events.out.tfevents.1695849809.M50CYP2SBSTD.107313.0\n", + "│ ├── events.out.tfevents.1695850472.M50CYP2SBSTD.110437.0\n", + "│ ├── events.out.tfevents.1695942744.M50CYP2SBSTD.15635.0\n", + "│ ├── events.out.tfevents.1696008244.M50CYP2SBSTD.98097.0\n", + "│ ├── events.out.tfevents.1695850981.M50CYP2SBSTD.114740.0\n", + "│ 
├── events.out.tfevents.1695939101.M50CYP2SBSTD.143673.0\n", + "│ ├── events.out.tfevents.1695850850.M50CYP2SBSTD.113094.0\n", + "│ ├── events.out.tfevents.1695850404.M50CYP2SBSTD.109391.0\n", + "│ ├── events.out.tfevents.1695942232.M50CYP2SBSTD.7126.0\n", + "│ └── events.out.tfevents.1695849986.M50CYP2SBSTD.107937.0\n", + "├── .workspace\n", + "├── final_model.pth\n", + "├── plan\n", + "│ ├── plan.yaml\n", + "│ ├── defaults\n", + "│ ├── data.yaml\n", + "│ └── cols.yaml\n", + "├── agg_to_col_two_signed_cert.zip\n", + "├── requirements.txt\n", + "├── data\n", + "├── save\n", + "│ ├── torch_cnn_mnist_best.pbuf\n", + "│ ├── torch_cnn_mnist_last.pbuf\n", + "│ └── torch_cnn_mnist_init.pbuf\n", + "├── agg_to_col_one_signed_cert.zip\n", + "├── src\n", + "│ ├── pt_cnn.py\n", + "│ ├── mnist_utils.py\n", + "│ ├── __pycache__\n", + "│ │ ├── __init__.cpython-38.pyc\n", + "│ │ └── mnist_utils.cpython-38.pyc\n", + "│ ├── ptmnist_inmemory.py\n", + "│ └── __init__.py\n", + "└── cert\n", + "\n", + "8 directories, 30 files\n", + "Setting Up Certificate Authority...\n", + "\n", + "1. Create Root CA\n", + "1.1 Create Directories\n", + "1.2 Create Database\n", + "1.3 Create CA Request and Certificate\n", + "2. 
Create Signing Certificate\n", + "2.1 Create Directories\n", + "2.2 Create Database\n", + "2.3 Create Signing Certificate CSR\n", + "2.4 Sign Signing Certificate CSR\n", + "3 Create Certificate Chain\n", + "\n", + "Done.\n", + "Creating AGGREGATOR certificate key pair with following settings: CN=\u001b[31mm50cyp2sbstd\u001b[0m, SAN=\u001b[31mDNS:m50cyp2sbstd\u001b[0m\n", + " Writing AGGREGATOR certificate key pair to: \u001b[32m/home/oamontoy/workspace/cert/server\u001b[0m\n", + "The CSR Hash for file \u001b[32mserver/agg_m50cyp2sbstd.csr\u001b[0m = \u001b[31md49a1328c9e8ccfb65a4d583018704fd9d24b3301bb800ceb9f50b591937e1a5f8f419238b5e4c24af732693d37ce088\u001b[0m\n", + " Signing AGGREGATOR certificate\n", + "Creating COLLABORATOR certificate key pair with following settings: CN=\u001b[31mone\u001b[0m, SAN=\u001b[31mDNS:one\u001b[0m\n", + " Moving COLLABORATOR certificate to: \u001b[32m/home/oamontoy/workspace/cert/col_one\u001b[0m\n", + "The CSR Hash for file \u001b[32mcol_one.csr\u001b[0m = \u001b[31m0caea6371d4b13f51be51507794c4c18e0a9cb408f286f2f81a4b179380b15b3215e94d739ec952065fbc7eb3b2edbba\u001b[0m\n", + " Signing COLLABORATOR certificate\n", + "\n", + "Registering \u001b[32mone\u001b[0m in \u001b[32m/home/oamontoy/.local/workspace/plan/cols.yaml\u001b[0m\n", + "Creating COLLABORATOR certificate key pair with following settings: CN=\u001b[31mtwo\u001b[0m, SAN=\u001b[31mDNS:two\u001b[0m\n", + " Moving COLLABORATOR certificate to: \u001b[32m/home/oamontoy/workspace/cert/col_two\u001b[0m\n", + "The CSR Hash for file \u001b[32mcol_two.csr\u001b[0m = \u001b[31m3e6ffe3d25d39bb6f3f1fb851eb8da60d4cbf4e0bee78ad0f7731cc0e6bb47433830523f2c39dc0ca7f0ce79b69cc6c3\u001b[0m\n", + " Signing COLLABORATOR certificate\n", + "\n", + "Registering \u001b[32mtwo\u001b[0m in \u001b[32m/home/oamontoy/.local/workspace/plan/cols.yaml\u001b[0m\n" + ] + } + ], "source": [ "#Setup default workspace, logging, etc.\n", "fx.init('torch_cnn_mnist')" @@ -90,7 +215,7 @@ }, { "cell_type": 
"code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -134,7 +259,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -162,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -221,7 +346,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -339,7 +464,7 @@ " loss = output.loss\n", " loss.backward()\n", " torch.nn.utils.clip_grad_norm_(self.model.parameters(),1.0)\n", - " self.model.step()\n", + " self.model.zero_grad()\n", " losses.append(loss.detach().cpu().numpy())\n", " loss = np.mean(losses)\n", " if self.model.config.problem_type == \"regression\":\n", @@ -379,9 +504,54 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_159004/1723172838.py:2: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n", + " metric = load_metric('glue', \"mrpc\")\n", + "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + }, + { + "data": { + "text/html": [ + "
[08:48:31] WARNING  tried to remove tensor: __opt_state_needed not present in the tensor dict                                                       utils.py:172\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m[08:48:31]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m tried to remove tensor: __opt_state_needed not present in the tensor dict \u001b]8;id=932122;file:///home/oamontoy/workspace/sec-openfl/openfl/utilities/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=685149;file:///home/oamontoy/workspace/sec-openfl/openfl/utilities/utils.py#172\u001b\\\u001b[2m172\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + }, + { + "data": { + "text/html": [ + "
[08:48:34] WARNING  tried to remove tensor: __opt_state_needed not present in the tensor dict                                                       utils.py:172\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m[08:48:34]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m tried to remove tensor: __opt_state_needed not present in the tensor dict \u001b]8;id=38894;file:///home/oamontoy/workspace/sec-openfl/openfl/utilities/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=682120;file:///home/oamontoy/workspace/sec-openfl/openfl/utilities/utils.py#172\u001b\\\u001b[2m172\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "num_collaborators = 2\n", "metric = load_metric('glue', \"mrpc\")\n", @@ -397,9 +567,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original training data size: 3668\n", + "Original validation data size: 1725\n", + "\n", + "Collaborator 0's training data size: 1834\n", + "Collaborator 0's validation data size: 863\n", + "\n", + "Collaborator 1's training data size: 1834\n", + "Collaborator 1's validation data size: 862\n", + "\n" + ] + } + ], "source": [ "#Original TinyImageNet dataset\n", "print(f'Original training data size: {len(fl_data.train_set)}')\n", @@ -417,9 +603,329 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
           INFO     Updating aggregator.settings.rounds_to_train to 3...                                                                           native.py:102\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Updating aggregator.settings.rounds_to_train to \u001b[1;36m3\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=22181;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py\u001b\\\u001b[2mnative.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=265323;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py#102\u001b\\\u001b[2m102\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     FL-Plan hash is 86b08340e96ba9e485169da1f860ea968811d1bf2e6867774fae4398426dd33c6ae56ca202002d393e3a4d91f946c1bc                 plan.py:235\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m FL-Plan hash is \u001b[34m86b08340e96ba9e485169da1f860ea968811d1bf2e6867774fae4398426dd33c6ae56ca202002d393e3a4d91f946c1bc\u001b[0m \u001b]8;id=965130;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=969631;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#235\u001b\\\u001b[2m235\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Building 🡆 Object NoCompressionPipeline from openfl.pipelines Module.                                                            plan.py:171\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mNoCompressionPipeline\u001b[0m from \u001b[31mopenfl.pipelines\u001b[0m Module. \u001b]8;id=418157;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=469081;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Creating Initial Weights File    🠆 save/torch_cnn_mnist_init.pbuf                                                              native.py:277\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Creating Initial Weights File 🠆 save/torch_cnn_mnist_init.pbuf \u001b]8;id=949808;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py\u001b\\\u001b[2mnative.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=806002;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py#277\u001b\\\u001b[2m277\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Starting Experiment...                                                                                                         native.py:281\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Starting Experiment\u001b[33m...\u001b[0m \u001b]8;id=768304;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py\u001b\\\u001b[2mnative.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=820559;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py#281\u001b\\\u001b[2m281\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Building 🡆 Object RandomGroupedAssigner from openfl.component Module.                                                            plan.py:171\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mRandomGroupedAssigner\u001b[0m from \u001b[31mopenfl.component\u001b[0m Module. \u001b]8;id=959697;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=588178;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Building 🡆 Object CutoffTimeBasedStragglerHandling from openfl.component.straggler_handling_functions Module.                    plan.py:171\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mCutoffTimeBasedStragglerHandling\u001b[0m from \u001b[31mopenfl.component.straggler_handling_functions\u001b[0m Module. \u001b]8;id=199471;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=897448;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Importing 🡆 Object write_metric from src.mnist_utils Module.                                                                     plan.py:199\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Importing \u001b[31m🡆\u001b[0m Object \u001b[31mwrite_metric\u001b[0m from \u001b[31msrc.mnist_utils\u001b[0m Module. \u001b]8;id=125418;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=640295;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#199\u001b\\\u001b[2m199\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/oamontoy/workspace/llama-env/lib/python3.8/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory\n", + " warn(f\"Failed to load image Python extension: {e}\")\n" + ] + }, + { + "data": { + "text/html": [ + "
[08:48:35] INFO     Building 🡆 Object Aggregator from openfl.component Module.                                                                       plan.py:171\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m[08:48:35]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mAggregator\u001b[0m from \u001b[31mopenfl.component\u001b[0m Module. \u001b]8;id=299428;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=109248;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Using custom log metric: <function write_metric at 0x7f5c68ad28b0>                                                          aggregator.py:97\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Using custom log metric: \u001b[1m<\u001b[0m\u001b[1;95mfunction\u001b[0m\u001b[39m write_metric at \u001b[0m\u001b[1;36m0x7f5c68ad28b0\u001b[0m\u001b[1m>\u001b[0m \u001b]8;id=655419;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py\u001b\\\u001b[2maggregator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=161984;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py#97\u001b\\\u001b[2m97\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Building 🡆 Object Collaborator from openfl.component Module.                                                                     plan.py:171\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mCollaborator\u001b[0m from \u001b[31mopenfl.component\u001b[0m Module. \u001b]8;id=390053;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=853120;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Building 🡆 Object Collaborator from openfl.component Module.                                                                     plan.py:171\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mCollaborator\u001b[0m from \u001b[31mopenfl.component\u001b[0m Module. \u001b]8;id=612439;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=601521;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Waiting for tasks...                                                                                                     collaborator.py:178\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Waiting for tasks\u001b[33m...\u001b[0m \u001b]8;id=806029;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=928412;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#178\u001b\\\u001b[2m178\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Sending tasks to collaborator one for round 0                                                                              aggregator.py:329\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Sending tasks to collaborator one for round \u001b[1;36m0\u001b[0m \u001b]8;id=515202;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py\u001b\\\u001b[2maggregator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=103453;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py#329\u001b\\\u001b[2m329\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Received the following tasks: ['aggregated_model_validation', 'train', 'locally_tuned_model_validation']                 collaborator.py:168\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Received the following tasks: \u001b[1m[\u001b[0m\u001b[32m'aggregated_model_validation'\u001b[0m, \u001b[32m'train'\u001b[0m, \u001b[32m'locally_tuned_model_validation'\u001b[0m\u001b[1m]\u001b[0m \u001b]8;id=266582;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=901781;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#168\u001b\\\u001b[2m168\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
[08:48:36] INFO     Using TaskRunner subclassing API                                                                                         collaborator.py:253\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m[08:48:36]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Using TaskRunner subclassing API \u001b]8;id=457102;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=223379;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#253\u001b\\\u001b[2m253\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/oamontoy/workspace/sec-openfl/openfl/federated/task/runner_pt.py:284: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.)\n", + " new_state[k] = pt.from_numpy(tensor_dict.pop(k)).to(device)\n", + "You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" + ] + }, + { + "data": { + "text/html": [ + "
[08:49:31] METRIC   Round 0, collaborator one is sending metric for task aggregated_model_validation: acc   0.673233                         collaborator.py:415\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m[08:49:31]\u001b[0m\u001b[2;36m \u001b[0mMETRIC Round \u001b[1;36m0\u001b[0m, collaborator one is sending metric for task aggregated_model_validation: acc \u001b[1;36m0.673233\u001b[0m \u001b]8;id=512008;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=179476;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#415\u001b\\\u001b[2m415\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Collaborator one is sending task results for aggregated_model_validation, round 0                                          aggregator.py:520\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Collaborator one is sending task results for aggregated_model_validation, round \u001b[1;36m0\u001b[0m \u001b]8;id=921737;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py\u001b\\\u001b[2maggregator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=943670;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py#520\u001b\\\u001b[2m520\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           METRIC   Round 0, collaborator validate_agg aggregated_model_validation result acc:      0.673233                                   aggregator.py:559\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0mMETRIC Round \u001b[1;36m0\u001b[0m, collaborator validate_agg aggregated_model_validation result acc: \u001b[1;36m0.673233\u001b[0m \u001b]8;id=778930;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py\u001b\\\u001b[2maggregator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=605180;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py#559\u001b\\\u001b[2m559\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Using TaskRunner subclassing API                                                                                         collaborator.py:253\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Using TaskRunner subclassing API \u001b]8;id=239383;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=814426;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#253\u001b\\\u001b[2m253\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Run 0 epoch of 0 round                                                                                                      runner_pt.py:155\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Run \u001b[1;36m0\u001b[0m epoch of \u001b[1;36m0\u001b[0m round \u001b]8;id=118270;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/task/runner_pt.py\u001b\\\u001b[2mrunner_pt.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=848215;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/task/runner_pt.py#155\u001b\\\u001b[2m155\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "ename": "AttributeError", + "evalue": "'RobertaForSequenceClassification' object has no attribute 'step'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/peft/peft_model.py:434\u001b[0m, in \u001b[0;36mPeftModel.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 433\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 434\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49m\u001b[39m__getattr__\u001b[39;49m(name) \u001b[39m# defer to nn.Module's logic\u001b[39;00m\n\u001b[1;32m 435\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m:\n", + "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/torch/nn/modules/module.py:1695\u001b[0m, in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1688\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mparameters\u001b[39m(\u001b[39mself\u001b[39m, recurse: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterator[Parameter]:\n\u001b[1;32m 1689\u001b[0m \u001b[39m \u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\"\"Returns an iterator over module 
parameters.\u001b[39;00m\n\u001b[1;32m 1690\u001b[0m \n\u001b[1;32m 1691\u001b[0m \u001b[39m This is typically passed to an optimizer.\u001b[39;00m\n\u001b[1;32m 1692\u001b[0m \n\u001b[1;32m 1693\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[1;32m 1694\u001b[0m \u001b[39m recurse (bool): if True, then yields parameters of this module\u001b[39;00m\n\u001b[0;32m-> 1695\u001b[0m \u001b[39m and all submodules. Otherwise, yields only parameters that\u001b[39;00m\n\u001b[1;32m 1696\u001b[0m \u001b[39m are direct members of this module.\u001b[39;00m\n\u001b[1;32m 1697\u001b[0m \n\u001b[1;32m 1698\u001b[0m \u001b[39m Yields:\u001b[39;00m\n\u001b[1;32m 1699\u001b[0m \u001b[39m Parameter: module parameter\u001b[39;00m\n\u001b[1;32m 1700\u001b[0m \n\u001b[1;32m 1701\u001b[0m \u001b[39m Example::\u001b[39;00m\n\u001b[1;32m 1702\u001b[0m \n\u001b[1;32m 1703\u001b[0m \u001b[39m >>> # xdoctest: +SKIP(\"undefined vars\")\u001b[39;00m\n\u001b[1;32m 1704\u001b[0m \u001b[39m >>> for param in model.parameters():\u001b[39;00m\n\u001b[1;32m 1705\u001b[0m \u001b[39m >>> print(type(param), param.size())\u001b[39;00m\n\u001b[1;32m 1706\u001b[0m \u001b[39m (20L,)\u001b[39;00m\n\u001b[1;32m 1707\u001b[0m \u001b[39m (20L, 1L, 5L, 5L)\u001b[39;00m\n\u001b[1;32m 1708\u001b[0m \n\u001b[1;32m 1709\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m 1710\u001b[0m \u001b[39mfor\u001b[39;00m name, param \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnamed_parameters(recurse\u001b[39m=\u001b[39mrecurse):\n", + "\u001b[0;31mAttributeError\u001b[0m: 'PeftModelForSequenceClassification' object has no attribute 'step'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/peft/tuners/lora.py:492\u001b[0m, in \u001b[0;36mLoraModel.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 491\u001b[0m 
\u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 492\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49m\u001b[39m__getattr__\u001b[39;49m(name) \u001b[39m# defer to nn.Module's logic\u001b[39;00m\n\u001b[1;32m 493\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m:\n", + "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/torch/nn/modules/module.py:1695\u001b[0m, in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1688\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mparameters\u001b[39m(\u001b[39mself\u001b[39m, recurse: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterator[Parameter]:\n\u001b[1;32m 1689\u001b[0m \u001b[39m \u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\"\"Returns an iterator over module parameters.\u001b[39;00m\n\u001b[1;32m 1690\u001b[0m \n\u001b[1;32m 1691\u001b[0m \u001b[39m This is typically passed to an optimizer.\u001b[39;00m\n\u001b[1;32m 1692\u001b[0m \n\u001b[1;32m 1693\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[1;32m 1694\u001b[0m \u001b[39m recurse (bool): if True, then yields parameters of this module\u001b[39;00m\n\u001b[0;32m-> 1695\u001b[0m \u001b[39m and all submodules. 
Otherwise, yields only parameters that\u001b[39;00m\n\u001b[1;32m 1696\u001b[0m \u001b[39m are direct members of this module.\u001b[39;00m\n\u001b[1;32m 1697\u001b[0m \n\u001b[1;32m 1698\u001b[0m \u001b[39m Yields:\u001b[39;00m\n\u001b[1;32m 1699\u001b[0m \u001b[39m Parameter: module parameter\u001b[39;00m\n\u001b[1;32m 1700\u001b[0m \n\u001b[1;32m 1701\u001b[0m \u001b[39m Example::\u001b[39;00m\n\u001b[1;32m 1702\u001b[0m \n\u001b[1;32m 1703\u001b[0m \u001b[39m >>> # xdoctest: +SKIP(\"undefined vars\")\u001b[39;00m\n\u001b[1;32m 1704\u001b[0m \u001b[39m >>> for param in model.parameters():\u001b[39;00m\n\u001b[1;32m 1705\u001b[0m \u001b[39m >>> print(type(param), param.size())\u001b[39;00m\n\u001b[1;32m 1706\u001b[0m \u001b[39m (20L,)\u001b[39;00m\n\u001b[1;32m 1707\u001b[0m \u001b[39m (20L, 1L, 5L, 5L)\u001b[39;00m\n\u001b[1;32m 1708\u001b[0m \n\u001b[1;32m 1709\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m 1710\u001b[0m \u001b[39mfor\u001b[39;00m name, param \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnamed_parameters(recurse\u001b[39m=\u001b[39mrecurse):\n", + "\u001b[0;31mAttributeError\u001b[0m: 'LoraModel' object has no attribute 'step'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/home/oamontoy/workspace/sec-openfl/openfl-tutorials/Federated_PyTorch_LLM.ipynb Cell 18\u001b[0m line \u001b[0;36m2\n\u001b[1;32m 1\u001b[0m \u001b[39m#Run experiment, return trained FederatedModel\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m final_fl_model \u001b[39m=\u001b[39m fx\u001b[39m.\u001b[39;49mrun_experiment(collaborators,{\u001b[39m'\u001b[39;49m\u001b[39maggregator.settings.rounds_to_train\u001b[39;49m\u001b[39m'\u001b[39;49m:\u001b[39m3\u001b[39;49m})\n", + "File \u001b[0;32m~/workspace/sec-openfl/openfl/native/native.py:295\u001b[0m, in \u001b[0;36mrun_experiment\u001b[0;34m(collaborator_dict, 
override_config)\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m plan\u001b[39m.\u001b[39mauthorized_cols:\n\u001b[1;32m 294\u001b[0m collaborator \u001b[39m=\u001b[39m collaborators[col]\n\u001b[0;32m--> 295\u001b[0m collaborator\u001b[39m.\u001b[39;49mrun_simulation()\n\u001b[1;32m 297\u001b[0m \u001b[39m# Set the weights for the final model\u001b[39;00m\n\u001b[1;32m 298\u001b[0m model\u001b[39m.\u001b[39mrebuild_model(\n\u001b[1;32m 299\u001b[0m rounds_to_train \u001b[39m-\u001b[39m \u001b[39m1\u001b[39m, aggregator\u001b[39m.\u001b[39mlast_tensor_dict, validation\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n", + "File \u001b[0;32m~/workspace/sec-openfl/openfl/component/collaborator/collaborator.py:170\u001b[0m, in \u001b[0;36mCollaborator.run_simulation\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlogger\u001b[39m.\u001b[39minfo(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mReceived the following tasks: \u001b[39m\u001b[39m{\u001b[39;00mtasks\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m)\n\u001b[1;32m 169\u001b[0m \u001b[39mfor\u001b[39;00m task \u001b[39min\u001b[39;00m tasks:\n\u001b[0;32m--> 170\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdo_task(task, round_number)\n\u001b[1;32m 171\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlogger\u001b[39m.\u001b[39minfo(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mAll tasks completed on \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcollaborator_name\u001b[39m}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 172\u001b[0m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mfor round \u001b[39m\u001b[39m{\u001b[39;00mround_number\u001b[39m}\u001b[39;00m\u001b[39m...\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 173\u001b[0m \u001b[39mbreak\u001b[39;00m\n", + "File 
\u001b[0;32m~/workspace/sec-openfl/openfl/component/collaborator/collaborator.py:255\u001b[0m, in \u001b[0;36mCollaborator.do_task\u001b[0;34m(self, task, round_number)\u001b[0m\n\u001b[1;32m 252\u001b[0m func \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtask_runner, func_name)\n\u001b[1;32m 253\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlogger\u001b[39m.\u001b[39minfo(\u001b[39m'\u001b[39m\u001b[39mUsing TaskRunner subclassing API\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m--> 255\u001b[0m global_output_tensor_dict, local_output_tensor_dict \u001b[39m=\u001b[39m func(\n\u001b[1;32m 256\u001b[0m col_name\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mcollaborator_name,\n\u001b[1;32m 257\u001b[0m round_num\u001b[39m=\u001b[39;49mround_number,\n\u001b[1;32m 258\u001b[0m input_tensor_dict\u001b[39m=\u001b[39;49minput_tensor_dict,\n\u001b[1;32m 259\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 261\u001b[0m \u001b[39m# Save global and local output_tensor_dicts to TensorDB\u001b[39;00m\n\u001b[1;32m 262\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtensor_db\u001b[39m.\u001b[39mcache_tensor(global_output_tensor_dict)\n", + "File \u001b[0;32m~/workspace/sec-openfl/openfl/federated/task/runner_pt.py:159\u001b[0m, in \u001b[0;36mPyTorchTaskRunner.train_batches\u001b[0;34m(self, col_name, round_num, input_tensor_dict, use_tqdm, epochs, **kwargs)\u001b[0m\n\u001b[1;32m 157\u001b[0m \u001b[39mif\u001b[39;00m use_tqdm:\n\u001b[1;32m 158\u001b[0m loader \u001b[39m=\u001b[39m tqdm\u001b[39m.\u001b[39mtqdm(loader, desc\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mtrain epoch\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m--> 159\u001b[0m metric \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtrain_epoch(loader)\n\u001b[1;32m 160\u001b[0m \u001b[39m# Output metric tensors (scalar)\u001b[39;00m\n\u001b[1;32m 161\u001b[0m 
origin \u001b[39m=\u001b[39m col_name\n", + "\u001b[1;32m/home/oamontoy/workspace/sec-openfl/openfl-tutorials/Federated_PyTorch_LLM.ipynb Cell 18\u001b[0m line \u001b[0;36m1\n\u001b[1;32m 113\u001b[0m loss\u001b[39m.\u001b[39mbackward()\n\u001b[1;32m 114\u001b[0m torch\u001b[39m.\u001b[39mnn\u001b[39m.\u001b[39mutils\u001b[39m.\u001b[39mclip_grad_norm_(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmodel\u001b[39m.\u001b[39mparameters(),\u001b[39m1.0\u001b[39m)\n\u001b[0;32m--> 115\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmodel\u001b[39m.\u001b[39;49mstep()\n\u001b[1;32m 116\u001b[0m losses\u001b[39m.\u001b[39mappend(loss\u001b[39m.\u001b[39mdetach()\u001b[39m.\u001b[39mcpu()\u001b[39m.\u001b[39mnumpy())\n\u001b[1;32m 117\u001b[0m loss \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mmean(losses)\n", + "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/peft/peft_model.py:436\u001b[0m, in \u001b[0;36mPeftModel.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 434\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__getattr__\u001b[39m(name) \u001b[39m# defer to nn.Module's logic\u001b[39;00m\n\u001b[1;32m 435\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m:\n\u001b[0;32m--> 436\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mgetattr\u001b[39;49m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mbase_model, name)\n", + "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/peft/tuners/lora.py:494\u001b[0m, in \u001b[0;36mLoraModel.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 492\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__getattr__\u001b[39m(name) \u001b[39m# defer to nn.Module's logic\u001b[39;00m\n\u001b[1;32m 493\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m:\n\u001b[0;32m--> 494\u001b[0m \u001b[39mreturn\u001b[39;00m 
\u001b[39mgetattr\u001b[39;49m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmodel, name)\n", + "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/torch/nn/modules/module.py:1695\u001b[0m, in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1688\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mparameters\u001b[39m(\u001b[39mself\u001b[39m, recurse: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterator[Parameter]:\n\u001b[1;32m 1689\u001b[0m \u001b[39m \u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\"\"Returns an iterator over module parameters.\u001b[39;00m\n\u001b[1;32m 1690\u001b[0m \n\u001b[1;32m 1691\u001b[0m \u001b[39m This is typically passed to an optimizer.\u001b[39;00m\n\u001b[1;32m 1692\u001b[0m \n\u001b[1;32m 1693\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[1;32m 1694\u001b[0m \u001b[39m recurse (bool): if True, then yields parameters of this module\u001b[39;00m\n\u001b[0;32m-> 1695\u001b[0m \u001b[39m and all submodules. 
Otherwise, yields only parameters that\u001b[39;00m\n\u001b[1;32m 1696\u001b[0m \u001b[39m are direct members of this module.\u001b[39;00m\n\u001b[1;32m 1697\u001b[0m \n\u001b[1;32m 1698\u001b[0m \u001b[39m Yields:\u001b[39;00m\n\u001b[1;32m 1699\u001b[0m \u001b[39m Parameter: module parameter\u001b[39;00m\n\u001b[1;32m 1700\u001b[0m \n\u001b[1;32m 1701\u001b[0m \u001b[39m Example::\u001b[39;00m\n\u001b[1;32m 1702\u001b[0m \n\u001b[1;32m 1703\u001b[0m \u001b[39m >>> # xdoctest: +SKIP(\"undefined vars\")\u001b[39;00m\n\u001b[1;32m 1704\u001b[0m \u001b[39m >>> for param in model.parameters():\u001b[39;00m\n\u001b[1;32m 1705\u001b[0m \u001b[39m >>> print(type(param), param.size())\u001b[39;00m\n\u001b[1;32m 1706\u001b[0m \u001b[39m (20L,)\u001b[39;00m\n\u001b[1;32m 1707\u001b[0m \u001b[39m (20L, 1L, 5L, 5L)\u001b[39;00m\n\u001b[1;32m 1708\u001b[0m \n\u001b[1;32m 1709\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m 1710\u001b[0m \u001b[39mfor\u001b[39;00m name, param \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnamed_parameters(recurse\u001b[39m=\u001b[39mrecurse):\n\u001b[1;32m 1711\u001b[0m \u001b[39myield\u001b[39;00m param\n", + "\u001b[0;31mAttributeError\u001b[0m: 'RobertaForSequenceClassification' object has no attribute 'step'" + ] + } + ], "source": [ "#Run experiment, return trained FederatedModel\n", "final_fl_model = fx.run_experiment(collaborators,{'aggregator.settings.rounds_to_train':3})" From 3cacb715548646461bd9d19bb3c814a4f098af0d Mon Sep 17 00:00:00 2001 From: porteratzo Date: Tue, 17 Oct 2023 10:57:43 -0700 Subject: [PATCH 3/4] fixes --- openfl-tutorials/Federated_PyTorch_LLM.ipynb | 680 +++---------------- 1 file changed, 105 insertions(+), 575 deletions(-) diff --git a/openfl-tutorials/Federated_PyTorch_LLM.ipynb b/openfl-tutorials/Federated_PyTorch_LLM.ipynb index ca41b3c13d..78b9978604 100644 --- a/openfl-tutorials/Federated_PyTorch_LLM.ipynb +++ b/openfl-tutorials/Federated_PyTorch_LLM.ipynb @@ -30,7 +30,7 @@ }, 
{ "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -39,9 +39,8 @@ "import openfl.native as fx\n", "import torch\n", "import torch as pt\n", - "from accelerate import Accelerator\n", "from datasets import Dataset, load_dataset, load_metric\n", - "from openfl.federated import PyTorchTaskRunner, TaskRunner\n", + "from openfl.federated import PyTorchTaskRunner\n", "from openfl.federated.task.runner_pt import change_tags\n", "from openfl.utilities import Metric, TensorKey\n", "from openfl.utilities.data_splitters import EqualNumPyDataSplitter\n", @@ -52,9 +51,9 @@ "from torch.utils.data import DataLoader\n", "from tqdm import tqdm\n", "import torch.nn as nn\n", - "\n", - "from transformers import (AutoConfig, AutoModelForSequenceClassification,\n", - " AutoTokenizer, DataCollatorWithPadding)" + "from transformers.trainer_pt_utils import get_parameter_names\n", + "from transformers import (AutoModelForSequenceClassification,\n", + " AutoTokenizer, DataCollatorWithPadding, get_scheduler)" ] }, { @@ -66,134 +65,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Creating Workspace Directories\n", - "Creating Workspace Templates\n", - "Collecting torch==1.13.1 (from -r /home/oamontoy/.local/workspace/requirements.txt (line 1))\n", - " Using cached torch-1.13.1-cp38-cp38-manylinux1_x86_64.whl (887.4 MB)\n", - "Requirement already satisfied: torchvision==0.14.1 in ./llama-env/lib/python3.8/site-packages (from -r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (0.14.1)\n", - "Requirement already satisfied: tensorboard in ./llama-env/lib/python3.8/site-packages (from -r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.14.0)\n", - "Requirement already satisfied: wheel>=0.38.0 in ./llama-env/lib/python3.8/site-packages (from -r 
/home/oamontoy/.local/workspace/requirements.txt (line 4)) (0.41.2)\n", - "Requirement already satisfied: typing-extensions in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (4.8.0)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu11==11.7.99 in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (11.7.99)\n", - "Requirement already satisfied: nvidia-cudnn-cu11==8.5.0.96 in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (8.5.0.96)\n", - "Requirement already satisfied: nvidia-cublas-cu11==11.10.3.66 in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (11.10.3.66)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.7.99 in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (11.7.99)\n", - "Requirement already satisfied: numpy in ./llama-env/lib/python3.8/site-packages (from torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (1.24.4)\n", - "Requirement already satisfied: requests in ./llama-env/lib/python3.8/site-packages (from torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (2.31.0)\n", - "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in ./llama-env/lib/python3.8/site-packages (from torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (10.0.1)\n", - "Requirement already satisfied: setuptools in ./llama-env/lib/python3.8/site-packages (from nvidia-cublas-cu11==11.10.3.66->torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (68.2.2)\n", - "Requirement already satisfied: absl-py>=0.4 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r 
/home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.0.0)\n", - "Requirement already satisfied: grpcio>=1.48.2 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.48.2)\n", - "Requirement already satisfied: google-auth<3,>=1.6.3 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.23.0)\n", - "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.0.0)\n", - "Requirement already satisfied: markdown>=2.6.8 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (3.4.4)\n", - "Requirement already satisfied: protobuf>=3.19.6 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (3.19.6)\n", - "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (0.7.1)\n", - "Requirement already satisfied: werkzeug>=1.0.1 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.3.7)\n", - "Requirement already satisfied: cachetools<6.0,>=2.0.0 in ./llama-env/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (5.3.1)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in ./llama-env/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (0.3.0)\n", - "Requirement already satisfied: rsa<5,>=3.1.4 in ./llama-env/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->-r 
/home/oamontoy/.local/workspace/requirements.txt (line 3)) (4.9)\n", - "Requirement already satisfied: urllib3<2.0 in ./llama-env/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.26.16)\n", - "Requirement already satisfied: requests-oauthlib>=0.7.0 in ./llama-env/lib/python3.8/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.3.1)\n", - "Requirement already satisfied: six>=1.5.2 in ./llama-env/lib/python3.8/site-packages (from grpcio>=1.48.2->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.16.0)\n", - "Requirement already satisfied: importlib-metadata>=4.4 in ./llama-env/lib/python3.8/site-packages (from markdown>=2.6.8->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (6.8.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in ./llama-env/lib/python3.8/site-packages (from requests->torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (3.2.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in ./llama-env/lib/python3.8/site-packages (from requests->torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (3.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in ./llama-env/lib/python3.8/site-packages (from requests->torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (2023.7.22)\n", - "Requirement already satisfied: MarkupSafe>=2.1.1 in ./llama-env/lib/python3.8/site-packages (from werkzeug>=1.0.1->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.1.3)\n", - "Requirement already satisfied: zipp>=0.5 in ./llama-env/lib/python3.8/site-packages (from importlib-metadata>=4.4->markdown>=2.6.8->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (3.17.0)\n", - "Requirement already satisfied: 
pyasn1<0.6.0,>=0.4.6 in ./llama-env/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (0.5.0)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in ./llama-env/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (3.2.2)\n", - "Installing collected packages: torch\n", - " Attempting uninstall: torch\n", - " Found existing installation: torch 2.1.0\n", - " Uninstalling torch-2.1.0:\n", - " Successfully uninstalled torch-2.1.0\n", - "Successfully installed torch-1.13.1\n", - "Successfully installed packages from /home/oamontoy/.local/workspace/requirements.txt.\n", - "\n", - "New workspace directory structure:\n", - "workspace\n", - "├── logs\n", - "│ └── cnn_mnist\n", - "│ ├── events.out.tfevents.1695850586.M50CYP2SBSTD.111429.0\n", - "│ ├── events.out.tfevents.1695942084.M50CYP2SBSTD.4924.0\n", - "│ ├── events.out.tfevents.1695849809.M50CYP2SBSTD.107313.0\n", - "│ ├── events.out.tfevents.1695850472.M50CYP2SBSTD.110437.0\n", - "│ ├── events.out.tfevents.1695942744.M50CYP2SBSTD.15635.0\n", - "│ ├── events.out.tfevents.1696008244.M50CYP2SBSTD.98097.0\n", - "│ ├── events.out.tfevents.1695850981.M50CYP2SBSTD.114740.0\n", - "│ ├── events.out.tfevents.1695939101.M50CYP2SBSTD.143673.0\n", - "│ ├── events.out.tfevents.1695850850.M50CYP2SBSTD.113094.0\n", - "│ ├── events.out.tfevents.1695850404.M50CYP2SBSTD.109391.0\n", - "│ ├── events.out.tfevents.1695942232.M50CYP2SBSTD.7126.0\n", - "│ └── events.out.tfevents.1695849986.M50CYP2SBSTD.107937.0\n", - "├── .workspace\n", - "├── final_model.pth\n", - "├── plan\n", - "│ ├── plan.yaml\n", - "│ ├── defaults\n", - "│ ├── data.yaml\n", - "│ └── cols.yaml\n", - "├── agg_to_col_two_signed_cert.zip\n", - "├── requirements.txt\n", - "├── data\n", - "├── save\n", - "│ ├── torch_cnn_mnist_best.pbuf\n", - "│ ├── 
torch_cnn_mnist_last.pbuf\n", - "│ └── torch_cnn_mnist_init.pbuf\n", - "├── agg_to_col_one_signed_cert.zip\n", - "├── src\n", - "│ ├── pt_cnn.py\n", - "│ ├── mnist_utils.py\n", - "│ ├── __pycache__\n", - "│ │ ├── __init__.cpython-38.pyc\n", - "│ │ └── mnist_utils.cpython-38.pyc\n", - "│ ├── ptmnist_inmemory.py\n", - "│ └── __init__.py\n", - "└── cert\n", - "\n", - "8 directories, 30 files\n", - "Setting Up Certificate Authority...\n", - "\n", - "1. Create Root CA\n", - "1.1 Create Directories\n", - "1.2 Create Database\n", - "1.3 Create CA Request and Certificate\n", - "2. Create Signing Certificate\n", - "2.1 Create Directories\n", - "2.2 Create Database\n", - "2.3 Create Signing Certificate CSR\n", - "2.4 Sign Signing Certificate CSR\n", - "3 Create Certificate Chain\n", - "\n", - "Done.\n", - "Creating AGGREGATOR certificate key pair with following settings: CN=\u001b[31mm50cyp2sbstd\u001b[0m, SAN=\u001b[31mDNS:m50cyp2sbstd\u001b[0m\n", - " Writing AGGREGATOR certificate key pair to: \u001b[32m/home/oamontoy/workspace/cert/server\u001b[0m\n", - "The CSR Hash for file \u001b[32mserver/agg_m50cyp2sbstd.csr\u001b[0m = \u001b[31md49a1328c9e8ccfb65a4d583018704fd9d24b3301bb800ceb9f50b591937e1a5f8f419238b5e4c24af732693d37ce088\u001b[0m\n", - " Signing AGGREGATOR certificate\n", - "Creating COLLABORATOR certificate key pair with following settings: CN=\u001b[31mone\u001b[0m, SAN=\u001b[31mDNS:one\u001b[0m\n", - " Moving COLLABORATOR certificate to: \u001b[32m/home/oamontoy/workspace/cert/col_one\u001b[0m\n", - "The CSR Hash for file \u001b[32mcol_one.csr\u001b[0m = \u001b[31m0caea6371d4b13f51be51507794c4c18e0a9cb408f286f2f81a4b179380b15b3215e94d739ec952065fbc7eb3b2edbba\u001b[0m\n", - " Signing COLLABORATOR certificate\n", - "\n", - "Registering \u001b[32mone\u001b[0m in \u001b[32m/home/oamontoy/.local/workspace/plan/cols.yaml\u001b[0m\n", - "Creating COLLABORATOR certificate key pair with following settings: CN=\u001b[31mtwo\u001b[0m, SAN=\u001b[31mDNS:two\u001b[0m\n", 
- " Moving COLLABORATOR certificate to: \u001b[32m/home/oamontoy/workspace/cert/col_two\u001b[0m\n", - "The CSR Hash for file \u001b[32mcol_two.csr\u001b[0m = \u001b[31m3e6ffe3d25d39bb6f3f1fb851eb8da60d4cbf4e0bee78ad0f7731cc0e6bb47433830523f2c39dc0ca7f0ce79b69cc6c3\u001b[0m\n", - " Signing COLLABORATOR certificate\n", - "\n", - "Registering \u001b[32mtwo\u001b[0m in \u001b[32m/home/oamontoy/.local/workspace/plan/cols.yaml\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "#Setup default workspace, logging, etc.\n", "fx.init('torch_cnn_mnist')" @@ -215,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -242,7 +116,7 @@ " data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding=\"longest\")\n", " return data_collator, tokenized_datasets\n", "\n", - "base_model_name = \"roberta-large\"\n", + "base_model_name = \"roberta-base\"\n", "padding_side = \"right\"\n", "tokenizer = AutoTokenizer.from_pretrained(base_model_name, padding_side=padding_side)\n", "if getattr(tokenizer, \"pad_token_id\") is None:\n", @@ -259,7 +133,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -287,13 +161,13 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class GlueMrpcFederatedDataset(DataLoader):\n", " def __init__(self, train_set, valid_set, batch_size, data_collator=None):\n", - " self.data_splitter = EqualNumPyDataSplitter()\n", + " self.data_splitter = EqualNumPyDataSplitter(shuffle=True)\n", " if isinstance(train_set,Dataset):\n", " self.train_set = GlueMrpc.from_dict(train_set.to_dict())\n", " else:\n", @@ -326,7 +200,7 @@ " return DataLoader(self.train_set, batch_size=self.batch_size, collate_fn=data_collator)\n", " \n", " def get_valid_loader(self):\n", - " return DataLoader(self.valid_set, collate_fn=data_collator)\n", + " return 
DataLoader(self.valid_set, batch_size=self.batch_size, collate_fn=data_collator)\n", " \n", " def get_train_data_size(self):\n", " return len(self.train_set)\n", @@ -334,7 +208,8 @@ " def get_valid_data_size(self):\n", " return len(self.valid_set)\n", " \n", - "fl_data = GlueMrpcFederatedDataset(train_set, valid_set, batch_size=32)" + "fl_data = GlueMrpcFederatedDataset(train_set, valid_set, batch_size=32)\n", + "metric = load_metric('glue', \"mrpc\")" ] }, { @@ -346,52 +221,82 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class LLMTaskRunner(PyTorchTaskRunner):\n", - " def __init__(self, base_model_name, data_loader, device=None, metric=None, **kwargs):\n", - " kwargs['data_loader'] = data_loader\n", + " def __init__(\n", + " self, base_model_name, data_loader, device=None, metric=None, **kwargs\n", + " ):\n", + " kwargs[\"data_loader\"] = data_loader\n", " super().__init__(device, **kwargs)\n", " self.base_model_name = base_model_name\n", " self.metric = metric\n", " self._init_model()\n", " self._init_optimizer()\n", - " \n", + " self.save_models = []\n", + "\n", " def _init_model(self):\n", " model = AutoModelForSequenceClassification.from_pretrained(\n", - " self.base_model_name, return_dict=True)\n", - " peft_config = LoraConfig(task_type=TaskType.SEQ_CLS, inference_mode=False, r=16, lora_alpha=16, lora_dropout=0.1, bias=\"all\")\n", + " self.base_model_name, return_dict=True\n", + " )\n", + " peft_config = LoraConfig(\n", + " task_type=TaskType.SEQ_CLS,\n", + " inference_mode=False,\n", + " r=16,\n", + " lora_alpha=16,\n", + " lora_dropout=0.1,\n", + " bias=\"lora_only\",\n", + " )\n", " self.model = get_peft_model(model, peft_config)\n", - " \n", + "\n", " def _init_optimizer(self):\n", - " no_decay = [\"bias\", \"LayerNorm.weight\"]\n", + " ALL_LAYERNORM_LAYERS = [nn.LayerNorm]\n", + " decay_parameters = get_parameter_names(self.model, ALL_LAYERNORM_LAYERS)\n", + " 
decay_parameters = [name for name in decay_parameters if \"bias\" not in name]\n", + "\n", " optimizer_grouped_parameters = [\n", - " {\n", - " \"params\": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],\n", - " \"weight_decay\": 0.01,\n", - " },\n", - " {\n", - " \"params\": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],\n", - " \"weight_decay\": 0.0,\n", - " },\n", - " ]\n", - " self.optimizer = AdamW(optimizer_grouped_parameters, lr=0.01)\n", - " \n", + " {\n", + " \"params\": [\n", + " p\n", + " for n, p in self.model.named_parameters()\n", + " if (n in decay_parameters and p.requires_grad)\n", + " ],\n", + " \"weight_decay\": 0.01,\n", + " },\n", + " {\n", + " \"params\": [\n", + " p\n", + " for n, p in self.model.named_parameters()\n", + " if (n not in decay_parameters and p.requires_grad)\n", + " ],\n", + " \"weight_decay\": 0.0,\n", + " },\n", + " ]\n", + " self.optimizer = AdamW(optimizer_grouped_parameters, lr=0.001)\n", + " self.lr_scheduler = get_scheduler(\n", + " name=\"linear\",\n", + " optimizer=self.optimizer,\n", + " num_warmup_steps=0,\n", + " num_training_steps=len(self.data_loader.train_set) * 5,\n", + " )\n", + "\n", " self.training_round_completed = False\n", " self.initialize_tensorkeys_for_functions()\n", - " \n", + "\n", + " def train(self):\n", + " return self.model.train()\n", + "\n", " def state_dict(self):\n", " return get_peft_model_state_dict(self.model)\n", - " \n", + "\n", " def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True):\n", - " return set_peft_model_state_dict(\n", - " self.model, state_dict\n", - " )\n", - " \n", - " def validate(self, col_name, round_num, input_tensor_dict,\n", - " use_tqdm=False, **kwargs):\n", + " return set_peft_model_state_dict(self.model, state_dict)\n", + "\n", + " def validate(\n", + " self, col_name, round_num, input_tensor_dict, use_tqdm=False, **kwargs\n", + " ):\n", " \"\"\"Validate.\n", "\n", " 
Run validation of the model on the local data.\n", @@ -407,40 +312,42 @@ " local_output_dict: Tensors to maintain in the local TensorDB\n", "\n", " \"\"\"\n", + " self.save_models.append(input_tensor_dict.copy())\n", " self.rebuild_model(round_num, input_tensor_dict, validation=True)\n", " self.model.eval()\n", + " \n", + "\n", " self.model.to(self.device)\n", " val_score = 0\n", " total_samples = 0\n", "\n", " loader = self.data_loader.get_valid_loader()\n", " if use_tqdm:\n", - " loader = tqdm(loader, desc='validate')\n", + " loader = tqdm(loader, desc=\"validate\")\n", "\n", " with pt.no_grad():\n", " for sample in loader:\n", - " samples = sample['input_ids'].shape[0]\n", + " samples = sample[\"input_ids\"].shape[0]\n", " total_samples += samples\n", " output = self.model(**sample)\n", " # get the index of the max log-probability\n", " logits = output.logits\n", " predictions = torch.argmax(logits, dim=-1)\n", - " metric.add_batch(predictions=predictions, references=sample['labels'])\n", - " val_score = metric.compute()['accuracy']\n", + " metric.add_batch(predictions=predictions, references=sample[\"labels\"])\n", + " val_score = metric.compute()[\"accuracy\"]\n", "\n", " origin = col_name\n", - " suffix = 'validate'\n", - " if kwargs['apply'] == 'local':\n", - " suffix += '_local'\n", + " suffix = \"validate\"\n", + " if kwargs[\"apply\"] == \"local\":\n", + " suffix += \"_local\"\n", " else:\n", - " suffix += '_agg'\n", - " tags = ('metric',)\n", + " suffix += \"_agg\"\n", + " tags = (\"metric\",)\n", " tags = change_tags(tags, add_field=suffix)\n", " # TODO figure out a better way to pass in metric for this pytorch\n", " # validate function\n", " output_tensor_dict = {\n", - " TensorKey('acc', origin, round_num, True, tags):\n", - " np.array(val_score)\n", + " TensorKey(\"acc\", origin, round_num, True, tags): np.array(val_score)\n", " }\n", "\n", " # Empty list represents metrics that should only be stored locally\n", @@ -459,12 +366,13 @@ " \"\"\"\n", " 
losses = []\n", " for sample in batch_generator:\n", - " self.optimizer.zero_grad()\n", + " self.model.zero_grad()\n", " output = self.model(**sample)\n", " loss = output.loss\n", " loss.backward()\n", - " torch.nn.utils.clip_grad_norm_(self.model.parameters(),1.0)\n", - " self.model.zero_grad()\n", + " torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)\n", + " self.optimizer.step()\n", + " self.lr_scheduler.step()\n", " losses.append(loss.detach().cpu().numpy())\n", " loss = np.mean(losses)\n", " if self.model.config.problem_type == \"regression\":\n", @@ -474,10 +382,14 @@ " elif self.model.config.problem_type == \"multi_label_classification\":\n", " loss_fct = BCEWithLogitsLoss()\n", " return Metric(name=loss_fct._get_name(), value=np.array(loss))\n", - " \n", - " \n", - " def save_native(self, filepath, model_state_dict_key='model_state_dict',\n", - " optimizer_state_dict_key='optimizer_state_dict', **kwargs):\n", + "\n", + " def save_native(\n", + " self,\n", + " filepath,\n", + " model_state_dict_key=\"model_state_dict\",\n", + " optimizer_state_dict_key=\"optimizer_state_dict\",\n", + " **kwargs,\n", + " ):\n", " \"\"\"\n", " Save model and optimizer states in a picked file specified by the \\\n", " filepath. model_/optimizer_state_dicts are stored in the keys provided. \\\n", @@ -497,64 +409,18 @@ " \"\"\"\n", " pickle_dict = {\n", " model_state_dict_key: get_peft_model_state_dict(self.model),\n", - " optimizer_state_dict_key: self.optimizer.state_dict()\n", + " optimizer_state_dict_key: self.optimizer.state_dict(),\n", " }\n", " pt.save(pickle_dict, filepath)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_159004/1723172838.py:2: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. 
Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n", - " metric = load_metric('glue', \"mrpc\")\n", - "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" - ] - }, - { - "data": { - "text/html": [ - "
[08:48:31] WARNING  tried to remove tensor: __opt_state_needed not present in the tensor dict                                                       utils.py:172\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[08:48:31]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m tried to remove tensor: __opt_state_needed not present in the tensor dict \u001b]8;id=932122;file:///home/oamontoy/workspace/sec-openfl/openfl/utilities/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=685149;file:///home/oamontoy/workspace/sec-openfl/openfl/utilities/utils.py#172\u001b\\\u001b[2m172\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" - ] - }, - { - "data": { - "text/html": [ - "
[08:48:34] WARNING  tried to remove tensor: __opt_state_needed not present in the tensor dict                                                       utils.py:172\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[08:48:34]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m tried to remove tensor: __opt_state_needed not present in the tensor dict \u001b]8;id=38894;file:///home/oamontoy/workspace/sec-openfl/openfl/utilities/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=682120;file:///home/oamontoy/workspace/sec-openfl/openfl/utilities/utils.py#172\u001b\\\u001b[2m172\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "num_collaborators = 2\n", - "metric = load_metric('glue', \"mrpc\")\n", "collaborator_models = [\n", " LLMTaskRunner(\n", " base_model_name,\n", @@ -567,25 +433,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Original training data size: 3668\n", - "Original validation data size: 1725\n", - "\n", - "Collaborator 0's training data size: 1834\n", - "Collaborator 0's validation data size: 863\n", - "\n", - "Collaborator 1's training data size: 1834\n", - "Collaborator 1's validation data size: 862\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "#Original TinyImageNet dataset\n", "print(f'Original training data size: {len(fl_data.train_set)}')\n", @@ -603,332 +453,12 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
           INFO     Updating aggregator.settings.rounds_to_train to 3...                                                                           native.py:102\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Updating aggregator.settings.rounds_to_train to \u001b[1;36m3\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=22181;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py\u001b\\\u001b[2mnative.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=265323;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py#102\u001b\\\u001b[2m102\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     FL-Plan hash is 86b08340e96ba9e485169da1f860ea968811d1bf2e6867774fae4398426dd33c6ae56ca202002d393e3a4d91f946c1bc                 plan.py:235\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m FL-Plan hash is \u001b[34m86b08340e96ba9e485169da1f860ea968811d1bf2e6867774fae4398426dd33c6ae56ca202002d393e3a4d91f946c1bc\u001b[0m \u001b]8;id=965130;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=969631;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#235\u001b\\\u001b[2m235\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Building 🡆 Object NoCompressionPipeline from openfl.pipelines Module.                                                            plan.py:171\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mNoCompressionPipeline\u001b[0m from \u001b[31mopenfl.pipelines\u001b[0m Module. \u001b]8;id=418157;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=469081;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Creating Initial Weights File    🠆 save/torch_cnn_mnist_init.pbuf                                                              native.py:277\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Creating Initial Weights File 🠆 save/torch_cnn_mnist_init.pbuf \u001b]8;id=949808;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py\u001b\\\u001b[2mnative.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=806002;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py#277\u001b\\\u001b[2m277\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Starting Experiment...                                                                                                         native.py:281\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Starting Experiment\u001b[33m...\u001b[0m \u001b]8;id=768304;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py\u001b\\\u001b[2mnative.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=820559;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py#281\u001b\\\u001b[2m281\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Building 🡆 Object RandomGroupedAssigner from openfl.component Module.                                                            plan.py:171\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mRandomGroupedAssigner\u001b[0m from \u001b[31mopenfl.component\u001b[0m Module. \u001b]8;id=959697;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=588178;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Building 🡆 Object CutoffTimeBasedStragglerHandling from openfl.component.straggler_handling_functions Module.                    plan.py:171\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mCutoffTimeBasedStragglerHandling\u001b[0m from \u001b[31mopenfl.component.straggler_handling_functions\u001b[0m Module. \u001b]8;id=199471;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=897448;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Importing 🡆 Object write_metric from src.mnist_utils Module.                                                                     plan.py:199\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Importing \u001b[31m🡆\u001b[0m Object \u001b[31mwrite_metric\u001b[0m from \u001b[31msrc.mnist_utils\u001b[0m Module. \u001b]8;id=125418;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=640295;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#199\u001b\\\u001b[2m199\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/oamontoy/workspace/llama-env/lib/python3.8/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory\n", - " warn(f\"Failed to load image Python extension: {e}\")\n" - ] - }, - { - "data": { - "text/html": [ - "
[08:48:35] INFO     Building 🡆 Object Aggregator from openfl.component Module.                                                                       plan.py:171\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[08:48:35]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mAggregator\u001b[0m from \u001b[31mopenfl.component\u001b[0m Module. \u001b]8;id=299428;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=109248;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Using custom log metric: <function write_metric at 0x7f5c68ad28b0>                                                          aggregator.py:97\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Using custom log metric: \u001b[1m<\u001b[0m\u001b[1;95mfunction\u001b[0m\u001b[39m write_metric at \u001b[0m\u001b[1;36m0x7f5c68ad28b0\u001b[0m\u001b[1m>\u001b[0m \u001b]8;id=655419;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py\u001b\\\u001b[2maggregator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=161984;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py#97\u001b\\\u001b[2m97\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Building 🡆 Object Collaborator from openfl.component Module.                                                                     plan.py:171\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mCollaborator\u001b[0m from \u001b[31mopenfl.component\u001b[0m Module. \u001b]8;id=390053;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=853120;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Building 🡆 Object Collaborator from openfl.component Module.                                                                     plan.py:171\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mCollaborator\u001b[0m from \u001b[31mopenfl.component\u001b[0m Module. \u001b]8;id=612439;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=601521;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Waiting for tasks...                                                                                                     collaborator.py:178\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Waiting for tasks\u001b[33m...\u001b[0m \u001b]8;id=806029;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=928412;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#178\u001b\\\u001b[2m178\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Sending tasks to collaborator one for round 0                                                                              aggregator.py:329\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Sending tasks to collaborator one for round \u001b[1;36m0\u001b[0m \u001b]8;id=515202;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py\u001b\\\u001b[2maggregator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=103453;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py#329\u001b\\\u001b[2m329\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Received the following tasks: ['aggregated_model_validation', 'train', 'locally_tuned_model_validation']                 collaborator.py:168\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Received the following tasks: \u001b[1m[\u001b[0m\u001b[32m'aggregated_model_validation'\u001b[0m, \u001b[32m'train'\u001b[0m, \u001b[32m'locally_tuned_model_validation'\u001b[0m\u001b[1m]\u001b[0m \u001b]8;id=266582;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=901781;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#168\u001b\\\u001b[2m168\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[08:48:36] INFO     Using TaskRunner subclassing API                                                                                         collaborator.py:253\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[08:48:36]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Using TaskRunner subclassing API \u001b]8;id=457102;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=223379;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#253\u001b\\\u001b[2m253\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/oamontoy/workspace/sec-openfl/openfl/federated/task/runner_pt.py:284: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.)\n", - " new_state[k] = pt.from_numpy(tensor_dict.pop(k)).to(device)\n", - "You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" - ] - }, - { - "data": { - "text/html": [ - "
[08:49:31] METRIC   Round 0, collaborator one is sending metric for task aggregated_model_validation: acc   0.673233                         collaborator.py:415\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[08:49:31]\u001b[0m\u001b[2;36m \u001b[0mMETRIC Round \u001b[1;36m0\u001b[0m, collaborator one is sending metric for task aggregated_model_validation: acc \u001b[1;36m0.673233\u001b[0m \u001b]8;id=512008;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=179476;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#415\u001b\\\u001b[2m415\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Collaborator one is sending task results for aggregated_model_validation, round 0                                          aggregator.py:520\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Collaborator one is sending task results for aggregated_model_validation, round \u001b[1;36m0\u001b[0m \u001b]8;id=921737;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py\u001b\\\u001b[2maggregator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=943670;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py#520\u001b\\\u001b[2m520\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           METRIC   Round 0, collaborator validate_agg aggregated_model_validation result acc:      0.673233                                   aggregator.py:559\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0mMETRIC Round \u001b[1;36m0\u001b[0m, collaborator validate_agg aggregated_model_validation result acc: \u001b[1;36m0.673233\u001b[0m \u001b]8;id=778930;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py\u001b\\\u001b[2maggregator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=605180;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py#559\u001b\\\u001b[2m559\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Using TaskRunner subclassing API                                                                                         collaborator.py:253\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Using TaskRunner subclassing API \u001b]8;id=239383;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=814426;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#253\u001b\\\u001b[2m253\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Run 0 epoch of 0 round                                                                                                      runner_pt.py:155\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Run \u001b[1;36m0\u001b[0m epoch of \u001b[1;36m0\u001b[0m round \u001b]8;id=118270;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/task/runner_pt.py\u001b\\\u001b[2mrunner_pt.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=848215;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/task/runner_pt.py#155\u001b\\\u001b[2m155\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "ename": "AttributeError", - "evalue": "'RobertaForSequenceClassification' object has no attribute 'step'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/peft/peft_model.py:434\u001b[0m, in \u001b[0;36mPeftModel.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 433\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 434\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49m\u001b[39m__getattr__\u001b[39;49m(name) \u001b[39m# defer to nn.Module's logic\u001b[39;00m\n\u001b[1;32m 435\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m:\n", - "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/torch/nn/modules/module.py:1695\u001b[0m, in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1688\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mparameters\u001b[39m(\u001b[39mself\u001b[39m, recurse: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterator[Parameter]:\n\u001b[1;32m 1689\u001b[0m \u001b[39m \u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\"\"Returns an iterator over module 
parameters.\u001b[39;00m\n\u001b[1;32m 1690\u001b[0m \n\u001b[1;32m 1691\u001b[0m \u001b[39m This is typically passed to an optimizer.\u001b[39;00m\n\u001b[1;32m 1692\u001b[0m \n\u001b[1;32m 1693\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[1;32m 1694\u001b[0m \u001b[39m recurse (bool): if True, then yields parameters of this module\u001b[39;00m\n\u001b[0;32m-> 1695\u001b[0m \u001b[39m and all submodules. Otherwise, yields only parameters that\u001b[39;00m\n\u001b[1;32m 1696\u001b[0m \u001b[39m are direct members of this module.\u001b[39;00m\n\u001b[1;32m 1697\u001b[0m \n\u001b[1;32m 1698\u001b[0m \u001b[39m Yields:\u001b[39;00m\n\u001b[1;32m 1699\u001b[0m \u001b[39m Parameter: module parameter\u001b[39;00m\n\u001b[1;32m 1700\u001b[0m \n\u001b[1;32m 1701\u001b[0m \u001b[39m Example::\u001b[39;00m\n\u001b[1;32m 1702\u001b[0m \n\u001b[1;32m 1703\u001b[0m \u001b[39m >>> # xdoctest: +SKIP(\"undefined vars\")\u001b[39;00m\n\u001b[1;32m 1704\u001b[0m \u001b[39m >>> for param in model.parameters():\u001b[39;00m\n\u001b[1;32m 1705\u001b[0m \u001b[39m >>> print(type(param), param.size())\u001b[39;00m\n\u001b[1;32m 1706\u001b[0m \u001b[39m (20L,)\u001b[39;00m\n\u001b[1;32m 1707\u001b[0m \u001b[39m (20L, 1L, 5L, 5L)\u001b[39;00m\n\u001b[1;32m 1708\u001b[0m \n\u001b[1;32m 1709\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m 1710\u001b[0m \u001b[39mfor\u001b[39;00m name, param \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnamed_parameters(recurse\u001b[39m=\u001b[39mrecurse):\n", - "\u001b[0;31mAttributeError\u001b[0m: 'PeftModelForSequenceClassification' object has no attribute 'step'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/peft/tuners/lora.py:492\u001b[0m, in \u001b[0;36mLoraModel.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 491\u001b[0m 
\u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 492\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49m\u001b[39m__getattr__\u001b[39;49m(name) \u001b[39m# defer to nn.Module's logic\u001b[39;00m\n\u001b[1;32m 493\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m:\n", - "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/torch/nn/modules/module.py:1695\u001b[0m, in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1688\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mparameters\u001b[39m(\u001b[39mself\u001b[39m, recurse: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterator[Parameter]:\n\u001b[1;32m 1689\u001b[0m \u001b[39m \u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\"\"Returns an iterator over module parameters.\u001b[39;00m\n\u001b[1;32m 1690\u001b[0m \n\u001b[1;32m 1691\u001b[0m \u001b[39m This is typically passed to an optimizer.\u001b[39;00m\n\u001b[1;32m 1692\u001b[0m \n\u001b[1;32m 1693\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[1;32m 1694\u001b[0m \u001b[39m recurse (bool): if True, then yields parameters of this module\u001b[39;00m\n\u001b[0;32m-> 1695\u001b[0m \u001b[39m and all submodules. 
Otherwise, yields only parameters that\u001b[39;00m\n\u001b[1;32m 1696\u001b[0m \u001b[39m are direct members of this module.\u001b[39;00m\n\u001b[1;32m 1697\u001b[0m \n\u001b[1;32m 1698\u001b[0m \u001b[39m Yields:\u001b[39;00m\n\u001b[1;32m 1699\u001b[0m \u001b[39m Parameter: module parameter\u001b[39;00m\n\u001b[1;32m 1700\u001b[0m \n\u001b[1;32m 1701\u001b[0m \u001b[39m Example::\u001b[39;00m\n\u001b[1;32m 1702\u001b[0m \n\u001b[1;32m 1703\u001b[0m \u001b[39m >>> # xdoctest: +SKIP(\"undefined vars\")\u001b[39;00m\n\u001b[1;32m 1704\u001b[0m \u001b[39m >>> for param in model.parameters():\u001b[39;00m\n\u001b[1;32m 1705\u001b[0m \u001b[39m >>> print(type(param), param.size())\u001b[39;00m\n\u001b[1;32m 1706\u001b[0m \u001b[39m (20L,)\u001b[39;00m\n\u001b[1;32m 1707\u001b[0m \u001b[39m (20L, 1L, 5L, 5L)\u001b[39;00m\n\u001b[1;32m 1708\u001b[0m \n\u001b[1;32m 1709\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m 1710\u001b[0m \u001b[39mfor\u001b[39;00m name, param \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnamed_parameters(recurse\u001b[39m=\u001b[39mrecurse):\n", - "\u001b[0;31mAttributeError\u001b[0m: 'LoraModel' object has no attribute 'step'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/home/oamontoy/workspace/sec-openfl/openfl-tutorials/Federated_PyTorch_LLM.ipynb Cell 18\u001b[0m line \u001b[0;36m2\n\u001b[1;32m 1\u001b[0m \u001b[39m#Run experiment, return trained FederatedModel\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m final_fl_model \u001b[39m=\u001b[39m fx\u001b[39m.\u001b[39;49mrun_experiment(collaborators,{\u001b[39m'\u001b[39;49m\u001b[39maggregator.settings.rounds_to_train\u001b[39;49m\u001b[39m'\u001b[39;49m:\u001b[39m3\u001b[39;49m})\n", - "File \u001b[0;32m~/workspace/sec-openfl/openfl/native/native.py:295\u001b[0m, in \u001b[0;36mrun_experiment\u001b[0;34m(collaborator_dict, 
override_config)\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m plan\u001b[39m.\u001b[39mauthorized_cols:\n\u001b[1;32m 294\u001b[0m collaborator \u001b[39m=\u001b[39m collaborators[col]\n\u001b[0;32m--> 295\u001b[0m collaborator\u001b[39m.\u001b[39;49mrun_simulation()\n\u001b[1;32m 297\u001b[0m \u001b[39m# Set the weights for the final model\u001b[39;00m\n\u001b[1;32m 298\u001b[0m model\u001b[39m.\u001b[39mrebuild_model(\n\u001b[1;32m 299\u001b[0m rounds_to_train \u001b[39m-\u001b[39m \u001b[39m1\u001b[39m, aggregator\u001b[39m.\u001b[39mlast_tensor_dict, validation\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n", - "File \u001b[0;32m~/workspace/sec-openfl/openfl/component/collaborator/collaborator.py:170\u001b[0m, in \u001b[0;36mCollaborator.run_simulation\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlogger\u001b[39m.\u001b[39minfo(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mReceived the following tasks: \u001b[39m\u001b[39m{\u001b[39;00mtasks\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m)\n\u001b[1;32m 169\u001b[0m \u001b[39mfor\u001b[39;00m task \u001b[39min\u001b[39;00m tasks:\n\u001b[0;32m--> 170\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdo_task(task, round_number)\n\u001b[1;32m 171\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlogger\u001b[39m.\u001b[39minfo(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mAll tasks completed on \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcollaborator_name\u001b[39m}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 172\u001b[0m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mfor round \u001b[39m\u001b[39m{\u001b[39;00mround_number\u001b[39m}\u001b[39;00m\u001b[39m...\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 173\u001b[0m \u001b[39mbreak\u001b[39;00m\n", - "File 
\u001b[0;32m~/workspace/sec-openfl/openfl/component/collaborator/collaborator.py:255\u001b[0m, in \u001b[0;36mCollaborator.do_task\u001b[0;34m(self, task, round_number)\u001b[0m\n\u001b[1;32m 252\u001b[0m func \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtask_runner, func_name)\n\u001b[1;32m 253\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlogger\u001b[39m.\u001b[39minfo(\u001b[39m'\u001b[39m\u001b[39mUsing TaskRunner subclassing API\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m--> 255\u001b[0m global_output_tensor_dict, local_output_tensor_dict \u001b[39m=\u001b[39m func(\n\u001b[1;32m 256\u001b[0m col_name\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mcollaborator_name,\n\u001b[1;32m 257\u001b[0m round_num\u001b[39m=\u001b[39;49mround_number,\n\u001b[1;32m 258\u001b[0m input_tensor_dict\u001b[39m=\u001b[39;49minput_tensor_dict,\n\u001b[1;32m 259\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 261\u001b[0m \u001b[39m# Save global and local output_tensor_dicts to TensorDB\u001b[39;00m\n\u001b[1;32m 262\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtensor_db\u001b[39m.\u001b[39mcache_tensor(global_output_tensor_dict)\n", - "File \u001b[0;32m~/workspace/sec-openfl/openfl/federated/task/runner_pt.py:159\u001b[0m, in \u001b[0;36mPyTorchTaskRunner.train_batches\u001b[0;34m(self, col_name, round_num, input_tensor_dict, use_tqdm, epochs, **kwargs)\u001b[0m\n\u001b[1;32m 157\u001b[0m \u001b[39mif\u001b[39;00m use_tqdm:\n\u001b[1;32m 158\u001b[0m loader \u001b[39m=\u001b[39m tqdm\u001b[39m.\u001b[39mtqdm(loader, desc\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mtrain epoch\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m--> 159\u001b[0m metric \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtrain_epoch(loader)\n\u001b[1;32m 160\u001b[0m \u001b[39m# Output metric tensors (scalar)\u001b[39;00m\n\u001b[1;32m 161\u001b[0m 
origin \u001b[39m=\u001b[39m col_name\n", - "\u001b[1;32m/home/oamontoy/workspace/sec-openfl/openfl-tutorials/Federated_PyTorch_LLM.ipynb Cell 18\u001b[0m line \u001b[0;36m1\n\u001b[1;32m 113\u001b[0m loss\u001b[39m.\u001b[39mbackward()\n\u001b[1;32m 114\u001b[0m torch\u001b[39m.\u001b[39mnn\u001b[39m.\u001b[39mutils\u001b[39m.\u001b[39mclip_grad_norm_(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmodel\u001b[39m.\u001b[39mparameters(),\u001b[39m1.0\u001b[39m)\n\u001b[0;32m--> 115\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmodel\u001b[39m.\u001b[39;49mstep()\n\u001b[1;32m 116\u001b[0m losses\u001b[39m.\u001b[39mappend(loss\u001b[39m.\u001b[39mdetach()\u001b[39m.\u001b[39mcpu()\u001b[39m.\u001b[39mnumpy())\n\u001b[1;32m 117\u001b[0m loss \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mmean(losses)\n", - "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/peft/peft_model.py:436\u001b[0m, in \u001b[0;36mPeftModel.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 434\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__getattr__\u001b[39m(name) \u001b[39m# defer to nn.Module's logic\u001b[39;00m\n\u001b[1;32m 435\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m:\n\u001b[0;32m--> 436\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mgetattr\u001b[39;49m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mbase_model, name)\n", - "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/peft/tuners/lora.py:494\u001b[0m, in \u001b[0;36mLoraModel.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 492\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__getattr__\u001b[39m(name) \u001b[39m# defer to nn.Module's logic\u001b[39;00m\n\u001b[1;32m 493\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m:\n\u001b[0;32m--> 494\u001b[0m \u001b[39mreturn\u001b[39;00m 
\u001b[39mgetattr\u001b[39;49m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmodel, name)\n", - "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/torch/nn/modules/module.py:1695\u001b[0m, in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1688\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mparameters\u001b[39m(\u001b[39mself\u001b[39m, recurse: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterator[Parameter]:\n\u001b[1;32m 1689\u001b[0m \u001b[39m \u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\"\"Returns an iterator over module parameters.\u001b[39;00m\n\u001b[1;32m 1690\u001b[0m \n\u001b[1;32m 1691\u001b[0m \u001b[39m This is typically passed to an optimizer.\u001b[39;00m\n\u001b[1;32m 1692\u001b[0m \n\u001b[1;32m 1693\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[1;32m 1694\u001b[0m \u001b[39m recurse (bool): if True, then yields parameters of this module\u001b[39;00m\n\u001b[0;32m-> 1695\u001b[0m \u001b[39m and all submodules. 
Otherwise, yields only parameters that\u001b[39;00m\n\u001b[1;32m 1696\u001b[0m \u001b[39m are direct members of this module.\u001b[39;00m\n\u001b[1;32m 1697\u001b[0m \n\u001b[1;32m 1698\u001b[0m \u001b[39m Yields:\u001b[39;00m\n\u001b[1;32m 1699\u001b[0m \u001b[39m Parameter: module parameter\u001b[39;00m\n\u001b[1;32m 1700\u001b[0m \n\u001b[1;32m 1701\u001b[0m \u001b[39m Example::\u001b[39;00m\n\u001b[1;32m 1702\u001b[0m \n\u001b[1;32m 1703\u001b[0m \u001b[39m >>> # xdoctest: +SKIP(\"undefined vars\")\u001b[39;00m\n\u001b[1;32m 1704\u001b[0m \u001b[39m >>> for param in model.parameters():\u001b[39;00m\n\u001b[1;32m 1705\u001b[0m \u001b[39m >>> print(type(param), param.size())\u001b[39;00m\n\u001b[1;32m 1706\u001b[0m \u001b[39m (20L,)\u001b[39;00m\n\u001b[1;32m 1707\u001b[0m \u001b[39m (20L, 1L, 5L, 5L)\u001b[39;00m\n\u001b[1;32m 1708\u001b[0m \n\u001b[1;32m 1709\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m 1710\u001b[0m \u001b[39mfor\u001b[39;00m name, param \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnamed_parameters(recurse\u001b[39m=\u001b[39mrecurse):\n\u001b[1;32m 1711\u001b[0m \u001b[39myield\u001b[39;00m param\n", - "\u001b[0;31mAttributeError\u001b[0m: 'RobertaForSequenceClassification' object has no attribute 'step'" - ] - } - ], + "outputs": [], "source": [ "#Run experiment, return trained FederatedModel\n", - "final_fl_model = fx.run_experiment(collaborators,{'aggregator.settings.rounds_to_train':3})" + "final_fl_model = fx.run_experiment(collaborators,{'aggregator.settings.rounds_to_train':1,\"tasks.train.kwargs.epochs\":10})" ] }, { From 00a7d92a978400071c1036828d7ff23c5dde8542 Mon Sep 17 00:00:00 2001 From: porteratzo Date: Thu, 26 Oct 2023 09:52:32 -0700 Subject: [PATCH 4/4] changes --- openfl-tutorials/Federated_PyTorch_LLM.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openfl-tutorials/Federated_PyTorch_LLM.ipynb b/openfl-tutorials/Federated_PyTorch_LLM.ipynb index 
78b9978604..ba5b2bede9 100644 --- a/openfl-tutorials/Federated_PyTorch_LLM.ipynb +++ b/openfl-tutorials/Federated_PyTorch_LLM.ipynb @@ -4,14 +4,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Federated PyTorch TinyImageNet Tutorial" + "# Federated PyTorch LLM Tutorial" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "This notebook is an example of Transfer Learning \n", + "This notebook is an example of LLM fine-tuning\n", "\n", "Custom DataLoader is used with OpenFL Python API" ] @@ -77,7 +77,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now we are ready to define our dataset and model to perform federated learning on. The dataset should be composed of a numpy arrayWe start with a simple fully connected model that is trained on the MNIST dataset. " + "Now we are ready to define our dataset and model to perform federated learning on. The dataset should be composed of a numpy arrayWe start with a simple Roberta model that is trained on the glue mrpc dataset. " ] }, { @@ -458,7 +458,7 @@ "outputs": [], "source": [ "#Run experiment, return trained FederatedModel\n", - "final_fl_model = fx.run_experiment(collaborators,{'aggregator.settings.rounds_to_train':1,\"tasks.train.kwargs.epochs\":10})" + "final_fl_model = fx.run_experiment(collaborators,{'aggregator.settings.rounds_to_train':10,\"tasks.train.kwargs.epochs\":2})" ] }, {