diff --git a/cmd/suggestion/nasenvelopenet/Dockerfile b/cmd/suggestion/nasenvelopenet/Dockerfile new file mode 100644 index 00000000000..c825462d32c --- /dev/null +++ b/cmd/suggestion/nasenvelopenet/Dockerfile @@ -0,0 +1,8 @@ +FROM python:3.6 + +ADD . /usr/src/app/github.com/kubeflow/katib +WORKDIR /usr/src/app/github.com/kubeflow/katib/cmd/suggestion/nasenvelopenet +RUN pip install --no-cache-dir -r requirements.txt +ENV PYTHONPATH /usr/src/app/github.com/kubeflow/katib:/usr/src/app/github.com/kubeflow/katib/pkg/api/python + +ENTRYPOINT ["python", "-u", "main.py"] diff --git a/cmd/suggestion/nasenvelopenet/__init__.py b/cmd/suggestion/nasenvelopenet/__init__.py new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/cmd/suggestion/nasenvelopenet/__init__.py @@ -0,0 +1 @@ + diff --git a/cmd/suggestion/nasenvelopenet/main.py b/cmd/suggestion/nasenvelopenet/main.py new file mode 100644 index 00000000000..d9da12cefa8 --- /dev/null +++ b/cmd/suggestion/nasenvelopenet/main.py @@ -0,0 +1,29 @@ +import grpc +from concurrent import futures + +import time + +from pkg.api.python import api_pb2_grpc +from pkg.suggestion.nasenvelopenet_service import EnvelopenetService +from pkg.suggestion.types import DEFAULT_PORT +from logging import getLogger, StreamHandler, INFO, DEBUG + + +_ONE_DAY_IN_SECONDS = 60 * 60 * 24 + + +def serve(): + print("NAS Envelopenet Suggestion Service") + server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) + api_pb2_grpc.add_SuggestionServicer_to_server(EnvelopenetService(), server) + server.add_insecure_port(DEFAULT_PORT) + print("Listening...") + server.start() + try: + while True: + time.sleep(_ONE_DAY_IN_SECONDS) + except KeyboardInterrupt: + server.stop(0) + +if __name__ == "__main__": + serve() diff --git a/cmd/suggestion/nasenvelopenet/requirements.txt b/cmd/suggestion/nasenvelopenet/requirements.txt new file mode 100644 index 00000000000..155e383b5f7 --- /dev/null +++ b/cmd/suggestion/nasenvelopenet/requirements.txt @@ -0,0 +1,3 @@ +grpcio +protobuf +googleapis-common-protos diff --git a/examples/nasjob-example-envelopenet.yaml b/examples/nasjob-example-envelopenet.yaml new file mode 100644 index 00000000000..d45f1c0e4b8 --- /dev/null +++ b/examples/nasjob-example-envelopenet.yaml @@ -0,0 +1,126 @@ +apiVersion: "kubeflow.org/v1alpha1" +kind: StudyJob +metadata: + namespace: kubeflow + labels: + controller-tools.k8s.io: "1.0" + name: nasenvelopenet +spec: + studyName: nasenvelopenet + owner: crd + optimizationtype: maximize + objectivevaluename: MeanSSS + optimizationgoal: 0.99 + requestcount: 6 + metricsnames: + - accuracy + nasConfig: + graphConfig: + numLayers: 3 + inputSize: + - 32 + - 32 + - 3 + outputSize: + - 10 + operations: + - operationType: envelopenet + parameterconfigs: + - name: envelopecell + parametertype: categorical + feasible: + list: + - "1" + - "3" + - "5" + - "3sep" + - "5sep" + - "7sep" + - name: layers_per_stage + parametertype: categorical + feasible: + list: + - "3" + - "3" + - "3" + - name: construction + parametertype: categorical + feasible: + list: + - "1" + - "1" + - "0" + - name: parameter_limits + parametertype: categorical + feasible: + list: + - "0" + - "0" + - "0" + - name: max_filter_prune + parametertype: categorical + feasible: + list: + - "6" + - name: skip + parametertype: categorical + feasible: + list: + - "1" + - name: outputs + parametertype: categorical + feasible: + list: + - "64" + - "128" + - "256" + - "512" + + workerSpec: + goTemplate: + rawTemplate: |- + apiVersion: batch/v1 + kind: Job + metadata: + name: {{.WorkerID}} + namespace: {{.NameSpace}} + spec: + template: + spec: + containers: + - name: {{.WorkerID}} + image: docker.io/anubhavgarg/envelopenet_training_container + command: + - "python3.5" + - "-u" + - "run_trial.py" + {{- with .HyperParameters}} + {{- range .}} + - "--{{.Name}}={{.Value}}" + {{- end}} + {{- end}} + resources: + limits: + nvidia.com/gpu: 1 + restartPolicy: OnFailure + suggestionSpec: + suggestionAlgorithm: "nasenvelopenet" + suggestionParameters: + - name: "max_layers_per_stage" + value: "763" + - name: "gpus" + value: "01" + - name: "gpu_usage" + value: "0.47" + - name: "steps" + value: "1000" + - name: "batch_size" + value: "50" + - name: "dataset" + value: "cifar10" + - name: "data_dir" + value: "data/" + - name: "iterations" + value: "5" + - name: "log_stats" + value: "True" diff --git a/manifests/vizier/suggestion/NAS-Envelopenet/deployment.yaml b/manifests/vizier/suggestion/NAS-Envelopenet/deployment.yaml new file mode 100644 index 00000000000..388ee4ce985 --- /dev/null +++ b/manifests/vizier/suggestion/NAS-Envelopenet/deployment.yaml @@ -0,0 +1,23 @@ +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: vizier-suggestion-nasenvelopenet + namespace: kubeflow + labels: + app: vizier + component: suggestion-nasenvelopenet +spec: + replicas: 1 + template: + metadata: + name: vizier-suggestion-nasenvelopenet + labels: + app: vizier + component: suggestion-nasenvelopenet + spec: + containers: + - name: vizier-suggestion-nasenvelopenet + image: docker.io/anubhavgarg/nas_envelopenet_service + ports: + - name: api + containerPort: 6789 diff --git a/manifests/vizier/suggestion/NAS-Envelopenet/service.yaml b/manifests/vizier/suggestion/NAS-Envelopenet/service.yaml new file mode 100644 index 00000000000..d75cda4fd5b --- /dev/null +++ b/manifests/vizier/suggestion/NAS-Envelopenet/service.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + name: vizier-suggestion-nasenvelopenet + namespace: kubeflow + labels: + app: vizier + component: suggestion-nasenvelopenet +spec: + type: ClusterIP + ports: + - port: 6789 + protocol: TCP + name: api + selector: + app: vizier + component: suggestion-nasenvelopenet diff --git a/pkg/suggestion/NAS_Envelopenet/README.md b/pkg/suggestion/NAS_Envelopenet/README.md new file mode 100644 index 00000000000..c1f3d5e2d42 --- /dev/null +++ b/pkg/suggestion/NAS_Envelopenet/README.md @@ -0,0 +1,28 @@ +# About the Neural Architecture Search with Envelopenet Suggestion + +The algorithm follows the idea proposed in *Fast Neural Architecture Construction using EnvelopeNets* by Kamath et al.(https://arxiv.org/pdf/1803.06744.pdf). It is not a Reinforcement Learning or evolution based NAS, rather a method to construct deep network +architectures by pruning and expansion of a base network. This approach directly compares the utility of different filters using statistics derived from filter featuremaps reach a state where the utility of different filters +within a network can be compared and hence can be used to construct networks. + +# Envelopenets + +The EnvelopeCell is a set of M convolution blocks connected in parallel. E.g. one of the EnvelopeCells used in this work has 6 convolution blocks connected in parallel: 1x1 convolution, +3x3 convolution, 3x3 separable convolution, 5x5 convolution, 5x5 separable convolution and 7x7 separable convolution. The EnvelopeNet consists of a number of the EnvelopeCells stacked in series organized into stages of +n layers, separated by wideners. + +# Output of `GetSuggestion()` + +The output of `GetSuggestion()` is the `architecture` + +`architecture` is a json string of the definition of a neural architecture. The format is as stated above. One example is: +``` +{'type': 'macro', 'network': [{'filters': ['1', '3', '5', '3sep', '5sep', '7sep'], 'outputs': 26, 'inputs': []}, {'filters': ['1', '3', '5', '3sep', '5sep', '7sep'], 'outputs': 26, 'inputs': []}, + {'filters': ['1', '3', '5', '3sep', '5sep', '7sep'], 'outputs': 64, 'inputs': [1]}, {'widener': {}}, {'filters': ['1', '3', '5', '3sep', '5sep', '7sep'], 'outputs': 53, 'inputs': [3, 2, 1]}, + {'filters': ['1', '3', '5', '3sep', '5sep', '7sep'], 'outputs': 53, 'inputs': [4, 3, 2, 1]}, {'filters': ['1', '3', '5', '3sep', '5sep', '7sep'], 'outputs': 128, 'inputs': [5, 4, 3, 2, 1]}, {'widener': {}}, + {'filters': ['1', '3', '5', '3sep', '5sep', '7sep'], 'outputs': 106, 'inputs': [7, 6, 5, 4, 3, 2, 1]}, {'filters': ['1', '3', '5', '3sep', '5sep', '7sep'], 'outputs': 106, 'inputs': [8, 7, 6, 5, 4, 3, 2, 1]}, + {'filters': ['1', '3', '5', '3sep', '5sep', '7sep'], 'outputs': 256, 'inputs': [9, 8, 7, 6, 5, 4, 3, 2, 1]}]} +``` +# Flow of example + +The parameters and initial architecture from yaml file is passed to ModelConstructor, which constructs the model and trains. Then it sends the featuremap statistics from this truncated training and the process to refine the architecture +continues till the max_iterations parameter. diff --git a/pkg/suggestion/NAS_Envelopenet/generate_arch.py b/pkg/suggestion/NAS_Envelopenet/generate_arch.py new file mode 100644 index 00000000000..65214c80e47 --- /dev/null +++ b/pkg/suggestion/NAS_Envelopenet/generate_arch.py @@ -0,0 +1,312 @@ +import copy +import re +import math +from collections import Counter + +class NACAlg: + def __init__(self, config, initcell): + self.config = config + self.max_filter_prune = int(self.config["max_filter_prune"]) + self.envelopecell = self.config["envelopecell"] + self.init_cell = initcell + self.layers_per_stage = self.config["layers_per_stage"] + self.max_layers_per_stage = self.config["max_layers_per_stage"] + self.stages = self.config["stages"] + self.parameter_limits = self.config["parameter_limits"] + self.construction = self.config["construction"] + self.branch = {"1":0,"3":1,"5":2,"3sep":3,"5sep":4,"7sep":5} + self.map = {0:"1",1:"3",2:"5",3:"3sep",4:"5sep",5:"7sep"} + + + def generate(self): + """ Generate a network arch based on network config params: Used for + the initial network + """ + return self.gen_envelopenet_bystages() + + def construct(self, arch, samples): + """ Construct a new network based on current arch, metrics from last + run and construction config params + """ + self.arch = arch + return self.construct_envelopenet_bystages(samples) + + def construct_envelopenet_bystages(self, samples): + arch = {"type":"macro","network":[]} + print("Constructing") + offset_count = [0] + stages = [] + stage = [] + stagecellnames = {} + ssidx = {} + lidx = 1 + ssidx[0] = 1 + stagenum = 0 + for layer in self.arch["network"]: + if 'widener' in layer: + lidx += 1 + stages.append(stage) + stage = [] + stagenum += 1 + ssidx[stagenum] = lidx + else: + for branch in layer["filters"]: + cellname = 'Cell' + str(lidx) + "/" + branch + if stagenum not in stagecellnames: + stagecellnames[stagenum] = [] + stagecellnames[stagenum].append(cellname) + stage.append(layer) + lidx += 1 + stages.append(stage) + stagenum = 0 + narch = [] + for stage in stages: + if int(self.construction[stagenum]) and len( + stage) <= int(self.max_layers_per_stage[stagenum]): + + prune = self.select_prunable( + stagecellnames[stagenum], samples) + nstage = self.prune_filters(ssidx[stagenum], stage, prune) + nstage = self.add_cell(nstage) + offset_count += [offset_count[-1]] * len(nstage) + offset_count[-1] += 1 + else: + nstage = copy.deepcopy(stage) + offset_count += [offset_count[-1]] * len(nstage) + self.set_outputs(nstage, stagenum) + if stagenum != len(stages) - 1: + nstage = self.add_widener(nstage) + offset_count.append(offset_count[-1]) + narch += (nstage) + stagenum += 1 + + offset_count = offset_count[1:] + arch["network"] = narch + narch = self.insert_skip(arch, samples, offset_count, dense_connect=False) + return narch + + def remove_logging(self, line): + line = re.sub(r"\d\d\d\d.*ops.cc:79\] ", "", line) + return line + + + def get_samples(self, samples, filter_string='MeanSSS'): + filtered_log = [line for line in samples if line.startswith(filter_string)] + return filtered_log + + def get_filter_sample(self, sample): + fields = sample.split(":") + filt = fields[1] + value = float(fields[2].split(']')[0].lstrip('[')) + return filt, value + + def set_outputs(self, stage, stagenum): + init = self.init_cell + sinit = sorted(init.keys()) + """ Input channels = output of last layer (conv) in the init """ + for layer in sinit: + for branch in init[layer]: + if "outputs" in init[layer][branch]: + inputchannels = init[layer][branch]["outputs"] + width = math.pow(2, stagenum) * inputchannels + if self.parameter_limits[stagenum]: + """ Parameter limiting: Calculate output of the internal filters such that + overall params is maintained constant + """ + layers = float(len(stage)) + + outputs = int((width / (layers - 2.0)) * + (math.pow(layers - 1.0, 0.5) - 1)) + + lidx = 0 + for layer in stage: + if "widener" in layer: + lidx += 1 + continue + if lidx == len(stage) - \ + 1 or self.parameter_limits[stagenum] is False: + layer["outputs"] = int(width) + elif "filters" in layer: + layer["outputs"] = outputs + lidx += 1 + + def select_prunable(self, stagecellnames, samples): + measurements = {} + for sample in samples: + if sample == '': + continue + sample = self.remove_logging(sample) + filt, value = self.get_filter_sample(sample) + + """ Prune only filters in this stage """ + if filt not in stagecellnames: + continue + + if filt not in measurements: + measurements[filt] = [] + measurements[filt].append(value) + + metrics = {} + for filt in measurements: + metrics[filt] = measurements[filt][-1] + + smetrics = sorted(metrics, key=metrics.get, reverse=False) + """ Count number of cells in each layer """ + cellcount = {} + for cellbr in metrics: + cellidx = cellbr.split("/")[0].lstrip("Cell") + if cellidx not in cellcount: + cellcount[cellidx] = 0 + cellcount[cellidx] += 1 + + """ Make sure we do not prune all cells in one layer """ + prunedcount = {} + prune = [] + for smetric in smetrics: + prunecellidx = smetric.split("/")[0].lstrip("Cell") + if prunecellidx not in prunedcount: + prunedcount[prunecellidx] = 0 + if prunedcount[prunecellidx] + 1 < cellcount[prunecellidx]: + + prune.append(smetric) + prunedcount[prunecellidx] += 1 + """ Limit number of pruned cells to min of threshold * number of + filters in stage and maxfilter prune """ + threshold = (1.0 / 3.0) + prunecount = min(self.max_filter_prune, int( + threshold * float(len(stagecellnames)))) + if len(prune) >= prunecount: + break + if not prune: + print("Error: No cells to prune") + exit(-1) + return prune + + def prune_filters(self, ssidx, stage, prune): + """ Generate a pruned network without the wideners """ + narch = [] + lidx = 0 + nfilterlayers = 0 + for layer in stage: + if 'widener' in layer: + lidx += 1 + continue + narch.append(copy.deepcopy(stage[lidx])) + for filt in stage[lidx]["filters"]: + fidx = self.branch.get(filt) + for prn in prune: + prunecidx = prn.split("/")[0].lstrip("Cell") + prunef = prn.split("/")[1] + prunefidx=self.branch.get(prunef) + if ssidx + lidx == int(prunecidx) and \ + fidx == prunefidx: + narch[-1]["filters"].remove(self.map.get(prunefidx)) + print("Narc: " + str(narch[-1])) + nfilterlayers += 1 + lidx += 1 + return narch + + def add_cell(self, narch): + narch.append({"filters": self.envelopecell}) + return narch + + def add_widener(self, narch): + narch.append({"widener": {}}) + return narch + + def group_by_layer(self, samples): + stats = {} + for sample in samples[::-1]: + source_node = int(re.search(r'.*:source-(\d+)dest-(\d+).*', sample).group(1)) + dest_node = int(re.search(r'.*:source-(\d+)dest-(\d+).*', sample).group(2)) + if dest_node not in stats.keys(): + stats[dest_node] = {} + if source_node not in stats[dest_node].keys(): + stats[dest_node][source_node] = float(re.search(r'\[(-?\d+\.\d+)\]', sample).group(1)) + return stats + + + def insert_skip(self, narch, samples=None, offset_count=None, dense_connect=False): + new_network = narch['network'] + if "skip" not in self.config or not self.config['skip']: + return narch + + if dense_connect == True: + for layer_id, layer in enumerate(narch['network']): + if "filters" in layer: + new_network[layer_id]["inputs"] = [] + for connections in range(layer_id - 1, 0, -1): + new_network[layer_id]["inputs"].append(connections) + else: + threshold = 0.5 + new_connections = [] + scalar_filtered_samples = self.get_samples(samples, filter_string='scalar') + l2norm_filtered_samples = self.get_samples(samples, filter_string='l2norm') + scalar_stats = self.group_by_layer(scalar_filtered_samples) + l2norm_stats = self.group_by_layer(l2norm_filtered_samples) + """ scalar stats are being used right now """ + for layer_id, layer in enumerate(narch['network'][1:], start=1): + if "filters" in layer: + if offset_count[layer_id] != offset_count[layer_id - 1]: + """ New layer has been added at this position, connect densely """ + new_network[layer_id]["inputs"] = [] + for connections in range(layer_id - 1, 0, -1): + new_network[layer_id]["inputs"].append(connections) + new_connections.append(layer_id + 1) + else: + previous_layer_id = layer_id - offset_count[layer_id] + if (previous_layer_id+1) in scalar_stats.keys(): + number_to_keep = len(scalar_stats[previous_layer_id+1]) - int(threshold * len(scalar_stats[previous_layer_id+1])) + print("layer_id = {}, number_to_keep = {}, scalar_stats = {}".format( + layer_id, number_to_keep, scalar_stats[previous_layer_id+1])) + connections = scalar_stats[previous_layer_id+1] + pruned_connections = list(zip(*Counter(connections).most_common(number_to_keep)))[0] + + updated_pruned_connections = [] + for connection in pruned_connections: + offset = offset_count[connection - 1] + while offset_count[offset + connection - 1] != offset: + offset = offset_count[offset + connection - 1] + new_index = connection + offset + updated_pruned_connections.append(new_index) + new_network[layer_id]["inputs"] = updated_pruned_connections+ new_connections + + narch['network'] = new_network + return narch + + def insert_wideners(self, narch): + """ Insert wideners, + Space maxwideners equally with a minimum spacing of self.minwidenerintval + Last widenerintval may have less layers than others """ + + """ widenerintval= nfilterlayers//self.maxwideners """ + widenerintval = len(narch) // self.maxwideners + if widenerintval < self.minwidenerintval: + widenerintval = self.minwidenerintval + nlayer = 1 + insertindices = [] + for layer in narch: + """ Do not add a widener if it is the last layer """ + if nlayer % widenerintval == 0 and nlayer != len(narch): + insertindices.append(nlayer) + nlayer += 1 + idxcnt = 0 + for layeridx in insertindices: + lidx = layeridx + idxcnt + """ Adjust insertion indices after inserts """ + narch.insert(lidx, {"widener": {}}) + idxcnt += 1 + return narch + + def gen_envelopenet_bystages(self): + self.arch = {"type":"macro","network":[]} + for stageidx in range(int(self.stages)): + stage = [] + for idx1 in range(int(self.layers_per_stage[stageidx])): + stage.append({"filters": self.envelopecell}) + self.set_outputs(stage, stageidx) + if stageidx != int(self.stages) - 1: + stage = self.add_widener(stage) + self.arch["network"] += stage + self.arch = self.insert_skip(self.arch, dense_connect=True) + return self.arch diff --git a/pkg/suggestion/NAS_Envelopenet/nac_gen.py b/pkg/suggestion/NAS_Envelopenet/nac_gen.py new file mode 100644 index 00000000000..65ddfd233a7 --- /dev/null +++ b/pkg/suggestion/NAS_Envelopenet/nac_gen.py @@ -0,0 +1,16 @@ +from pkg.suggestion.NAS_Envelopenet.generate_arch import NACAlg + +class NAC: + def __init__(self, + envelopenet_params = dict()): + initcell = { + "Layer0": {"Branch0": {"block": "conv2d", "kernel_size": [1, 1], "outputs": 64}}, + "Layer2": {"Branch0": {"block": "lrn" }} + } + self.alg = NACAlg(envelopenet_params, initcell) + + def get_init_arch(self): + return self.alg.generate() + + def get_arch(self, arch, result): + return self.alg.construct(arch, result) diff --git a/pkg/suggestion/NAS_Envelopenet/operation.py b/pkg/suggestion/NAS_Envelopenet/operation.py new file mode 100644 index 00000000000..60da41460db --- /dev/null +++ b/pkg/suggestion/NAS_Envelopenet/operation.py @@ -0,0 +1,27 @@ +from pkg.api.python import api_pb2 + +class SearchSpace(object): + def __init__(self, operations): + self.operation_list = list(operations.operation) + self.search_space = dict() + self._parse_operations() + + def _parse_operations(self): + + for operation in self.operation_list: + opt_spec = list(operation.parameter_configs.configs) + """ avail_space is dict with the format {"spec_nam": [spec feasible values]} """ + avail_space = dict() + + for ispec in opt_spec: + spec_name = ispec.name + if ispec.parameter_type == api_pb2.CATEGORICAL: + avail_space[spec_name] = list(ispec.feasible.list) + if len(avail_space[spec_name])==1: + avail_space[spec_name]=int(avail_space[spec_name][0]) + elif ispec.parameter_type == api_pb2.INT: + spec_min = int(ispec.feasible.min) + spec_max = int(ispec.feasible.max) + spec_step = int(ispec.feasible.step) + avail_space[spec_name] = range(spec_min, spec_max+1, spec_step) + self.search_space = avail_space diff --git a/pkg/suggestion/NAS_Envelopenet/suggestion_param.py b/pkg/suggestion/NAS_Envelopenet/suggestion_param.py new file mode 100644 index 00000000000..774db771730 --- /dev/null +++ b/pkg/suggestion/NAS_Envelopenet/suggestion_param.py @@ -0,0 +1,57 @@ +def parseSuggestionParam(params_raw): + param_standard = { + "gpus": ['categorical', list, []], + "gpu_usage": ['value', float, [1e-6, 1.0]], + "steps": ['value', int, [0, 'inf']], + "batch_size": ['value', int, [1, 'inf']], + "dataset": ['categorical', str, ["cifar10", "imagenet"]], + "iterations": ['value', int, [0, 20]], + "log_stats": ['categorical', bool, [True, False]], + "data_dir":['categorical', str, ["data/"]], + "max_layers_per_stage":['categorical', list, []] + } + + suggestion_params = { + "data_dir":"data/", + "gpus": [], + "gpu_usage": 0.47, + "steps": 10000, + "batch_size": 50, + "dataset": "cifar10", + "iterations": 5, + "log_stats": True, + "max_layers_per_stage":[7,6,3] + } + + def checktype(param_name, param_value, check_mode, supposed_type, supposed_range=None): + correct = True + + try: + converted_value = supposed_type(param_value) + except: + correct = False + print("Parameter {} is of wrong type. Set back to default value {}" + .format(param_name, suggestion_params[param_name])) + + if correct and check_mode == 'value': + if not ((supposed_range[0] == '-inf' or converted_value >= supposed_range[0]) and + (supposed_range[1] == 'inf' or converted_value <= supposed_range[1])): + correct = False + print("Parameter {} out of range. Set back to default value {}" + .format(param_name, suggestion_params[param_name])) + + if correct: + suggestion_params[param_name] = converted_value + + + for param in params_raw: + if param.name in suggestion_params.keys(): + checktype(param.name, + param.value, + param_standard[param.name][0], # mode + param_standard[param.name][1], # type + param_standard[param.name][2]) # range + else: + print("Unknown Parameter name: {}".format(param.name)) + + return suggestion_params diff --git a/pkg/suggestion/nasenvelopenet_service.py b/pkg/suggestion/nasenvelopenet_service.py new file mode 100644 index 00000000000..e9c4cda0fef --- /dev/null +++ b/pkg/suggestion/nasenvelopenet_service.py @@ -0,0 +1,130 @@ +import grpc +from pkg.api.python import api_pb2 +from pkg.api.python import api_pb2_grpc +import logging +import json +from pkg.suggestion.NAS_Envelopenet.operation import SearchSpace +from pkg.suggestion.NAS_Envelopenet.suggestion_param import parseSuggestionParam +from pkg.suggestion.NAS_Envelopenet.nac_gen import NAC + +class EnvelopenetService(api_pb2_grpc.SuggestionServicer): + def __init__(self): + self.manager_addr = "vizier-core" + self.manager_port = 6789 + self.current_study_id = "" + self.current_trial_id = "" + self.ctrl_cache_file = "" + self.is_first_run = True + self.current_itr=0 + logging.basicConfig(level=logging.INFO) + self.logger = logging.getLogger("Suggestion") + + def generate_arch(self, request): + self.current_study_id = request.study_id + self.current_trial_id = "" + self._get_search_space(request.study_id) + self._get_suggestion_param(request.param_id) + self.restruct_itr=self.suggestion_config["iterations"] + self.generator = NAC( + self.search_space) + + + def GetSuggestions(self, request, context): + if request.study_id != self.current_study_id: + self.generate_arch(request) + + if self.current_itr==0: + self.arch=self.generator.get_init_arch() + elif self.current_itr<=self.restruct_itr: + result = self.GetEvaluationResult(request.study_id, self.prev_trial_id) + self.arch=self.generator.get_arch(self.arch, result) + + self.logger.info("Architecture at itr={}".format(self.current_itr)) + self.logger.info(self.arch) + arch_json=json.dumps(self.arch) + config_json=json.dumps(self.suggestion_config) + arch=str(arch_json).replace('\"', '\'') + config=str(config_json).replace('\"', '\'') + + trials = [] + trials.append(api_pb2.Trial( + study_id=request.study_id, + parameter_set=[ + api_pb2.Parameter( + name="architecture", + value=arch, + parameter_type= api_pb2.CATEGORICAL), + api_pb2.Parameter( + name="parameters", + value=config, + parameter_type= api_pb2.CATEGORICAL), + api_pb2.Parameter( + name="current_itr", + value=str(self.current_itr), + parameter_type= api_pb2.CATEGORICAL) + ], + ) + ) + + channel = grpc.beta.implementations.insecure_channel(self.manager_addr, self.manager_port) + with api_pb2.beta_create_Manager_stub(channel) as client: + for i, t in enumerate(trials): + ctrep = client.CreateTrial(api_pb2.CreateTrialRequest(trial=t), 10) + trials[i].trial_id = ctrep.trial_id + self.prev_trial_id = ctrep.trial_id + + self.current_itr+=1 + + return api_pb2.GetSuggestionsReply(trials=trials) + + def GetEvaluationResult(self, studyID, trialID): + worker_list = [] + channel = grpc.beta.implementations.insecure_channel(self.manager_addr, self.manager_port) + with api_pb2.beta_create_Manager_stub(channel) as client: + gwfrep = client.GetWorkerFullInfo(api_pb2.GetWorkerFullInfoRequest(study_id=studyID, trial_id=trialID, only_latest_log=False), 10) + worker_list = gwfrep.worker_full_infos + for w in worker_list: + if w.Worker.status == api_pb2.COMPLETED: + for ml in w.metrics_logs: + if ml.name == self.objective_name: + samples=self.get_featuremap_statistics(ml) + return samples + + + def _get_search_space(self, studyID): + + channel = grpc.beta.implementations.insecure_channel(self.manager_addr, self.manager_port) + with api_pb2.beta_create_Manager_stub(channel) as client: + gsrep = client.GetStudy(api_pb2.GetStudyRequest(study_id=studyID), 10) + + self.objective_name = gsrep.study_config.objective_value_name + all_params = gsrep.study_config.nas_config + graph_config = all_params.graph_config + search_space_raw = all_params.operations + + self.stages = int(graph_config.num_layers) + self.input_size = list(map(int, graph_config.input_size)) + self.output_size = list(map(int, graph_config.output_size)) + search_space_object = SearchSpace(search_space_raw) + self.search_space = search_space_object.search_space + self.search_space.update({"stages":self.stages}) + self.logger.info("Search Space: {}".format(self.search_space)) + + def _get_suggestion_param(self, paramID): + channel = grpc.beta.implementations.insecure_channel(self.manager_addr, self.manager_port) + with api_pb2.beta_create_Manager_stub(channel) as client: + gsprep = client.GetSuggestionParameters(api_pb2.GetSuggestionParametersRequest(param_id=paramID), 10) + + params_raw = gsprep.suggestion_parameters + suggestion_params = parseSuggestionParam(params_raw) + self.suggestion_config = suggestion_params + self.suggestion_config.update({"input_size":self.input_size[0]}) + self.suggestion_config.update({"output_size":self.output_size[0]}) + self.search_space.update({"max_layers_per_stage":self.suggestion_config["max_layers_per_stage"]}) + self.logger.info("Suggestion Config: {}".format(self.suggestion_config)) + + def get_featuremap_statistics(self, metric_object): + samples=list() + for i in range(len(metric_object.values)): + samples.append(metric_object.values[i].value) + return samples