// Copyright 2023 The Deeplab2 Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package deeplab2;
option java_multiple_files = true;
// Configure the solver options.
// Next ID: 18
message SolverOptions {
  optional bool use_sync_batchnorm = 1 [default = true];
  optional float batchnorm_momentum = 14 [default = 0.99];
  optional float batchnorm_epsilon = 15 [default = 0.001];
  // Set the learning rate policy for training. Available policies: 'poly',
  // 'cosine'.
  optional string learning_policy = 2 [default = 'poly'];
  // Set the base learning rate for model training.
  optional float base_learning_rate = 3 [default = 1e-3];
  // Set the power value used in the poly learning policy.
  optional float poly_learning_power = 4 [default = 0.9];
  // End learning rate for the polynomial learning rate schedule.
  optional float poly_end_learning_rate = 5 [default = 0.0];
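  // Under the 'poly' policy, the learning rate is typically computed as
  //   lr = (base_learning_rate - poly_end_learning_rate)
  //        * (1 - global_step / training_number_of_steps) ^ poly_learning_power
  //        + poly_end_learning_rate.
  // This is a sketch of the standard polynomial decay; consult the training
  // code for the exact schedule used.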
  // Set the number of steps for the warmup phase. We currently only
  // support linear warmup, i.e., if global_step < warmup_steps, the
  // learning rate will be `global_step / warmup_steps * base_learning_rate`.
  optional int32 warmup_steps = 6 [default = 0];
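  // For example, with base_learning_rate = 1e-3 and warmup_steps = 1000, the
  // learning rate at global_step = 250 is 250 / 1000 * 1e-3 = 2.5e-4.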
  // Set the optimizer method. Supported types: 'adam', 'sgd'.
  optional string optimizer = 7 [default = 'adam'];
  // Set the value of the weight decay for training.
  optional float weight_decay = 8 [default = 0];
  // Set the value of the weight decay of the AdamW optimizer for training. We
  // need to use a different value from weight_decay here, as weight_decay is
  // used to set conv_kernel_weight_decay in the model.
  optional float adamw_weight_decay = 17 [default = 0];
  // Set whether to use gradient clipping.
  optional bool use_gradient_clipping = 9 [default = false];
  // Set the norm used in gradient clipping.
  optional float clip_gradient_norm = 10 [default = 10.0];
  // Set the number of steps for training.
  optional int32 training_number_of_steps = 11 [default = 60000];
  // Set the backbone learning rate multiplier when different learning rates
  // are desired for the backbone and for the other layers. For example,
  // MaX-DeepLab uses this field to set a 0.1x learning rate for the pretrained
  // backbone parameters.
  optional float backbone_learning_rate_multiplier = 16 [default = 1.0];
}
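
// A minimal SolverOptions config in text-proto form. The values below are
// illustrative only, not recommended settings:
//
//   solver_options {
//     learning_policy: 'poly'
//     base_learning_rate: 1e-3
//     warmup_steps: 2000
//     optimizer: 'adam'
//     use_gradient_clipping: true
//     clip_gradient_norm: 10.0
//     training_number_of_steps: 60000
//   }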
/********** Submessages used to configure loss options **********/
// Configure the loss options.
message LossOptions {
  message SingleLossOptions {
    // Set the name of the loss.
    optional string name = 1;
    // Set the global weight of this loss, used to scale its contribution
    // relative to all other losses.
    optional float weight = 2 [default = 1.0];
    // Set the fraction of pixels with the highest losses that are used for
    // backpropagation (hard-pixel mining); 1.0 uses all pixels.
    optional float top_k_percent = 3 [default = 1.0];
  }
  // Set the loss options for the semantic segmentation output.
  optional SingleLossOptions semantic_loss = 1;
  // Set the loss options for the center head.
  optional SingleLossOptions center_loss = 2;
  // Set the loss options for the regression head.
  optional SingleLossOptions regression_loss = 3;
  // Set the loss options for the motion head.
  optional SingleLossOptions motion_loss = 4;
  // Set the loss options for the next regression head.
  optional SingleLossOptions next_regression_loss = 5;
  // Set the loss options for the PQ-style loss.
  optional SingleLossOptions pq_style_loss = 6;
  // Set the loss options for the mask-ID cross-entropy loss.
  optional SingleLossOptions mask_id_cross_entropy_loss = 7;
  // Set the loss options for the instance discrimination loss.
  optional SingleLossOptions instance_discrimination_loss = 8;
  // Set the loss options for the depth loss.
  optional SingleLossOptions depth_loss = 9;
}
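
// A minimal LossOptions config in text-proto form. The loss name shown is
// illustrative; valid names depend on the loss builder in the training code:
//
//   loss_options {
//     semantic_loss {
//       name: 'softmax_cross_entropy'
//       weight: 1.0
//       top_k_percent: 0.2
//     }
//   }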
// Configure the trainer options.
message TrainerOptions {
  // Set the maximum number of checkpoints to keep.
  optional int32 num_checkpoints_to_keep = 1 [default = 5];
  // Set the number of steps after which a checkpoint should be saved.
  optional int32 save_checkpoints_steps = 2 [default = 1000];
  // Set the number of steps after which summaries should be written. Must be
  // a multiple of steps_per_loop.
  optional int32 save_summaries_steps = 3 [default = 1000];
  // Set the number of steps one `inner` training loop should run for. This
  // relates to the orbit framework:
  // https://github.com/tensorflow/models/blob/master/orbit/controller.py#L33
  optional int32 steps_per_loop = 4 [default = 1000];
  // Set the loss options.
  optional LossOptions loss_options = 5;
  // Set the solver options.
  optional SolverOptions solver_options = 6;
}
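
// Putting it together: a minimal TrainerOptions config in text-proto form.
// Values are illustrative only, and the `trainer_options` field name assumes
// the enclosing experiment config:
//
//   trainer_options {
//     save_checkpoints_steps: 1000
//     save_summaries_steps: 1000
//     steps_per_loop: 1000
//     loss_options {
//       semantic_loss { name: 'softmax_cross_entropy' weight: 1.0 }
//     }
//     solver_options {
//       base_learning_rate: 1e-3
//       training_number_of_steps: 60000
//     }
//   }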