# twml/twml/argument_parser.py
def get_trainer_parser():
"""
Add common command-line args to parse for the Trainer class.
Typically, the user calls this function and then parses cmd-line arguments
into an argparse.Namespace object, which is then passed to the Trainer constructor
via the params argument.
See the `code <_modules/twml/argument_parser.html#get_trainer_parser>`_
for a list and description of all cmd-line arguments.
Returns:
argparse.ArgumentParser instance with some useful args already added.
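
Example (illustrative sketch; ``build_graph`` and the exact ``Trainer``
signature are assumptions, not guaranteed by this module)::

  import twml

  # Parse the Trainer's common cmd-line arguments into a Namespace.
  parser = twml.argument_parser.get_trainer_parser()
  args = parser.parse_args(["--save_dir", "/tmp/my_model", "--train.batch_size", "64"])
  # Pass the parsed namespace to the Trainer via the params argument.
  trainer = twml.trainer.Trainer(name="my_model", params=args, build_graph_fn=build_graph)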
"""
parser = twml.DefaultSubcommandArgParse(formatter_class=SortingHelpFormatter)
parser.add_argument(
"--save_dir", type=str, default=tempfile.mkdtemp(),
help="Path to the training result directory. "
"Supports local filesystem paths and hdfs://default/<path>, which requires "
"setting the HDFS configuration via the env variable HADOOP_CONF_DIR.")
parser.add_argument(
"--export_dir", type=str, default=None,
help="Path to the directory to export a SavedModel for prediction servers.")
parser.add_argument(
"--log_aggregation_app_id", type=str, default=None,
help="specify app_id for log aggregation. disabled by default.")
parser.add_argument(
"--train.batch_size", "--train_batch_size", type=int, default=32,
dest='train_batch_size',
help="number of samples per training batch")
parser.add_argument(
"--eval.batch_size", "--eval_batch_size", type=int, default=32,
dest='eval_batch_size',
help="number of samples per cross-validation batch. Defaults to train_batch_size")
parser.add_argument(
"--train.learning_rate", "--learning_rate", type=float, default=0.002,
dest='learning_rate',
help="learning rate. Scales the gradient update.")
parser.add_argument(
"--train.steps", "--train_steps", type=int, default=-1,
dest='train_steps',
help="number of training batches before running evaluation."
"Defaults to -1 (runs through entire dataset). "
"Only used for Trainer.[train,learn]. "
"For Trainer.train_and_evaluate, use train.max_steps instead. ")
parser.add_argument(
"--eval.steps", "--eval_steps", type=int, default=-1,
dest="eval_steps",
help="number of steps per evaluation. Each batch is a step."
"Defaults to -1 (runs through entire dataset). ")
parser.add_argument(
"--eval.period", "--eval_period", type=int, default=600,
dest="eval_period",
help="Trainer.train_and_evaluate waits for this long after each evaluation. "
"Defaults to 600 seconds (evaluate every ten minutes). "
"Note that anything lower than 10*60seconds is probably a bad idea because TF saves "
"checkpoints every 10mins by default. eval.delay is time to wait before doing first eval. "
"eval.period is time between successive evals.")
parser.add_argument(
"--eval.delay", "--eval_delay", type=int, default=120,
dest="eval_delay",
help="Trainer.train_and_evaluate waits for this long before performing the first evaluation"
"Defaults to 120 seconds (evaluate after first 2 minutes of training). "
"eval.delay is time to wait before doing first eval. "
"eval.period is time between successive evals.")
parser.add_argument(
"--train.max_steps", "--train_max_steps", type=int, default=None,
dest="train_max_steps",
help="Stop training after this many global steps. Each training batch is its own step."
"If set to None, step after one train()/evaluate() call. Useful when train.steps=-1."
"If set to a non-positive value, loop forever. Usually useful with early stopping.")
parser.add_argument(
"--train.log_metrics", dest="train_log_metrics", action="store_true", default=False,
help="Set this to true to see metrics during training. "
"WARNING: metrics during training does not represent model performance. "
"WARNING: use for debugging only as this slows down training.")
parser.add_argument(
"--train.early_stop_patience", "--early_stop_patience", type=int, default=-1,
dest="early_stop_patience",
help="max number of evaluations (epochs) to wait for an improvement in the early_stop_metric."
"Defaults to -1 (no early-stopping)."
"NOTE: This can not be enabled when --distributed is also set.")
parser.add_argument(
"--train.early_stop_tolerance", "--early_stop_tolerance", type=float, default=0,
dest="early_stop_tolerance",
help="a non-negative tolerance for comparing early_stop_metric."
"e.g. when maximizing the condition is current_metric > best_metric + tolerance."
"Defaults to 0.")
parser.add_argument(
"--train.dataset_shards", "--train_dataset_shards",
dest="train_dataset_shards",
type=int, default=None,
help="An int value that indicates the number of partitions (shards) for the dataset. This is"
" useful for codistillation and other techniques that require each worker to train on disjoint"
" partitions of the dataset.")
parser.add_argument(
"--train.dataset_shard_index", "--train_dataset_shard_index",
dest="train_dataset_shard_index",
type=int, default=None,
help="An int value (starting at zero) that indicates which partition (shard) of the dataset"
" to use if --train.dataset_shards is set.")
parser.add_argument(
"--continue_from_checkpoint", dest="continue_from_checkpoint", action="store_true",
help="DEPRECATED. This option is currently a no-op."
" Continuing from the provided checkpoint is now the default."
" Use --overwrite_save_dir if you would like to override it instead"
" and restart training from scratch.")
parser.add_argument(
"--overwrite_save_dir", dest="overwrite_save_dir", action="store_true",
help="Delete the contents of the current save_dir if it exists")
parser.add_argument(
"--data_threads", "--num_threads", type=int, default=2,
dest="num_threads",
help="Number of threads to use for loading the dataset. "
"num_threads is deprecated and to be removed in future versions. Use data_threads.")
parser.add_argument(
"--max_duration", "--max_duration", type=float, default=None,
dest="max_duration",
help="Maximum duration (in secs) that training/validation will be allowed to run for before being automatically terminated.")
parser.add_argument(
"--num_workers", type=int, default=None,
help="Number of workers to use when training in hogwild manner on a single node.")
parser.add_argument(
"--distributed", dest="distributed", action="store_true",
help="Pass this flag to use train_and_evaluate to train in a distributed fashion"
"NOTE: You can not use early stopping when --distributed is enabled"
)
parser.add_argument(
"--distributed_training_cleanup",
dest="distributed_training_cleanup",
action="store_true",
help="Set if using distributed training on GKE to stop TwitterSetDeployment"
"from continuing training upon restarts (will be deprecated once we migrate off"
"TwitterSetDeployment for distributed training on GKE)."
)
parser.add_argument(
"--disable_auto_ps_shutdown", default=False, action="store_true",
help="Disable the functionality of automatically shutting down parameter server after "
"distributed training complete (either succeed or failed)."
)
parser.add_argument(
"--disable_tensorboard", default=False, action="store_true",
help="Do not start the TensorBoard server."
)
parser.add_argument(
"--tensorboard_port", type=int, default=None,
help="Port for tensorboard to run on. Ignored if --disable_tensorboard is set.")
parser.add_argument(
"--health_port", type=int, default=None,
help="Port to listen on for health-related endpoints (e.g. graceful shutdown)."
"Not user-facing as it is set automatically by the twml_cli."
)
parser.add_argument(
"--stats_port", type=int, default=None,
help="Port to listen on for stats endpoints"
)
parser.add_argument(
"--experiment_tracking_path",
dest="experiment_tracking_path",
type=str, default=None,
help="The tracking path of this experiment. Format: \
user_name:project_name:experiment_name:run_name. The path is used to track and display \
a record of this experiment on ML Dashboard. Note: this embedded experiment tracking is \
disabled when the deprecated Model Repo TrackRun is used in your model config. ")
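# For example (hypothetical values):
# --experiment_tracking_path alice:ads_ranking:lr_sweep:run_01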
parser.add_argument(
"--disable_experiment_tracking",
dest="disable_experiment_tracking",
action="store_true",
help="Whether experiment tracking should be disabled.")
parser.add_argument(
"--config.save_checkpoints_secs", "--save_checkpoints_secs", type=int, default=600,
dest='save_checkpoints_secs',
help="Configures the tf.estimator.RunConfig.save_checkpoints_secs attribute. "
"Specifies how often checkpoints are saved in seconds. Defaults to 10*60 seconds.")
parser.add_argument(
"--config.keep_checkpoint_max", "--keep_checkpoint_max", type=int, default=20,
dest='keep_checkpoint_max',
help="Configures the tf.estimator.RunConfig.keep_checkpoint_max attribute. "
"Specifies how many checkpoints to keep. Defaults to 20.")
parser.add_argument(
"--config.tf_random_seed", "--tf_random_seed", type=int, default=None,
dest='tf_random_seed',
help="Configures the tf.estimator.RunConfig.tf_random_seed attribute. "
"Specifies the seed to use. Defaults to None.")
parser.add_argument(
"--optimizer", type=str, default='SGD',
help="Optimizer to use: SGD (Default), Adagrad, Adam, Ftrl, Momentum, RMSProp, LazyAdam, DGC.")
parser.add_argument(
"--gradient_noise_scale", type=float, default=None,
help="adds 0-mean normal noise scaled by this value. Defaults to None.")
parser.add_argument(
"--clip_gradients", type=float, default=None,
help="If specified, a global clipping is applied to prevent "
"the norm of the gradient to exceed this value. Defaults to None.")
parser.add_argument(
"--dgc.density", "--dgc_density", type=float, default=0.1,
dest="dgc_density",
help="Specifies gradient density level when using deep gradient compression optimizer."
"E.g., default value being 0.1 means that only top 10%% most significant rows "
"(based on absolute value sums) are kept."
)
parser.add_argument(
"--dgc.density_decay", "--dgc_density_decay", type=bool, default=True,
dest="dgc_density_decay",
help="Specifies whether to (exponentially) decay the gradient density level when"
" doing gradient compression. If set 'False', the 'density_decay_steps', "
"'density_decay_rate' and 'min_density' arguments will be ignored."
)
parser.add_argument(
"--dgc.density_decay_steps", "--dgc_density_decay_steps", type=int, default=10000,
dest="dgc_density_decay_steps",
help="Specifies the step interval to perform density decay."
)
parser.add_argument(
"--dgc.density_decay_rate", "--dgc_density_decay_rate", type=float, default=0.5,
dest="dgc_density_decay_rate",
help="Specifies the decay rate when perfoming density decay."
)
parser.add_argument(
"--dgc.min_density", "--dgc_min_density", type=float, default=0.1,
dest="dgc_min_density",
help="Specifies the minimum density level when perfoming density decay."
)
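# Illustrative reading of the decay flags (an assumption; the exact schedule is
# defined by the DGC optimizer implementation): every density_decay_steps steps
# the density is multiplied by density_decay_rate, floored at min_density, i.e.
# density(step) = max(min_density, density * density_decay_rate ** (step // density_decay_steps)).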
parser.add_argument(
"--dgc.accumulation", "--dgc_accumulation", type=bool, default=False,
dest="dgc_accumulation",
help="Specifies whether to accumulate small gradients when using deep gradient compression "
"optimizer."
)
parser.add_argument(
"--show_optimizer_summaries", dest="show_optimizer_summaries", action="store_true",
help="When specified, displays gradients and learning rate in tensorboard."
"Turning it on has 10-20%% performance hit. Enable for debugging only")
parser.add_argument(
"--num_mkl_threads", dest="num_mkl_threads", default=1, type=int,
help="Specifies how many threads to use for MKL"
"inter_op_ parallelism_threds is set to TWML_NUM_CPUS / num_mkl_threads."
"intra_op_parallelism_threads is set to num_mkl_threads.")
parser.add_argument("--verbosity", type=_set_log_level, choices=LOG_LEVELS.keys(), default=None,
help="Sets log level to a given verbosity.")
parser.add_argument(
"--feature_importance.algorithm", dest="feature_importance_algorithm",
type=str, default=TREE, choices=[SERIAL, TREE],
help="""