# twml/twml/argument_parser.py
def get_trainer_parser():
"""
Add common command-line args to parse for the Trainer class.
Typically, the user calls this function and then parses cmd-line arguments
into an argparse.Namespace object, which is then passed to the Trainer constructor
via the params argument.
See the `code <_modules/twml/argument_parser.html#get_trainer_parser>`_
for a list and description of all cmd-line arguments.
Returns:
argparse.ArgumentParser instance with some useful args already added.
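
Example (illustrative sketch; ``build_graph`` and the exact ``Trainer``
signature are assumptions, not guaranteed by this module)::

  import twml

  # Parse the Trainer's common cmd-line arguments into a Namespace.
  parser = twml.argument_parser.get_trainer_parser()
  args = parser.parse_args(["--save_dir", "/tmp/my_model", "--train.batch_size", "64"])
  # Pass the parsed namespace to the Trainer via the params argument.
  trainer = twml.trainer.Trainer(name="my_model", params=args, build_graph_fn=build_graph)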
"""
parser = twml.DefaultSubcommandArgParse(formatter_class=SortingHelpFormatter)
parser.add_argument(
"--save_dir", type=str, default=tempfile.mkdtemp(),
help="Path to the training result directory. "
"Supports local filesystem paths and hdfs://default/<path>, which requires "
"setting the HDFS configuration via the env variable HADOOP_CONF_DIR.")
parser.add_argument(
"--export_dir", type=str, default=None,
help="Path to the directory to export a SavedModel for prediction servers.")
parser.add_argument(
"--log_aggregation_app_id", type=str, default=None,
help="specify app_id for log aggregation. disabled by default.")
parser.add_argument(
"--train.batch_size", "--train_batch_size", type=int, default=32,
dest='train_batch_size',
help="number of samples per training batch")
parser.add_argument(
"--eval.batch_size", "--eval_batch_size", type=int, default=32,
dest='eval_batch_size',
help="number of samples per cross-validation batch. Defaults to train_batch_size")
parser.add_argument(
"--train.learning_rate", "--learning_rate", type=float, default=0.002,
dest='learning_rate',
help="learning rate. Scales the gradient update.")
parser.add_argument(
"--train.steps", "--train_steps", type=int, default=-1,
dest='train_steps',
help="number of training batches before running evaluation."
"Defaults to -1 (runs through entire dataset). "
"Only used for Trainer.[train,learn]. "
"For Trainer.train_and_evaluate, use train.max_steps instead. ")
parser.add_argument(
"--eval.steps", "--eval_steps", type=int, default=-1,
dest="eval_steps",
help="number of steps per evaluation. Each batch is a step."
"Defaults to -1 (runs through entire dataset). ")
parser.add_argument(
"--eval.period", "--eval_period", type=int, default=600,
dest="eval_period",
help="Trainer.train_and_evaluate waits for this long after each evaluation. "
"Defaults to 600 seconds (evaluate every ten minutes). "
"Note that anything lower than 10*60seconds is probably a bad idea because TF saves "
"checkpoints every 10mins by default. eval.delay is time to wait before doing first eval. "
"eval.period is time between successive evals.")
parser.add_argument(
"--eval.delay", "--eval_delay", type=int, default=120,
dest="eval_delay",
help="Trainer.train_and_evaluate waits for this long before performing the first evaluation"
"Defaults to 120 seconds (evaluate after first 2 minutes of training). "
"eval.delay is time to wait before doing first eval. "
"eval.period is time between successive evals.")
parser.add_argument(
"--train.max_steps", "--train_max_steps", type=int, default=None,
dest="train_max_steps",
help="Stop training after this many global steps. Each training batch is its own step."
"If set to None, step after one train()/evaluate() call. Useful when train.steps=-1."
"If set to a non-positive value, loop forever. Usually useful with early stopping.")
parser.add_argument(
"--train.log_metrics", dest="train_log_metrics", action="store_true", default=False,
help="Set this to true to see metrics during training. "
"WARNING: metrics during training does not represent model performance. "
"WARNING: use for debugging only as this slows down training.")
parser.add_argument(
"--train.early_stop_patience", "--early_stop_patience", type=int, default=-1,
dest="early_stop_patience",
help="max number of evaluations (epochs) to wait for an improvement in the early_stop_metric."
"Defaults to -1 (no early-stopping)."
"NOTE: This can not be enabled when --distributed is also set.")
parser.add_argument(
"--train.early_stop_tolerance", "--early_stop_tolerance", type=float, default=0,
dest="early_stop_tolerance",
help="a non-negative tolerance for comparing early_stop_metric."
"e.g. when maximizing the condition is current_metric > best_metric + tolerance."
"Defaults to 0.")
parser.add_argument(
"--train.dataset_shards", "--train_dataset_shards",
dest="train_dataset_shards",
type=int, default=None,
help="An int value that indicates the number of partitions (shards) for the dataset. This is"
" useful for codistillation and other techniques that require each worker to train on disjoint"
" partitions of the dataset.")
parser.add_argument(
"--train.dataset_shard_index", "--train_dataset_shard_index",
dest="train_dataset_shard_index",
type=int, default=None,
help="An int value (starting at zero) that indicates which partition (shard) of the dataset"
" to use if --train.dataset_shards is set.")
parser.add_argument(
"--continue_from_checkpoint", dest="continue_from_checkpoint", action="store_true",
help="DEPRECATED. This option is currently a no-op."
" Continuing from the provided checkpoint is now the default."
" Use --overwrite_save_dir if you would like to override it instead"
" and restart training from scratch.")
parser.add_argument(
"--overwrite_save_dir", dest="overwrite_save_dir", action="store_true",
help="Delete the contents of the current save_dir if it exists")
parser.add_argument(
"--data_threads", "--num_threads", type=int, default=2,
dest="num_threads",
help="Number of threads to use for loading the dataset. "
"num_threads is deprecated and to be removed in future versions. Use data_threads.")
parser.add_argument(
"--max_duration", "--max_duration", type=float, default=None,
dest="max_duration",
help="Maximum duration (in secs) that training/validation will be allowed to run for before being automatically terminated.")
parser.add_argument(
"--num_workers", type=int, default=None,
help="Number of workers to use when training in hogwild manner on a single node.")
parser.add_argument(
"--distributed", dest="distributed", action="store_true",
help="Pass this flag to use train_and_evaluate to train in a distributed fashion"
"NOTE: You can not use early stopping when --distributed is enabled"
)
parser.add_argument(
"--distributed_training_cleanup",
dest="distributed_training_cleanup",
action="store_true",
help="Set if using distributed training on GKE to stop TwitterSetDeployment"
"from continuing training upon restarts (will be deprecated once we migrate off"
"TwitterSetDeployment for distributed training on GKE)."
)
parser.add_argument(
"--disable_auto_ps_shutdown", default=False, action="store_true",
help="Disable the functionality of automatically shutting down parameter server after "
"distributed training complete (either succeed or failed)."
)
parser.add_argument(
"--disable_tensorboard", default=False, action="store_true",
help="Do not start the TensorBoard server."
)
parser.add_argument(
"--tensorboard_port", type=int, default=None,
help="Port for tensorboard to run on. Ignored if --disable_tensorboard is set.")
parser.add_argument(
"--health_port", type=int, default=None,
help="Port to listen on for health-related endpoints (e.g. graceful shutdown)."
"Not user-facing as it is set automatically by the twml_cli."
)
parser.add_argument(
"--stats_port", type=int, default=None,
help="Port to listen on for stats endpoints"
)
parser.add_argument(
"--experiment_tracking_path",
dest="experiment_tracking_path",
type=str, default=None,
help="The tracking path of this experiment. Format: \
user_name:project_name:experiment_name:run_name. The path is used to track and display \
a record of this experiment on ML Dashboard. Note: this embedded experiment tracking is \
disabled when the deprecated Model Repo TrackRun is used in your model config. ")
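# For example (hypothetical values):
# --experiment_tracking_path alice:ads_ranking:lr_sweep:run_01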
parser.add_argument(
"--disable_experiment_tracking",
dest="disable_experiment_tracking",
action="store_true",
help="Whether experiment tracking should be disabled.")
parser.add_argument(
"--config.save_checkpoints_secs", "--save_checkpoints_secs", type=int, default=600,
dest='save_checkpoints_secs',
help="Configures the tf.estimator.RunConfig.save_checkpoints_secs attribute. "
"Specifies how often checkpoints are saved in seconds. Defaults to 10*60 seconds.")
parser.add_argument(
"--config.keep_checkpoint_max", "--keep_checkpoint_max", type=int, default=20,
dest='keep_checkpoint_max',
help="Configures the tf.estimator.RunConfig.keep_checkpoint_max attribute. "
"Specifies how many checkpoints to keep. Defaults to 20.")
parser.add_argument(
"--config.tf_random_seed", "--tf_random_seed", type=int, default=None,
dest='tf_random_seed',
help="Configures the tf.estimator.RunConfig.tf_random_seed attribute. "
"Specifies the seed to use. Defaults to None.")
parser.add_argument(
"--optimizer", type=str, default='SGD',
help="Optimizer to use: SGD (Default), Adagrad, Adam, Ftrl, Momentum, RMSProp, LazyAdam, DGC.")
parser.add_argument(
"--gradient_noise_scale", type=float, default=None,
help="adds 0-mean normal noise scaled by this value. Defaults to None.")
parser.add_argument(
"--clip_gradients", type=float, default=None,
help="If specified, a global clipping is applied to prevent "
"the norm of the gradient to exceed this value. Defaults to None.")
parser.add_argument(
"--dgc.density", "--dgc_density", type=float, default=0.1,
dest="dgc_density",
help="Specifies gradient density level when using deep gradient compression optimizer."
"E.g., default value being 0.1 means that only top 10%% most significant rows "
"(based on absolute value sums) are kept."
)
parser.add_argument(
"--dgc.density_decay", "--dgc_density_decay", type=bool, default=True,
dest="dgc_density_decay",
help="Specifies whether to (exponentially) decay the gradient density level when"
" doing gradient compression. If set 'False', the 'density_decay_steps', "
"'density_decay_rate' and 'min_density' arguments will be ignored."
)
parser.add_argument(
"--dgc.density_decay_steps", "--dgc_density_decay_steps", type=int, default=10000,
dest="dgc_density_decay_steps",
help="Specifies the step interval to perform density decay."
)
parser.add_argument(
"--dgc.density_decay_rate", "--dgc_density_decay_rate", type=float, default=0.5,
dest="dgc_density_decay_rate",
help="Specifies the decay rate when perfoming density decay."
)
parser.add_argument(
"--dgc.min_density", "--dgc_min_density", type=float, default=0.1,
dest="dgc_min_density",
help="Specifies the minimum density level when perfoming density decay."
)
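# Illustrative reading of the decay flags (an assumption; the exact schedule is
# defined by the DGC optimizer implementation): every density_decay_steps steps
# the density is multiplied by density_decay_rate, floored at min_density, i.e.
# density(step) = max(min_density, density * density_decay_rate ** (step // density_decay_steps)).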
parser.add_argument(
"--dgc.accumulation", "--dgc_accumulation", type=bool, default=False,
dest="dgc_accumulation",
help="Specifies whether to accumulate small gradients when using deep gradient compression "
"optimizer."
)
parser.add_argument(
"--show_optimizer_summaries", dest="show_optimizer_summaries", action="store_true",
help="When specified, displays gradients and learning rate in tensorboard."
"Turning it on has 10-20%% performance hit. Enable for debugging only")
parser.add_argument(
"--num_mkl_threads", dest="num_mkl_threads", default=1, type=int,
help="Specifies how many threads to use for MKL"
"inter_op_ parallelism_threds is set to TWML_NUM_CPUS / num_mkl_threads."
"intra_op_parallelism_threads is set to num_mkl_threads.")
parser.add_argument("--verbosity", type=_set_log_level, choices=LOG_LEVELS.keys(), default=None,
help="Sets log level to a given verbosity.")
parser.add_argument(
"--feature_importance.algorithm", dest="feature_importance_algorithm",
type=str, default=TREE, choices=[SERIAL, TREE],
help="""