in sdks/python/apache_beam/options/pipeline_options.py [0:0]
def _add_argparse_args(cls, parser):
  parser.add_argument(
      '--num_workers',
      type=int,
      default=None,
      help=(
          'Number of workers to use when executing the Dataflow job. If not '
          'set, the Dataflow service will use a reasonable default.'))
  parser.add_argument(
      '--max_num_workers',
      type=int,
      default=None,
      help=(
          'Maximum number of workers to use when executing the Dataflow '
          'job.'))
  parser.add_argument(
      '--autoscaling_algorithm',
      type=str,
      choices=['NONE', 'THROUGHPUT_BASED'],
      default=None,  # Meaning unset, distinct from 'NONE' meaning don't scale
      help='If and how to autoscale the worker pool.')
  parser.add_argument(
      '--worker_machine_type',
      '--machine_type',
      dest='machine_type',
      default=None,
      help=(
          'Machine type to use for the Dataflow worker VMs. See '
          'https://cloud.google.com/compute/docs/machine-types '
          'for a list of valid options. If not set, the Dataflow service '
          'will choose a reasonable default.'))
  parser.add_argument(
      '--disk_size_gb',
      type=int,
      default=None,
      help=(
          'Remote worker disk size, in gigabytes, or 0 to use the default '
          'size. If not set, the Dataflow service will use a reasonable '
          'default.'))
  parser.add_argument(
      '--worker_disk_type',
      '--disk_type',
      dest='disk_type',
      default=None,
      help='Specifies what type of persistent disk should be used.')
  parser.add_argument(
      '--worker_region',
      default=None,
      help=(
          'The Compute Engine region (https://cloud.google.com/compute/docs/'
          'regions-zones/regions-zones) in which worker processing should '
          'occur, e.g. "us-west1". Mutually exclusive with worker_zone. If '
          'neither worker_region nor worker_zone is specified, defaults to '
          'the same value as --region.'))
  parser.add_argument(
      '--worker_zone',
      default=None,
      help=(
          'The Compute Engine zone (https://cloud.google.com/compute/docs/'
          'regions-zones/regions-zones) in which worker processing should '
          'occur, e.g. "us-west1-a". Mutually exclusive with worker_region. '
          'If neither worker_region nor worker_zone is specified, the '
          'Dataflow service will choose a zone in --region based on '
          'available capacity.'))
  parser.add_argument(
      '--zone',
      default=None,
      help=(
          'GCE availability zone for launching workers. Default is up to '
          'the Dataflow service. This flag is deprecated and will be '
          'replaced by --worker_zone.'))
  parser.add_argument(
      '--network',
      default=None,
      help=(
          'GCE network for launching workers. Default is up to the Dataflow '
          'service.'))
  parser.add_argument(
      '--subnetwork',
      default=None,
      help=(
          'GCE subnetwork for launching workers. Default is up to the '
          'Dataflow service. Expected format is '
          'regions/REGION/subnetworks/SUBNETWORK or the fully qualified '
          'subnetwork name. For more information, see '
          'https://cloud.google.com/compute/docs/vpc/'))
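  # Illustrative subnetwork values (project/region/subnet names below are
  # hypothetical; the second form is one common fully qualified spelling):
  #   --subnetwork=regions/us-central1/subnetworks/my-subnet
  #   --subnetwork=https://www.googleapis.com/compute/v1/projects/my-project/regions/us-central1/subnetworks/my-subnet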
  parser.add_argument(
      '--worker_harness_container_image',
      default=None,
      help=(
          'Docker registry location of container image to use for the '
          'worker harness. If not set, an appropriate approved Google Cloud '
          'Dataflow image will be used based on the version of the SDK. '
          'Note: This flag is deprecated and only supports approved Google '
          'Cloud Dataflow container images. To provide a custom container '
          'image, use sdk_container_image instead.'))
  parser.add_argument(
      '--sdk_container_image',
      default=None,
      help=(
          'Docker registry location of container image to use for the '
          'worker harness. If not set, an appropriate approved Google Cloud '
          'Dataflow image will be used based on the version of the SDK. '
          'If set for a non-portable pipeline, only official Google Cloud '
          'Dataflow container images may be used here.'))
  parser.add_argument(
      '--sdk_harness_container_image_overrides',
      action='append',
      default=None,
      help=(
          'Overrides for SDK harness container images. Could be for the '
          'local SDK or for a remote SDK that the pipeline has to support '
          'due to a cross-language transform. Each entry consists of two '
          'values separated by a comma, where the first value gives a regex '
          'to identify the container image to override and the second value '
          'gives the replacement container image.'))
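  # Illustrative override entry (the replacement image below is hypothetical):
  #   --sdk_harness_container_image_overrides=".*java.*,gcr.io/my-project/beam_java_sdk:latest"
  # Any SDK harness image whose name matches the regex ".*java.*" is replaced.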
  parser.add_argument(
      '--use_public_ips',
      default=None,
      action='store_true',
      help='If set, assign public IP addresses to the worker VMs.')
  parser.add_argument(
      '--no_use_public_ips',
      dest='use_public_ips',
      default=None,
      action='store_false',
      help='If set, use only private IP addresses for the worker VMs.')
  parser.add_argument(
      '--min_cpu_platform',
      dest='min_cpu_platform',
      type=str,
      help='GCE minimum CPU platform. Default is determined by GCP.')
  parser.add_argument(
      '--dataflow_worker_jar',
      dest='dataflow_worker_jar',
      type=str,
      help=(
          'Dataflow worker jar file. If specified, the jar file is staged '
          'in GCS and then loaded by the workers. End users usually should '
          'not use this feature.'))
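
# A minimal usage sketch (not part of this module), assuming these arguments
# belong to the WorkerOptions view as in the Beam Python SDK: the flags above
# are parsed through PipelineOptions and read back via view_as, e.g.
#
#   from apache_beam.options.pipeline_options import (
#       PipelineOptions, WorkerOptions)
#
#   options = PipelineOptions(
#       ['--num_workers=5', '--max_num_workers=20', '--disk_size_gb=50'])
#   worker_options = options.view_as(WorkerOptions)
#   assert worker_options.num_workers == 5
#   assert worker_options.max_num_workers == 20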