| <html><body> |
| <style> |
| |
| body, h1, h2, h3, div, span, p, pre, a { |
| margin: 0; |
| padding: 0; |
| border: 0; |
| font-weight: inherit; |
| font-style: inherit; |
| font-size: 100%; |
| font-family: inherit; |
| vertical-align: baseline; |
| } |
| |
| body { |
| font-size: 13px; |
| padding: 1em; |
| } |
| |
| h1 { |
| font-size: 26px; |
| margin-bottom: 1em; |
| } |
| |
| h2 { |
| font-size: 24px; |
| margin-bottom: 1em; |
| } |
| |
| h3 { |
| font-size: 20px; |
| margin-bottom: 1em; |
| margin-top: 1em; |
| } |
| |
| pre, code { |
| line-height: 1.5; |
| font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace; |
| } |
| |
| pre { |
| margin-top: 0.5em; |
| } |
| |
| h1, h2, h3, p { |
font-family: Arial, sans-serif;
| } |
| |
| h1, h2, h3 { |
| border-bottom: solid #CCC 1px; |
| } |
| |
| .toc_element { |
| margin-top: 0.5em; |
| } |
| |
| .firstline { |
margin-left: 2em;
| } |
| |
| .method { |
| margin-top: 1em; |
| border: solid 1px #CCC; |
| padding: 1em; |
| background: #EEE; |
| } |
| |
| .details { |
| font-weight: bold; |
| font-size: 14px; |
| } |
| |
| </style> |
| |
| <h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.jobs.html">jobs</a> . <a href="dataflow_v1b3.projects.jobs.workItems.html">workItems</a></h1> |
| <h2>Instance Methods</h2> |
| <p class="toc_element"> |
| <code><a href="#lease">lease(projectId, jobId, body, x__xgafv=None)</a></code></p> |
| <p class="firstline">Leases a dataflow WorkItem to run.</p> |
| <p class="toc_element"> |
| <code><a href="#reportStatus">reportStatus(projectId, jobId, body, x__xgafv=None)</a></code></p> |
| <p class="firstline">Reports the status of dataflow WorkItems leased by a worker.</p> |
| <h3>Method Details</h3> |
| <div class="method"> |
| <code class="details" id="lease">lease(projectId, jobId, body, x__xgafv=None)</code> |
| <pre>Leases a dataflow WorkItem to run. |
| |
| Args: |
| projectId: string, Identifies the project this worker belongs to. (required) |
| jobId: string, Identifies the workflow job this worker belongs to. (required) |
| body: object, The request body. (required) |
| The object takes the form of: |
| |
| { # Request to lease WorkItems. |
| "workItemTypes": [ # Filter for WorkItem type. |
| "A String", |
| ], |
| "workerCapabilities": [ # Worker capabilities. WorkItems might be limited to workers with specific |
| # capabilities. |
| "A String", |
| ], |
| "requestedLeaseDuration": "A String", # The initial lease period. |
| "workerId": "A String", # Identifies the worker leasing work -- typically the ID of the |
| # virtual machine running the worker. |
| "currentWorkerTime": "A String", # The current timestamp at the worker. |
| "location": "A String", # The location which contains the WorkItem's job. |
| } |
| |
| x__xgafv: string, V1 error format. |
| Allowed values |
| 1 - v1 error format |
| 2 - v2 error format |
| |
| Returns: |
| An object of the form: |
| |
| { # Response to a request to lease WorkItems. |
| "workItems": [ # A list of the leased WorkItems. |
| { # WorkItem represents basic information about a WorkItem to be executed |
| # in the cloud. |
| "reportStatusInterval": "A String", # Recommended reporting interval. |
| "leaseExpireTime": "A String", # Time when the lease on this Work will expire. |
| "seqMapTask": { # Describes a particular function to invoke. # Additional information for SeqMapTask WorkItems. |
| "inputs": [ # Information about each of the inputs. |
| { # Information about a side input of a DoFn or an input of a SeqDoFn. |
| "sources": [ # The source(s) to read element(s) from to get the value of this side input. |
| # If more than one source, then the elements are taken from the |
| # sources, in the specified order if order matters. |
| # At least one source is required. |
| { # A source that records can be read and decoded from. |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| ], |
| "kind": { # How to interpret the source element(s) as a side input value. |
| "a_key": "", # Properties of the object. |
| }, |
| "tag": "A String", # The id of the tag the user code will access this side input by; |
| # this should correspond to the tag of some MultiOutputInfo. |
| }, |
| ], |
| "outputInfos": [ # Information about each of the outputs. |
| { # Information about an output of a SeqMapTask. |
| "tag": "A String", # The id of the TupleTag the user code will tag the output value by. |
| "sink": { # A sink that records can be encoded and written to. # The sink to write the output value to. |
| "codec": { # The codec to use to encode data written to the sink. |
| "a_key": "", # Properties of the object. |
| }, |
| "spec": { # The sink to write to, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| }, |
| }, |
| ], |
| "stageName": "A String", # System-defined name of the stage containing the SeqDo operation. |
| # Unique across the workflow. |
| "systemName": "A String", # System-defined name of the SeqDo operation. |
| # Unique across the workflow. |
| "userFn": { # The user function to invoke. |
| "a_key": "", # Properties of the object. |
| }, |
| "name": "A String", # The user-provided name of the SeqDo operation. |
| }, |
| "projectId": "A String", # Identifies the cloud project this WorkItem belongs to. |
| "sourceOperationTask": { # A work item that represents the different operations that can be # Additional information for source operation WorkItems. |
| # performed on a user-defined Source specification. |
| "getMetadata": { # A request to compute the SourceMetadata of a Source. # Information about a request to get metadata about a source. |
| "source": { # A source that records can be read and decoded from. # Specification of the source whose metadata should be computed. |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| }, |
| "split": { # Represents the operation to split a high-level Source specification # Information about a request to split a source. |
| # into bundles (parts for parallel processing). |
| # |
| # At a high level, splitting of a source into bundles happens as follows: |
| # SourceSplitRequest is applied to the source. If it returns |
| # SOURCE_SPLIT_OUTCOME_USE_CURRENT, no further splitting happens and the source |
| # is used "as is". Otherwise, splitting is applied recursively to each |
| # produced DerivedSource. |
| # |
| # As an optimization, for any Source, if its does_not_need_splitting is |
| # true, the framework assumes that splitting this source would return |
| # SOURCE_SPLIT_OUTCOME_USE_CURRENT, and doesn't initiate a SourceSplitRequest. |
| # This applies both to the initial source being split and to bundles |
| # produced from it. |
| "source": { # A source that records can be read and decoded from. # Specification of the source to be split. |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| "options": { # Hints for splitting a Source into bundles (parts for parallel # Hints for tuning the splitting process. |
| # processing) using SourceSplitRequest. |
| "desiredShardSizeBytes": "A String", # DEPRECATED in favor of desired_bundle_size_bytes. |
| "desiredBundleSizeBytes": "A String", # The source should be split into a set of bundles where the estimated size |
| # of each is approximately this many bytes. |
| }, |
| }, |
| }, |
| "initialReportIndex": "A String", # The initial index to use when reporting the status of the WorkItem. |
| "mapTask": { # MapTask consists of an ordered set of instructions, each of which # Additional information for MapTask WorkItems. |
| # describes one particular low-level operation for the worker to |
| # perform in order to accomplish the MapTask's WorkItem. |
| # |
| # Each instruction must appear in the list before any instructions which |
| # depends on its output. |
| "systemName": "A String", # System-defined name of this MapTask. |
| # Unique across the workflow. |
| "stageName": "A String", # System-defined name of the stage containing this MapTask. |
| # Unique across the workflow. |
| "instructions": [ # The instructions in the MapTask. |
| { # Describes a particular operation comprising a MapTask. |
| "name": "A String", # User-provided name of this operation. |
| "read": { # An instruction that reads records. # Additional information for Read instructions. |
| # Takes no inputs, produces one output. |
| "source": { # A source that records can be read and decoded from. # The source to read from. |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| }, |
| "outputs": [ # Describes the outputs of the instruction. |
| { # An output of an instruction. |
| "name": "A String", # The user-provided name of this output. |
| "onlyCountKeyBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions |
| # should only report the key size. |
| "codec": { # The codec to use to encode data being written via this output. |
| "a_key": "", # Properties of the object. |
| }, |
| "systemName": "A String", # System-defined name of this output. |
| # Unique across the workflow. |
| "originalName": "A String", # System-defined name for this output in the original workflow graph. |
| # Outputs that do not contribute to an original instruction do not set this. |
| "onlyCountValueBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions |
| # should only report the value size. |
| }, |
| ], |
| "partialGroupByKey": { # An instruction that does a partial group-by-key. # Additional information for PartialGroupByKey instructions. |
| # One input and one output. |
| "sideInputs": [ # Zero or more side inputs. |
| { # Information about a side input of a DoFn or an input of a SeqDoFn. |
| "sources": [ # The source(s) to read element(s) from to get the value of this side input. |
| # If more than one source, then the elements are taken from the |
| # sources, in the specified order if order matters. |
| # At least one source is required. |
| { # A source that records can be read and decoded from. |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| ], |
| "kind": { # How to interpret the source element(s) as a side input value. |
| "a_key": "", # Properties of the object. |
| }, |
| "tag": "A String", # The id of the tag the user code will access this side input by; |
| # this should correspond to the tag of some MultiOutputInfo. |
| }, |
| ], |
| "inputElementCodec": { # The codec to use for interpreting an element in the input PTable. |
| "a_key": "", # Properties of the object. |
| }, |
| "originalCombineValuesStepName": "A String", # If this instruction includes a combining function, this is the name of the |
| # CombineValues instruction lifted into this instruction. |
| "valueCombiningFn": { # The value combining function to invoke. |
| "a_key": "", # Properties of the object. |
| }, |
| "input": { # An input of an instruction, as a reference to an output of a # Describes the input to the partial group-by-key instruction. |
| # producer instruction. |
| "outputNum": 42, # The output index (origin zero) within the producer. |
| "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces |
| # the output to be consumed by this input. This index is relative |
| # to the list of instructions in this input's instruction's |
| # containing MapTask. |
| }, |
| "originalCombineValuesInputStoreName": "A String", # If this instruction includes a combining function this is the name of the |
| # intermediate store between the GBK and the CombineValues. |
| }, |
| "write": { # An instruction that writes records. # Additional information for Write instructions. |
| # Takes one input, produces no outputs. |
| "input": { # An input of an instruction, as a reference to an output of a # The input. |
| # producer instruction. |
| "outputNum": 42, # The output index (origin zero) within the producer. |
| "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces |
| # the output to be consumed by this input. This index is relative |
| # to the list of instructions in this input's instruction's |
| # containing MapTask. |
| }, |
| "sink": { # A sink that records can be encoded and written to. # The sink to write to. |
| "codec": { # The codec to use to encode data written to the sink. |
| "a_key": "", # Properties of the object. |
| }, |
| "spec": { # The sink to write to, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| }, |
| }, |
| "systemName": "A String", # System-defined name of this operation. |
| # Unique across the workflow. |
| "flatten": { # An instruction that copies its inputs (zero or more) to its (single) output. # Additional information for Flatten instructions. |
| "inputs": [ # Describes the inputs to the flatten instruction. |
| { # An input of an instruction, as a reference to an output of a |
| # producer instruction. |
| "outputNum": 42, # The output index (origin zero) within the producer. |
| "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces |
| # the output to be consumed by this input. This index is relative |
| # to the list of instructions in this input's instruction's |
| # containing MapTask. |
| }, |
| ], |
| }, |
| "originalName": "A String", # System-defined name for the operation in the original workflow graph. |
| "parDo": { # An instruction that does a ParDo operation. # Additional information for ParDo instructions. |
| # Takes one main input and zero or more side inputs, and produces |
| # zero or more outputs. |
| # Runs user code. |
| "sideInputs": [ # Zero or more side inputs. |
| { # Information about a side input of a DoFn or an input of a SeqDoFn. |
| "sources": [ # The source(s) to read element(s) from to get the value of this side input. |
| # If more than one source, then the elements are taken from the |
| # sources, in the specified order if order matters. |
| # At least one source is required. |
| { # A source that records can be read and decoded from. |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| ], |
| "kind": { # How to interpret the source element(s) as a side input value. |
| "a_key": "", # Properties of the object. |
| }, |
| "tag": "A String", # The id of the tag the user code will access this side input by; |
| # this should correspond to the tag of some MultiOutputInfo. |
| }, |
| ], |
| "input": { # An input of an instruction, as a reference to an output of a # The input. |
| # producer instruction. |
| "outputNum": 42, # The output index (origin zero) within the producer. |
| "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces |
| # the output to be consumed by this input. This index is relative |
| # to the list of instructions in this input's instruction's |
| # containing MapTask. |
| }, |
| "multiOutputInfos": [ # Information about each of the outputs, if user_fn is a MultiDoFn. |
| { # Information about an output of a multi-output DoFn. |
| "tag": "A String", # The id of the tag the user code will emit to this output by; this |
| # should correspond to the tag of some SideInputInfo. |
| }, |
| ], |
| "numOutputs": 42, # The number of outputs. |
| "userFn": { # The user function to invoke. |
| "a_key": "", # Properties of the object. |
| }, |
| }, |
| }, |
| ], |
| }, |
| "jobId": "A String", # Identifies the workflow job this WorkItem belongs to. |
| "configuration": "A String", # Work item-specific configuration as an opaque blob. |
| "streamingSetupTask": { # A task which initializes part of a streaming Dataflow job. # Additional information for StreamingSetupTask WorkItems. |
| "workerHarnessPort": 42, # The TCP port used by the worker to communicate with the Dataflow |
| # worker harness. |
| "drain": True or False, # The user has requested drain. |
| "streamingComputationTopology": { # Global topology of the streaming Dataflow job, including all # The global topology of the streaming Dataflow job. |
| # computations and their sharded locations. |
| "computations": [ # The computations associated with a streaming Dataflow job. |
| { # All configuration data for a particular Computation. |
| "inputs": [ # The inputs to the computation. |
| { # Describes a stream of data, either as input to be processed or as |
| # output of a streaming Dataflow job. |
| "streamingStageLocation": { # Identifies the location of a streaming computation stage, for # The stream is part of another computation within the current |
| # streaming Dataflow job. |
| # stage-to-stage communication. |
| "streamId": "A String", # Identifies the particular stream within the streaming Dataflow |
| # job. |
| }, |
| "pubsubLocation": { # Identifies a pubsub location to use for transferring data into or # The stream is a pubsub stream. |
| # out of a streaming Dataflow job. |
| "idLabel": "A String", # If set, contains a pubsub label from which to extract record ids. |
| # If left empty, record deduplication will be strictly best effort. |
| "timestampLabel": "A String", # If set, contains a pubsub label from which to extract record timestamps. |
| # If left empty, record timestamps will be generated upon arrival. |
| "dropLateData": True or False, # Indicates whether the pipeline allows late-arriving data. |
| "topic": "A String", # A pubsub topic, in the form of |
| # "pubsub.googleapis.com/topics/<project-id>/<topic-name>" |
| "trackingSubscription": "A String", # If set, specifies the pubsub subscription that will be used for tracking |
| # custom time timestamps for watermark estimation. |
| "withAttributes": True or False, # If true, then the client has requested to get pubsub attributes. |
| "subscription": "A String", # A pubsub subscription, in the form of |
| # "pubsub.googleapis.com/subscriptions/<project-id>/<subscription-name>" |
| }, |
| "customSourceLocation": { # Identifies the location of a custom souce. # The stream is a custom source. |
| "stateful": True or False, # Whether this source is stateful. |
| }, |
| "sideInputLocation": { # Identifies the location of a streaming side input. # The stream is a streaming side input. |
| "stateFamily": "A String", # Identifies the state family where this side input is stored. |
| "tag": "A String", # Identifies the particular side input within the streaming Dataflow job. |
| }, |
| }, |
| ], |
| "outputs": [ # The outputs from the computation. |
| { # Describes a stream of data, either as input to be processed or as |
| # output of a streaming Dataflow job. |
| "streamingStageLocation": { # Identifies the location of a streaming computation stage, for # The stream is part of another computation within the current |
| # streaming Dataflow job. |
| # stage-to-stage communication. |
| "streamId": "A String", # Identifies the particular stream within the streaming Dataflow |
| # job. |
| }, |
| "pubsubLocation": { # Identifies a pubsub location to use for transferring data into or # The stream is a pubsub stream. |
| # out of a streaming Dataflow job. |
| "idLabel": "A String", # If set, contains a pubsub label from which to extract record ids. |
| # If left empty, record deduplication will be strictly best effort. |
| "timestampLabel": "A String", # If set, contains a pubsub label from which to extract record timestamps. |
| # If left empty, record timestamps will be generated upon arrival. |
| "dropLateData": True or False, # Indicates whether the pipeline allows late-arriving data. |
| "topic": "A String", # A pubsub topic, in the form of |
| # "pubsub.googleapis.com/topics/<project-id>/<topic-name>" |
| "trackingSubscription": "A String", # If set, specifies the pubsub subscription that will be used for tracking |
| # custom time timestamps for watermark estimation. |
| "withAttributes": True or False, # If true, then the client has requested to get pubsub attributes. |
| "subscription": "A String", # A pubsub subscription, in the form of |
| # "pubsub.googleapis.com/subscriptions/<project-id>/<subscription-name>" |
| }, |
| "customSourceLocation": { # Identifies the location of a custom souce. # The stream is a custom source. |
| "stateful": True or False, # Whether this source is stateful. |
| }, |
| "sideInputLocation": { # Identifies the location of a streaming side input. # The stream is a streaming side input. |
| "stateFamily": "A String", # Identifies the state family where this side input is stored. |
| "tag": "A String", # Identifies the particular side input within the streaming Dataflow job. |
| }, |
| }, |
| ], |
| "keyRanges": [ # The key ranges processed by the computation. |
| { # Location information for a specific key-range of a sharded computation. |
| # Currently we only support UTF-8 character splits to simplify encoding into |
| # JSON. |
| "deprecatedPersistentDirectory": "A String", # DEPRECATED. The location of the persistent state for this range, as a |
| # persistent directory in the worker local filesystem. |
| "start": "A String", # The start (inclusive) of the key range. |
| "deliveryEndpoint": "A String", # The physical location of this range assignment to be used for |
| # streaming computation cross-worker message delivery. |
| "end": "A String", # The end (exclusive) of the key range. |
| "dataDisk": "A String", # The name of the data disk where data for this range is stored. |
| # This name is local to the Google Cloud Platform project and uniquely |
| # identifies the disk within that project, for example |
| # "myproject-1014-104817-4c2-harness-0-disk-1". |
| }, |
| ], |
| "computationId": "A String", # The ID of the computation. |
| "systemStageName": "A String", # The system stage name. |
| "stateFamilies": [ # The state family values. |
| { # State family configuration. |
| "stateFamily": "A String", # The state family value. |
| "isRead": True or False, # If true, this family corresponds to a read operation. |
| }, |
| ], |
| }, |
| ], |
| "dataDiskAssignments": [ # The disks assigned to a streaming Dataflow job. |
| { # Data disk assignment for a given VM instance. |
| "vmInstance": "A String", # VM instance name the data disks mounted to, for example |
| # "myproject-1014-104817-4c2-harness-0". |
| "dataDisks": [ # Mounted data disks. The order is important a data disk's 0-based index in |
| # this list defines which persistent directory the disk is mounted to, for |
| # example the list of { "myproject-1014-104817-4c2-harness-0-disk-0" }, |
| # { "myproject-1014-104817-4c2-harness-0-disk-1" }. |
| "A String", |
| ], |
| }, |
| ], |
| "forwardingKeyBits": 42, # The size (in bits) of keys that will be assigned to source messages. |
| "userStageToComputationNameMap": { # Maps user stage names to stable computation names. |
| "a_key": "A String", |
| }, |
| "persistentStateVersion": 42, # Version number for persistent state. |
| }, |
| "receiveWorkPort": 42, # The TCP port on which the worker should listen for messages from |
| # other streaming computation workers. |
| }, |
| "id": "A String", # Identifies this WorkItem. |
| "streamingConfigTask": { # A task that carries configuration information for streaming computations. # Additional information for StreamingConfigTask WorkItems. |
| "userStepToStateFamilyNameMap": { # Map from user step names to state families. |
| "a_key": "A String", |
| }, |
| "windmillServicePort": "A String", # If present, the worker must use this port to communicate with Windmill |
| # Service dispatchers. Only applicable when windmill_service_endpoint is |
| # specified. |
| "streamingComputationConfigs": [ # Set of computation configuration information. |
| { # Configuration information for a single streaming computation. |
| "computationId": "A String", # Unique identifier for this computation. |
| "systemName": "A String", # System defined name for this computation. |
| "stageName": "A String", # Stage name of this computation. |
| "instructions": [ # Instructions that comprise the computation. |
| { # Describes a particular operation comprising a MapTask. |
| "name": "A String", # User-provided name of this operation. |
| "read": { # An instruction that reads records. # Additional information for Read instructions. |
| # Takes no inputs, produces one output. |
| "source": { # A source that records can be read and decoded from. # The source to read from. |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| }, |
| "outputs": [ # Describes the outputs of the instruction. |
| { # An output of an instruction. |
| "name": "A String", # The user-provided name of this output. |
| "onlyCountKeyBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions |
| # should only report the key size. |
| "codec": { # The codec to use to encode data being written via this output. |
| "a_key": "", # Properties of the object. |
| }, |
| "systemName": "A String", # System-defined name of this output. |
| # Unique across the workflow. |
| "originalName": "A String", # System-defined name for this output in the original workflow graph. |
| # Outputs that do not contribute to an original instruction do not set this. |
| "onlyCountValueBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions |
| # should only report the value size. |
| }, |
| ], |
| "partialGroupByKey": { # An instruction that does a partial group-by-key. # Additional information for PartialGroupByKey instructions. |
| # One input and one output. |
| "sideInputs": [ # Zero or more side inputs. |
| { # Information about a side input of a DoFn or an input of a SeqDoFn. |
| "sources": [ # The source(s) to read element(s) from to get the value of this side input. |
| # If more than one source, then the elements are taken from the |
| # sources, in the specified order if order matters. |
| # At least one source is required. |
| { # A source that records can be read and decoded from. |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| ], |
| "kind": { # How to interpret the source element(s) as a side input value. |
| "a_key": "", # Properties of the object. |
| }, |
| "tag": "A String", # The id of the tag the user code will access this side input by; |
| # this should correspond to the tag of some MultiOutputInfo. |
| }, |
| ], |
| "inputElementCodec": { # The codec to use for interpreting an element in the input PTable. |
| "a_key": "", # Properties of the object. |
| }, |
| "originalCombineValuesStepName": "A String", # If this instruction includes a combining function, this is the name of the |
| # CombineValues instruction lifted into this instruction. |
| "valueCombiningFn": { # The value combining function to invoke. |
| "a_key": "", # Properties of the object. |
| }, |
| "input": { # An input of an instruction, as a reference to an output of a # Describes the input to the partial group-by-key instruction. |
| # producer instruction. |
| "outputNum": 42, # The output index (origin zero) within the producer. |
| "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces |
| # the output to be consumed by this input. This index is relative |
| # to the list of instructions in this input's instruction's |
| # containing MapTask. |
| }, |
| "originalCombineValuesInputStoreName": "A String", # If this instruction includes a combining function this is the name of the |
| # intermediate store between the GBK and the CombineValues. |
| }, |
| "write": { # An instruction that writes records. # Additional information for Write instructions. |
| # Takes one input, produces no outputs. |
| "input": { # An input of an instruction, as a reference to an output of a # The input. |
| # producer instruction. |
| "outputNum": 42, # The output index (origin zero) within the producer. |
| "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces |
| # the output to be consumed by this input. This index is relative |
| # to the list of instructions in this input's instruction's |
| # containing MapTask. |
| }, |
| "sink": { # A sink that records can be encoded and written to. # The sink to write to. |
| "codec": { # The codec to use to encode data written to the sink. |
| "a_key": "", # Properties of the object. |
| }, |
| "spec": { # The sink to write to, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| }, |
| }, |
| "systemName": "A String", # System-defined name of this operation. |
| # Unique across the workflow. |
| "flatten": { # An instruction that copies its inputs (zero or more) to its (single) output. # Additional information for Flatten instructions. |
| "inputs": [ # Describes the inputs to the flatten instruction. |
| { # An input of an instruction, as a reference to an output of a |
| # producer instruction. |
| "outputNum": 42, # The output index (origin zero) within the producer. |
| "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces |
| # the output to be consumed by this input. This index is relative |
| # to the list of instructions in this input's instruction's |
| # containing MapTask. |
| }, |
| ], |
| }, |
| "originalName": "A String", # System-defined name for the operation in the original workflow graph. |
| "parDo": { # An instruction that does a ParDo operation. # Additional information for ParDo instructions. |
| # Takes one main input and zero or more side inputs, and produces |
| # zero or more outputs. |
| # Runs user code. |
| "sideInputs": [ # Zero or more side inputs. |
| { # Information about a side input of a DoFn or an input of a SeqDoFn. |
| "sources": [ # The source(s) to read element(s) from to get the value of this side input. |
| # If more than one source, then the elements are taken from the |
| # sources, in the specified order if order matters. |
| # At least one source is required. |
| { # A source that records can be read and decoded from. |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| ], |
| "kind": { # How to interpret the source element(s) as a side input value. |
| "a_key": "", # Properties of the object. |
| }, |
| "tag": "A String", # The id of the tag the user code will access this side input by; |
| # this should correspond to the tag of some MultiOutputInfo. |
| }, |
| ], |
| "input": { # An input of an instruction, as a reference to an output of a # The input. |
| # producer instruction. |
| "outputNum": 42, # The output index (origin zero) within the producer. |
| "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces |
| # the output to be consumed by this input. This index is relative |
| # to the list of instructions in this input's instruction's |
| # containing MapTask. |
| }, |
| "multiOutputInfos": [ # Information about each of the outputs, if user_fn is a MultiDoFn. |
| { # Information about an output of a multi-output DoFn. |
| "tag": "A String", # The id of the tag the user code will emit to this output by; this |
| # should correspond to the tag of some SideInputInfo. |
| }, |
| ], |
| "numOutputs": 42, # The number of outputs. |
| "userFn": { # The user function to invoke. |
| "a_key": "", # Properties of the object. |
| }, |
| }, |
| }, |
| ], |
| }, |
| ], |
| "windmillServiceEndpoint": "A String", # If present, the worker must use this endpoint to communicate with Windmill |
| # Service dispatchers, otherwise the worker must continue to use whatever |
| # endpoint it had been using. |
| }, |
| "packages": [ # Any required packages that need to be fetched in order to execute |
| # this WorkItem. |
| { # The packages that must be installed in order for a worker to run the |
| # steps of the Cloud Dataflow job that will be assigned to its worker |
| # pool. |
| # |
| # This is the mechanism by which the Cloud Dataflow SDK causes code to |
| # be loaded onto the workers. For example, the Cloud Dataflow Java SDK |
| # might use this to install jars containing the user's code and all of the |
| # various dependencies (libraries, data files, etc.) required in order |
| # for that code to run. |
| "location": "A String", # The resource to read the package from. The supported resource type is: |
| # |
| # Google Cloud Storage: |
| # |
| # storage.googleapis.com/{bucket} |
| # bucket.storage.googleapis.com/ |
| "name": "A String", # The name of the package. |
| }, |
| ], |
| "shellTask": { # A task which consists of a shell command for the worker to execute. # Additional information for ShellTask WorkItems. |
| "command": "A String", # The shell command to run. |
| "exitCode": 42, # Exit code for the task. |
| }, |
| "streamingComputationTask": { # A task which describes what action should be performed for the specified # Additional information for StreamingComputationTask WorkItems. |
| # streaming computation ranges. |
| "taskType": "A String", # A type of streaming computation task. |
| "computationRanges": [ # Contains ranges of a streaming computation this task should apply to. |
| { # Describes full or partial data disk assignment information of the computation |
| # ranges. |
| "computationId": "A String", # The ID of the computation. |
| "rangeAssignments": [ # Data disk assignments for ranges from this computation. |
| { # Data disk assignment information for a specific key-range of a sharded |
| # computation. |
| # Currently we only support UTF-8 character splits to simplify encoding into |
| # JSON. |
| "start": "A String", # The start (inclusive) of the key range. |
| "end": "A String", # The end (exclusive) of the key range. |
| "dataDisk": "A String", # The name of the data disk where data for this range is stored. |
| # This name is local to the Google Cloud Platform project and uniquely |
| # identifies the disk within that project, for example |
| # "myproject-1014-104817-4c2-harness-0-disk-1". |
| }, |
| ], |
| }, |
| ], |
| "dataDisks": [ # Describes the set of data disks this task should apply to. |
| { # Describes mounted data disk. |
| "dataDisk": "A String", # The name of the data disk. |
| # This name is local to the Google Cloud Platform project and uniquely |
| # identifies the disk within that project, for example |
| # "myproject-1014-104817-4c2-harness-0-disk-1". |
| }, |
| ], |
| }, |
| }, |
| ], |
| }</pre> |
| </div> |
| |
| <div class="method"> |
| <code class="details" id="reportStatus">reportStatus(projectId, jobId, body, x__xgafv=None)</code> |
| <pre>Reports the status of dataflow WorkItems leased by a worker. |
| |
| Args: |
| projectId: string, The project which owns the WorkItem's job. (required) |
| jobId: string, The job which the WorkItem is part of. (required) |
| body: object, The request body. (required) |
| The object takes the form of: |
| |
| { # Request to report the status of WorkItems. |
| "workerId": "A String", # The ID of the worker reporting the WorkItem status. If this |
| # does not match the ID of the worker which the Dataflow service |
| # believes currently has the lease on the WorkItem, the report |
| # will be dropped (with an error response). |
| "currentWorkerTime": "A String", # The current timestamp at the worker. |
| "workItemStatuses": [ # The order is unimportant, except that the order of the |
| # WorkItemServiceState messages in the ReportWorkItemStatusResponse |
| # corresponds to the order of WorkItemStatus messages here. |
| { # Conveys a worker's progress through the work described by a WorkItem. |
| "reportIndex": "A String", # The report index. When a WorkItem is leased, the lease will |
| # contain an initial report index. When a WorkItem's status is |
| # reported to the system, the report should be sent with |
| # that report index, and the response will contain the index the |
| # worker should use for the next report. Reports received with |
| # unexpected index values will be rejected by the service. |
| # |
| # In order to preserve idempotency, the worker should not alter the |
| # contents of a report, even if the worker must submit the same |
| # report multiple times before getting back a response. The worker |
| # should not submit a subsequent report until the response for the |
# previous report has been received from the service.
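#
# For example (an illustrative exchange, not actual service output):
# a lease carries initial report index 3; the worker sends its first
# WorkItemStatus with reportIndex 3, the response carries
# nextReportIndex 4, and the worker uses 4 for its next report. If
# the report call fails, the worker retries the identical report
# with index 3.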
| "errors": [ # Specifies errors which occurred during processing. If errors are |
| # provided, and completed = true, then the WorkItem is considered |
| # to have failed. |
| { # The `Status` type defines a logical error model that is suitable for different |
| # programming environments, including REST APIs and RPC APIs. It is used by |
| # [gRPC](https://github.com/grpc). The error model is designed to be: |
| # |
| # - Simple to use and understand for most users |
| # - Flexible enough to meet unexpected needs |
| # |
| # # Overview |
| # |
| # The `Status` message contains three pieces of data: error code, error message, |
| # and error details. The error code should be an enum value of |
| # google.rpc.Code, but it may accept additional error codes if needed. The |
| # error message should be a developer-facing English message that helps |
| # developers *understand* and *resolve* the error. If a localized user-facing |
| # error message is needed, put the localized message in the error details or |
| # localize it in the client. The optional error details may contain arbitrary |
| # information about the error. There is a predefined set of error detail types |
| # in the package `google.rpc` that can be used for common error conditions. |
| # |
| # # Language mapping |
| # |
| # The `Status` message is the logical representation of the error model, but it |
| # is not necessarily the actual wire format. When the `Status` message is |
| # exposed in different client libraries and different wire protocols, it can be |
# mapped differently. For example, it will likely be mapped to some exceptions
# in Java, but to error codes in C.
| # |
| # # Other uses |
| # |
| # The error model and the `Status` message can be used in a variety of |
| # environments, either with or without APIs, to provide a |
| # consistent developer experience across different environments. |
| # |
| # Example uses of this error model include: |
| # |
| # - Partial errors. If a service needs to return partial errors to the client, |
| # it may embed the `Status` in the normal response to indicate the partial |
| # errors. |
| # |
| # - Workflow errors. A typical workflow has multiple steps. Each step may |
| # have a `Status` message for error reporting. |
| # |
| # - Batch operations. If a client uses batch request and batch response, the |
| # `Status` message should be used directly inside batch response, one for |
| # each error sub-response. |
| # |
| # - Asynchronous operations. If an API call embeds asynchronous operation |
| # results in its response, the status of those operations should be |
| # represented directly using the `Status` message. |
| # |
| # - Logging. If some API errors are stored in logs, the message `Status` could |
| # be used directly after any stripping needed for security/privacy reasons. |
| "message": "A String", # A developer-facing error message, which should be in English. Any |
| # user-facing error message should be localized and sent in the |
| # google.rpc.Status.details field, or localized by the client. |
| "code": 42, # The status code, which should be an enum value of google.rpc.Code. |
| "details": [ # A list of messages that carry the error details. There will be a |
| # common set of message types for APIs to use. |
| { |
| "a_key": "", # Properties of the object. Contains field @type with type URL. |
| }, |
| ], |
| }, |
| ], |
| "sourceOperationResponse": { # The result of a SourceOperationRequest, specified in # If the work item represented a SourceOperationRequest, and the work |
| # is completed, contains the result of the operation. |
| # ReportWorkItemStatusRequest.source_operation when the work item |
| # is completed. |
| "getMetadata": { # The result of a SourceGetMetadataOperation. # A response to a request to get metadata about a source. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # The computed metadata. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| "split": { # The response to a SourceSplitRequest. # A response to a request to split a source. |
| "outcome": "A String", # Indicates whether splitting happened and produced a list of bundles. |
| # If this is USE_CURRENT_SOURCE_AS_IS, the current source should |
| # be processed "as is" without splitting. "bundles" is ignored in this case. |
| # If this is SPLITTING_HAPPENED, then "bundles" contains a list of |
| # bundles into which the source was split. |
| "bundles": [ # If outcome is SPLITTING_HAPPENED, then this is a list of bundles |
| # into which the source was split. Otherwise this field is ignored. |
| # This list can be empty, which means the source represents an empty input. |
| { # Specification of one of the bundles produced as a result of splitting |
| # a Source (e.g. when executing a SourceSplitRequest, or when |
| # splitting an active task using WorkItemStatus.dynamic_source_split), |
| # relative to the source being split. |
| "derivationMode": "A String", # What source to base the produced source on (if any). |
| "source": { # A source that records can be read and decoded from. # Specification of the source. |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| }, |
| ], |
| "shards": [ # DEPRECATED in favor of bundles. |
| { # DEPRECATED in favor of DerivedSource. |
| "derivationMode": "A String", # DEPRECATED |
| "source": { # A source that records can be read and decoded from. # DEPRECATED |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| }, |
| ], |
| }, |
| }, |
| "stopPosition": { # Position defines a position within a collection of data. The value # A worker may split an active map task in two parts, "primary" and |
| # "residual", continuing to process the primary part and returning the |
| # residual part into the pool of available work. |
| # This event is called a "dynamic split" and is critical to the dynamic |
| # work rebalancing feature. The two obtained sub-tasks are called |
| # "parts" of the split. |
| # The parts, if concatenated, must represent the same input as would |
| # be read by the current task if the split did not happen. |
| # The exact way in which the original task is decomposed into the two |
| # parts is specified either as a position demarcating them |
| # (stop_position), or explicitly as two DerivedSources, if this |
| # task consumes a user-defined source type (dynamic_source_split). |
| # |
| # The "current" task is adjusted as a result of the split: after a task |
| # with range [A, B) sends a stop_position update at C, its range is |
| # considered to be [A, C), e.g.: |
| # * Progress should be interpreted relative to the new range, e.g. |
| # "75% completed" means "75% of [A, C) completed" |
| # * The worker should interpret proposed_stop_position relative to the |
| # new range, e.g. "split at 68%" should be interpreted as |
| # "split at 68% of [A, C)". |
| # * If the worker chooses to split again using stop_position, only |
| # stop_positions in [A, C) will be accepted. |
| # * Etc. |
| # dynamic_source_split has similar semantics: e.g., if a task with |
| # source S splits using dynamic_source_split into {P, R} |
| # (where P and R must be together equivalent to S), then subsequent |
| # progress and proposed_stop_position should be interpreted relative |
| # to P, and in a potential subsequent dynamic_source_split into {P', R'}, |
| # P' and R' must be together equivalent to P, etc. |
| # can be either the end position, a key (used with ordered |
| # collections), a byte offset, or a record index. |
| "end": True or False, # Position is past all other positions. Also useful for the end |
| # position of an unbounded range. |
| "recordIndex": "A String", # Position is a record index. |
| "byteOffset": "A String", # Position is a byte offset. |
| "key": "A String", # Position is a string key, ordered lexicographically. |
| "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. |
| # position. A ConcatPosition can be used by a reader of a source that |
| # encapsulates a set of other sources. |
| "position": # Object with schema name: Position # Position within the inner source. |
| "index": 42, # Index of the inner source. |
| }, |
| "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED |
| # sharding). |
| }, |
| "sourceFork": { # DEPRECATED in favor of DynamicSourceSplit. # DEPRECATED in favor of dynamic_source_split. |
| "residualSource": { # Specification of one of the bundles produced as a result of splitting # DEPRECATED |
| # a Source (e.g. when executing a SourceSplitRequest, or when |
| # splitting an active task using WorkItemStatus.dynamic_source_split), |
| # relative to the source being split. |
| "derivationMode": "A String", # What source to base the produced source on (if any). |
| "source": { # A source that records can be read and decoded from. # Specification of the source. |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| }, |
| "primarySource": { # Specification of one of the bundles produced as a result of splitting # DEPRECATED |
| # a Source (e.g. when executing a SourceSplitRequest, or when |
| # splitting an active task using WorkItemStatus.dynamic_source_split), |
| # relative to the source being split. |
| "derivationMode": "A String", # What source to base the produced source on (if any). |
| "source": { # A source that records can be read and decoded from. # Specification of the source. |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| }, |
| "residual": { # DEPRECATED in favor of DerivedSource. # DEPRECATED |
| "derivationMode": "A String", # DEPRECATED |
| "source": { # A source that records can be read and decoded from. # DEPRECATED |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| }, |
| "primary": { # DEPRECATED in favor of DerivedSource. # DEPRECATED |
| "derivationMode": "A String", # DEPRECATED |
| "source": { # A source that records can be read and decoded from. # DEPRECATED |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| }, |
| }, |
| "requestedLeaseDuration": "A String", # Amount of time the worker requests for its lease. |
| "completed": True or False, # True if the WorkItem was completed (successfully or unsuccessfully). |
| "workItemId": "A String", # Identifies the WorkItem. |
| "dynamicSourceSplit": { # When a task splits using WorkItemStatus.dynamic_source_split, this # See documentation of stop_position. |
| # message describes the two parts of the split relative to the |
| # description of the current task's input. |
| "residual": { # Specification of one of the bundles produced as a result of splitting # Residual part (returned to the pool of work). |
| # Specified relative to the previously-current source. |
| # a Source (e.g. when executing a SourceSplitRequest, or when |
| # splitting an active task using WorkItemStatus.dynamic_source_split), |
| # relative to the source being split. |
| "derivationMode": "A String", # What source to base the produced source on (if any). |
| "source": { # A source that records can be read and decoded from. # Specification of the source. |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| }, |
| "primary": { # Specification of one of the bundles produced as a result of splitting # Primary part (continued to be processed by worker). |
| # Specified relative to the previously-current source. |
| # Becomes current. |
| # a Source (e.g. when executing a SourceSplitRequest, or when |
| # splitting an active task using WorkItemStatus.dynamic_source_split), |
| # relative to the source being split. |
| "derivationMode": "A String", # What source to base the produced source on (if any). |
| "source": { # A source that records can be read and decoded from. # Specification of the source. |
| "codec": { # The codec to use to decode data read from the source. |
| "a_key": "", # Properties of the object. |
| }, |
| "baseSpecs": [ # While splitting, sources may specify the produced bundles |
| # as differences against another source, in order to save backend-side |
| # memory and allow bigger jobs. For details, see SourceSplitRequest. |
| # To support this use case, the full set of parameters of the source |
| # is logically obtained by taking the latest explicitly specified value |
| # of each parameter in the order: |
| # base_specs (later items win), spec (overrides anything in base_specs). |
| { |
| "a_key": "", # Properties of the object. |
| }, |
| ], |
| "spec": { # The source to read from, plus its parameters. |
| "a_key": "", # Properties of the object. |
| }, |
| "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source |
| # doesn't need splitting, and using SourceSplitRequest on it would |
| # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. |
| # |
| # E.g. a file splitter may set this to true when splitting a single file |
| # into a set of byte ranges of appropriate size, and set this |
| # to false when splitting a filepattern into individual files. |
| # However, for efficiency, a file splitter may decide to produce |
| # file subranges directly from the filepattern to avoid a splitting |
| # round-trip. |
| # |
| # See SourceSplitRequest for an overview of the splitting process. |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, |
| # avoiding a SourceGetMetadataOperation roundtrip |
| # (see SourceOperationRequest). |
| # |
| # This field is meaningful only in the Source objects populated |
| # by the user (e.g. when filling in a DerivedSource). |
| # Source objects supplied by the framework to the user don't have |
| # this field populated. |
| # and tuning the pipeline, etc. |
| "infinite": True or False, # Specifies that the size of this source is known to be infinite |
| # (this is a streaming source). |
| "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be |
| # read from this source. This estimate is in terms of external storage |
| # size, before any decompression or other processing done by the reader. |
| "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with |
| # the (encoded) keys in lexicographically sorted order. |
| }, |
| }, |
| }, |
| }, |
| "counterUpdates": [ # Worker output counters for this WorkItem. |
| { # An update to a Counter sent from a worker. |
| "floatingPointList": { # A metric value representing a list of floating point numbers. # List of floating point numbers, for Set. |
| "elements": [ # Elements of the list. |
| 3.14, |
| ], |
| }, |
| "floatingPoint": 3.14, # Floating point value for Sum, Max, Min. |
| "integerMean": { # A representation of an integer mean metric contribution. # Integer mean aggregation value for Mean. |
| "count": { # A representation of an int64, n, that is immune to precision loss when # The number of values being aggregated. |
| # encoded in JSON. |
| "lowBits": 42, # The low order bits: n & 0xffffffff. |
| "highBits": 42, # The high order bits, including the sign: n >> 32. |
| }, |
| "sum": { # A representation of an int64, n, that is immune to precision loss when # The sum of all values being aggregated. |
| # encoded in JSON. |
| "lowBits": 42, # The low order bits: n & 0xffffffff. |
| "highBits": 42, # The high order bits, including the sign: n >> 32. |
| }, |
| }, |
| "boolean": True or False, # Boolean value for And, Or. |
| "integerList": { # A metric value representing a list of integers. # List of integers, for Set. |
| "elements": [ # Elements of the list. |
| { # A representation of an int64, n, that is immune to precision loss when |
| # encoded in JSON. |
| "lowBits": 42, # The low order bits: n & 0xffffffff. |
| "highBits": 42, # The high order bits, including the sign: n >> 32. |
| }, |
| ], |
| }, |
| "cumulative": True or False, # True if this counter is reported as the total cumulative aggregate |
| # value accumulated since the worker started working on this WorkItem. |
| # By default this is false, indicating that this counter is reported |
| # as a delta. |
| "shortId": "A String", # The service-generated short identifier for this counter. |
| # The short_id -> (name, metadata) mapping is constant for the lifetime of |
| # a job. |
| "floatingPointMean": { # A representation of a floating point mean metric contribution. # Floating point mean aggregation value for Mean. |
| "count": { # A representation of an int64, n, that is immune to precision loss when # The number of values being aggregated. |
| # encoded in JSON. |
| "lowBits": 42, # The low order bits: n & 0xffffffff. |
| "highBits": 42, # The high order bits, including the sign: n >> 32. |
| }, |
| "sum": 3.14, # The sum of all values being aggregated. |
| }, |
| "internal": "", # Value for internally-defined counters used by the Dataflow service. |
| "structuredNameAndMetadata": { # A single message which encapsulates structured name and metadata for a given # Counter structured name and metadata. |
| # counter. |
| "name": { # Identifies a counter within a per-job namespace. Counters whose structured # Structured name of the counter. |
| # names are the same get merged into a single value for the job. |
| "origin": "A String", # One of the standard Origins defined above. |
| "executionStepName": "A String", # Name of the stage. An execution step contains multiple component steps. |
| "name": "A String", # Counter name. Not necessarily globally-unique, but unique within the |
| # context of the other fields. |
| # Required. |
| "workerId": "A String", # ID of a particular worker. |
| "originalStepName": "A String", # System generated name of the original step in the user's graph, before |
| # optimization. |
| "originNamespace": "A String", # A string containing a more specific namespace of the counter's origin. |
| "portion": "A String", # Portion of this counter, either key or value. |
| "componentStepName": "A String", # Name of the optimized step being executed by the workers. |
| }, |
| "metadata": { # CounterMetadata includes all static non-name non-value counter attributes. # Metadata associated with a counter |
| "standardUnits": "A String", # System defined Units, see above enum. |
| "kind": "A String", # Counter aggregation kind. |
| "otherUnits": "A String", # A string referring to the unit type. |
| "description": "A String", # Human-readable description of the counter semantics. |
| }, |
| }, |
| "nameAndKind": { # Basic metadata about a counter. # Counter name and aggregation type. |
| "kind": "A String", # Counter aggregation kind. |
| "name": "A String", # Name of the counter. |
| }, |
| "integer": { # A representation of an int64, n, that is immune to precision loss when # Integer value for Sum, Max, Min. |
| # encoded in JSON. |
| "lowBits": 42, # The low order bits: n & 0xffffffff. |
| "highBits": 42, # The high order bits, including the sign: n >> 32. |
| }, |
| "distribution": { # A metric value representing a distribution. # Distribution data |
| "count": { # A representation of an int64, n, that is immune to precision loss when # The count of the number of elements present in the distribution. |
| # encoded in JSON. |
| "lowBits": 42, # The low order bits: n & 0xffffffff. |
| "highBits": 42, # The high order bits, including the sign: n >> 32. |
| }, |
| "min": { # A representation of an int64, n, that is immune to precision loss when # The minimum value present in the distribution. |
| # encoded in JSON. |
| "lowBits": 42, # The low order bits: n & 0xffffffff. |
| "highBits": 42, # The high order bits, including the sign: n >> 32. |
| }, |
| "max": { # A representation of an int64, n, that is immune to precision loss when # The maximum value present in the distribution. |
| # encoded in JSON. |
| "lowBits": 42, # The low order bits: n & 0xffffffff. |
| "highBits": 42, # The high order bits, including the sign: n >> 32. |
| }, |
| "sum": { # A representation of an int64, n, that is immune to precision loss when # Use an int64 since we'd prefer the added precision. If overflow is a common |
| # problem we can detect it and use an additional int64 or a double. |
| # encoded in JSON. |
| "lowBits": 42, # The low order bits: n & 0xffffffff. |
| "highBits": 42, # The high order bits, including the sign: n >> 32. |
| }, |
| "sumOfSquares": 3.14, # Use a double since the sum of squares is likely to overflow int64. |
| "logBuckets": [ # (Optional) Logarithmic histogram of values. |
| # Each log may be in no more than one bucket. Order does not matter. |
| { # Bucket of values for Distribution's logarithmic histogram. |
| "count": "A String", # Number of values in this bucket. |
| "log": 42, # floor(log2(value)); defined to be zero for nonpositive values. |
| # log(-1) = 0 |
| # log(0) = 0 |
| # log(1) = 0 |
| # log(2) = 1 |
| # log(3) = 1 |
| # log(4) = 2 |
| # log(5) = 2 |
| }, |
| ], |
| }, |
| "stringList": { # A metric value representing a list of strings. # List of strings, for Set. |
| "elements": [ # Elements of the list. |
| "A String", |
| ], |
| }, |
| }, |
| ], |
| "progress": { # Obsolete in favor of ApproximateReportedProgress and ApproximateSplitRequest. # DEPRECATED in favor of reported_progress. |
| "position": { # Position defines a position within a collection of data. The value # Obsolete. |
| # can be either the end position, a key (used with ordered |
| # collections), a byte offset, or a record index. |
| "end": True or False, # Position is past all other positions. Also useful for the end |
| # position of an unbounded range. |
| "recordIndex": "A String", # Position is a record index. |
| "byteOffset": "A String", # Position is a byte offset. |
| "key": "A String", # Position is a string key, ordered lexicographically. |
| "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. |
| # position. A ConcatPosition can be used by a reader of a source that |
| # encapsulates a set of other sources. |
| "position": # Object with schema name: Position # Position within the inner source. |
| "index": 42, # Index of the inner source. |
| }, |
| "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED |
| # sharding). |
| }, |
| "remainingTime": "A String", # Obsolete. |
| "percentComplete": 3.14, # Obsolete. |
| }, |
| "metricUpdates": [ # DEPRECATED in favor of counter_updates. |
| { # Describes the state of a metric. |
| "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind. |
| # This holds the count of the aggregated values and is used in combination |
# with mean_sum to obtain the actual mean aggregate value.
| # The only possible value type is Long. |
| "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are |
| # reporting work progress; it will be filled in responses from the |
| # metrics API. |
| "set": "", # Worker-computed aggregate value for the "Set" aggregation kind. The only |
| # possible value type is a list of Values whose type can be Long, Double, |
| # or String, according to the metric's type. All Values in the list must |
| # be of the same type. |
| "name": { # Identifies a metric, by describing the source which generated the # Name of the metric. |
| # metric. |
| "origin": "A String", # Origin (namespace) of metric name. May be blank for user-define metrics; |
| # will be "dataflow" for metrics defined by the Dataflow service or SDK. |
| "name": "A String", # Worker-defined metric name. |
| "context": { # Zero or more labeled fields which identify the part of the job this |
| # metric is associated with, such as the name of a step or collection. |
| # |
| # For example, built-in counters associated with steps will have |
| # context['step'] = <step-name>. Counters associated with PCollections |
| # in the SDK will have context['pcollection'] = <pcollection-name>. |
| "a_key": "A String", |
| }, |
| }, |
| "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate |
| # value accumulated since the worker started working on this WorkItem. |
| # By default this is false, indicating that this metric is reported |
| # as a delta that is not associated with any WorkItem. |
| "kind": "A String", # Metric aggregation kind. The possible metric aggregation kinds are |
| # "Sum", "Max", "Min", "Mean", "Set", "And", "Or", and "Distribution". |
| # The specified aggregation kind is case-insensitive. |
| # |
| # If omitted, this is not an aggregated value but instead |
| # a single metric sample value. |
| "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min", |
| # "And", and "Or". The possible value types are Long, Double, and Boolean. |
| "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind. |
| # This holds the sum of the aggregated values and is used in combination |
# with mean_count to obtain the actual mean aggregate value.
| # The only possible value types are Long and Double. |
| "distribution": "", # A struct value describing properties of a distribution of numeric values. |
| "internal": "", # Worker-computed aggregate value for internal use by the Dataflow |
| # service. |
| }, |
| ], |
| "reportedProgress": { # A progress measurement of a WorkItem by a worker. # The worker's progress through this WorkItem. |
| "fractionConsumed": 3.14, # Completion as fraction of the input consumed, from 0.0 (beginning, nothing |
| # consumed), to 1.0 (end of the input, entire input consumed). |
| "position": { # Position defines a position within a collection of data. The value # A Position within the work to represent a progress. |
| # can be either the end position, a key (used with ordered |
| # collections), a byte offset, or a record index. |
| "end": True or False, # Position is past all other positions. Also useful for the end |
| # position of an unbounded range. |
| "recordIndex": "A String", # Position is a record index. |
| "byteOffset": "A String", # Position is a byte offset. |
| "key": "A String", # Position is a string key, ordered lexicographically. |
| "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. |
| # position. A ConcatPosition can be used by a reader of a source that |
| # encapsulates a set of other sources. |
| "position": # Object with schema name: Position # Position within the inner source. |
| "index": 42, # Index of the inner source. |
| }, |
| "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED |
| # sharding). |
| }, |
| "remainingParallelism": { # Represents the level of parallelism in a WorkItem's input, # Total amount of parallelism in the input of this task that remains, |
| # (i.e. can be delegated to this task and any new tasks via dynamic |
| # splitting). Always at least 1 for non-finished work items and 0 for |
| # finished. |
| # |
| # "Amount of parallelism" refers to how many non-empty parts of the input |
| # can be read in parallel. This does not necessarily equal number |
| # of records. An input that can be read in parallel down to the |
| # individual records is called "perfectly splittable". |
| # An example of non-perfectly parallelizable input is a block-compressed |
| # file format where a block of records has to be read as a whole, |
| # but different blocks can be read in parallel. |
| # |
| # Examples: |
| # * If we are processing record #30 (starting at 1) out of 50 in a perfectly |
| # splittable 50-record input, this value should be 21 (20 remaining + 1 |
| # current). |
| # * If we are reading through block 3 in a block-compressed file consisting |
| # of 5 blocks, this value should be 3 (since blocks 4 and 5 can be |
| # processed in parallel by new tasks via dynamic splitting and the current |
| # task remains processing block 3). |
| # * If we are reading through the last block in a block-compressed file, |
| # or reading or processing the last record in a perfectly splittable |
| # input, this value should be 1, because apart from the current task, no |
| # additional remainder can be split off. |
| # reported by the worker. |
| "isInfinite": True or False, # Specifies whether the parallelism is infinite. If true, "value" is |
| # ignored. |
| # Infinite parallelism means the service will assume that the work item |
| # can always be split into more non-empty work items by dynamic splitting. |
| # This is a work-around for lack of support for infinity by the current |
| # JSON-based Java RPC stack. |
| "value": 3.14, # Specifies the level of parallelism in case it is finite. |
| }, |
| "consumedParallelism": { # Represents the level of parallelism in a WorkItem's input, # Total amount of parallelism in the portion of input of this task that has |
| # already been consumed and is no longer active. In the first two examples |
| # above (see remaining_parallelism), the value should be 29 or 2 |
| # respectively. The sum of remaining_parallelism and consumed_parallelism |
| # should equal the total amount of parallelism in this work item. If |
| # specified, must be finite. |
| # reported by the worker. |
| "isInfinite": True or False, # Specifies whether the parallelism is infinite. If true, "value" is |
| # ignored. |
| # Infinite parallelism means the service will assume that the work item |
| # can always be split into more non-empty work items by dynamic splitting. |
| # This is a work-around for lack of support for infinity by the current |
| # JSON-based Java RPC stack. |
| "value": 3.14, # Specifies the level of parallelism in case it is finite. |
| }, |
| }, |
| }, |
| ], |
| "location": "A String", # The location which contains the WorkItem's job. |
| } |
| |
| x__xgafv: string, V1 error format. |
| Allowed values |
| 1 - v1 error format |
| 2 - v2 error format |
| |
| Returns: |
| An object of the form: |
| |
| { # Response from a request to report the status of WorkItems. |
| "workItemServiceStates": [ # A set of messages indicating the service-side state for each |
| # WorkItem whose status was reported, in the same order as the |
| # WorkItemStatus messages in the ReportWorkItemStatusRequest which |
# resulted in this response.
| { # The Dataflow service's idea of the current state of a WorkItem |
| # being processed by a worker. |
| "reportStatusInterval": "A String", # New recommended reporting interval. |
| "suggestedStopPosition": { # Position defines a position within a collection of data. The value # Obsolete, always empty. |
| # can be either the end position, a key (used with ordered |
| # collections), a byte offset, or a record index. |
| "end": True or False, # Position is past all other positions. Also useful for the end |
| # position of an unbounded range. |
| "recordIndex": "A String", # Position is a record index. |
| "byteOffset": "A String", # Position is a byte offset. |
| "key": "A String", # Position is a string key, ordered lexicographically. |
| "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. |
| # position. A ConcatPosition can be used by a reader of a source that |
| # encapsulates a set of other sources. |
| "position": # Object with schema name: Position # Position within the inner source. |
| "index": 42, # Index of the inner source. |
| }, |
| "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED |
| # sharding). |
| }, |
| "harnessData": { # Other data returned by the service, specific to the particular |
| # worker harness. |
| "a_key": "", # Properties of the object. |
| }, |
| "nextReportIndex": "A String", # The index value to use for the next report sent by the worker. |
| # Note: If the report call fails for whatever reason, the worker should |
| # reuse this index for subsequent report attempts. |
| "leaseExpireTime": "A String", # Time at which the current lease will expire. |
| "metricShortId": [ # The short ids that workers should use in subsequent metric updates. |
| # Workers should strive to use short ids whenever possible, but it is ok |
| # to request the short_id again if a worker lost track of it |
| # (e.g. if the worker is recovering from a crash). |
| # NOTE: it is possible that the response may have short ids for a subset |
| # of the metrics. |
| { # The metric short id is returned to the user alongside an offset into |
| # ReportWorkItemStatusRequest |
| "shortId": "A String", # The service-generated short identifier for the metric. |
| "metricIndex": 42, # The index of the corresponding metric in |
| # the ReportWorkItemStatusRequest. Required. |
| }, |
| ], |
| "splitRequest": { # A suggestion by the service to the worker to dynamically split the WorkItem. # The progress point in the WorkItem where the Dataflow service |
| # suggests that the worker truncate the task. |
| "fractionConsumed": 3.14, # A fraction at which to split the work item, from 0.0 (beginning of the |
| # input) to 1.0 (end of the input). |
| "position": { # Position defines a position within a collection of data. The value # A Position at which to split the work item. |
| # can be either the end position, a key (used with ordered |
| # collections), a byte offset, or a record index. |
| "end": True or False, # Position is past all other positions. Also useful for the end |
| # position of an unbounded range. |
| "recordIndex": "A String", # Position is a record index. |
| "byteOffset": "A String", # Position is a byte offset. |
| "key": "A String", # Position is a string key, ordered lexicographically. |
| "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. |
| # position. A ConcatPosition can be used by a reader of a source that |
| # encapsulates a set of other sources. |
| "position": # Object with schema name: Position # Position within the inner source. |
| "index": 42, # Index of the inner source. |
| }, |
| "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED |
| # sharding). |
| }, |
| }, |
| "suggestedStopPoint": { # Obsolete in favor of ApproximateReportedProgress and ApproximateSplitRequest. # DEPRECATED in favor of split_request. |
| "position": { # Position defines a position within a collection of data. The value # Obsolete. |
| # can be either the end position, a key (used with ordered |
| # collections), a byte offset, or a record index. |
| "end": True or False, # Position is past all other positions. Also useful for the end |
| # position of an unbounded range. |
| "recordIndex": "A String", # Position is a record index. |
| "byteOffset": "A String", # Position is a byte offset. |
| "key": "A String", # Position is a string key, ordered lexicographically. |
| "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. |
| # position. A ConcatPosition can be used by a reader of a source that |
| # encapsulates a set of other sources. |
| "position": # Object with schema name: Position # Position within the inner source. |
| "index": 42, # Index of the inner source. |
| }, |
| "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED |
| # sharding). |
| }, |
| "remainingTime": "A String", # Obsolete. |
| "percentComplete": 3.14, # Obsolete. |
| }, |
| }, |
| ], |
| }</pre> |
| </div> |
| |
| </body></html> |