sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java (6 lines): - line 211: // TODO: Find out a generic way for the WorkExecutor to report work-specific results - line 268: // TODO: Look into moving the stack trace thinning - line 272: error.setCode(2); // Code.UNKNOWN. TODO: Replace with a generated definition. - line 273: // TODO: Attach the stack trace as exception details, not to the message. - line 328: // TODO: Implement exactly-once delivery and use deltas, - line 365: // TODO: Provide more structure representation of error, sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java (5 lines): - line 554: // TODO: This should be done via a Structs accessor. - line 688: // TODO: This should be done via a Structs accessor. - line 695: // TODO: This should be done via a Structs accessor. - line 844: // TODO: Put in better element printing: - line 909: // TODO: Allow combiner lifting on the non-default trigger, as appropriate. sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java (4 lines): - line 182: // TODO Implement a more efficient parallel/batch size estimation mechanism for file patterns. - line 205: // TODO: replace this with bulk request API when it is available. Will require updates - line 244: // TODO: Implement a more efficient sampling mechanism. - line 619: // Unsupported. TODO: implement. sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java (3 lines): - line 56: // TODO: How do we want to specify format and - line 92: // TODO: drop this check when server supports alternative templates. - line 107: // TODO: How do we want to specify format and sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java (3 lines): - line 229: *

TODO: provide accessors that take and return {@code WindowedValue}s - line 233: // TODO: Should we return an unmodifiable list? - line 276: // TODO: Should we return an unmodifiable list? sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java (3 lines): - line 272: // TODO: Perhaps this can be implemented by inspecting the query. - line 283: // Fallback in case estimated size is unavailable. TODO: fix this, it's horrible. - line 389: /** For testing only. TODO: This could be much cleaner with dependency injection. */ sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java (3 lines): - line 189: // TODO: strippingNewlines, etc. - line 431: // TODO: appendingNewlines, header, footer, etc. - line 627: // TODO: This would need to be adapted to write per-window shards. sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java (2 lines): - line 69: *

TODO: add support for VM initialization via config. - line 277: // required by the Google API parsing framework. TODO: Fix the framework sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSink.java (2 lines): - line 131: // TODO: Decide the representation of sort-keyed values. - line 223: // TODO: Need to coordinate with the sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java (2 lines): - line 177: // TODO: Consider using the ExecutorService from PipelineOptions instead. - line 302: *

TODO: Remove this hack once we move to gRPC or report this value in a more structured sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java (2 lines): - line 573: // TODO: allow for metrics to propagate on failure when running a streaming pipeline - line 646: // TODO: allow for metrics to propagate on failure when running a streaming pipeline sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SerializableUtils.java (2 lines): - line 137: // TODO: Put in better element printing: - line 147: // TODO: Put in better encoded byte array printing: sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnBase.java (2 lines): - line 159: // TODO: plumb through the operationName, so that we can - line 162: // TODO: plumb through the counter prefix, so we can sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java (2 lines): - line 387: // TODO: support "." and ".." path components? - line 592: // TODO: Consider using resource names for all GCS paths used by the SDK. sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MonitoringUtil.java (2 lines): - line 107: // TODO: Remove filtering here once getJobMessages supports minimum - line 165: // TODO: Allow filtering messages by importance sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java (2 lines): - line 103: // TODO: Don't use the sentinel if context.isWholeStream. - line 195: // TODO: Don't use the sentinel if context.isWholeStream. sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKey.java (2 lines): - line 98: // TODO: Look at better integration of union types with the - line 112: // TODO: Use the schema to order the indices rather than depending sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java (2 lines): - line 86: // TODO: consider adding VECTOR_SUM, HISTOGRAM, KV_SET, PRODUCT, TOP. - line 776: // TODO: Support MIN, MAX of Strings. sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java (1 line): - line 684: // TODO: Support non-String encodings. sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Structs.java (1 line): - line 61: // TODO: Need to agree on a format for encoding bytes in sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaIteratorsDoFn.java (1 line): - line 59: // TODO: Add support for other triggers. sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunner.java (1 line): - line 61: // TODO: make this configurable after removal of option map. sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java (1 line): - line 450: // TODO: Remove this once GroupByKeyOnly no longer outputs elements sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CompressedSource.java (1 line): - line 60: *

TODO: Refactor decompressing channel/stream creation and default instances to util classes. sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/OffsetRangeTracker.java (1 line): - line 114: // TODO: Investigate whether in practice this is useful or, rather, confusing. sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java (1 line): - line 109: // TODO: Replace with the real encoding used by the sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Aggregator.java (1 line): - line 72: // TODO: Consider the following additional API conveniences: sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java (1 line): - line 157: // TODO: Remove setting these options once we have migrated to passing sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java (1 line): - line 86: // TODO: drop this check when server supports alternative templates. sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java (1 line): - line 1103: // TODO: Run multiple shards? sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleEntry.java (1 line): - line 80: // TODO: Use a more compact and readable representation, sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/TimeTrigger.java (1 line): - line 95: *

TODO: Consider sharing this with FixedWindows, and bring over the equivalent of sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsAndCombineDoFn.java (1 line): - line 56: // TODO: Add support for other triggers. sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ProgressTrackerGroup.java (1 line): - line 35: // TODO: Instead of an abstract class, strongly consider adding an sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java (1 line): - line 194: // TODO: Do this exclusively for work requests. sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java (1 line): - line 184: // TODO: Should we attempt to detect the Mime type rather than sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelUtils.java (1 line): - line 39: // TODO: add registration mechanism for adding new schemas. sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowFn.java (1 line): - line 35: * TODO: Describe how to properly create {@code WindowFn}s. sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerRunner.java (1 line): - line 193: // TODO: If we know that no trigger in the tree will ever finish, we don't need to do the sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java (1 line): - line 504: // TODO: Put in better element printing: sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java (1 line): - line 397: // TODO: Actually support window merging in the combiner table. contrib/hadoop/src/main/java/com/google/cloud/dataflow/contrib/hadoop/HadoopFileSource.java (1 line): - line 234: // TODO: how to use registered coders here? sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java (1 line): - line 83: // TODO: support for success / failure ports? sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java (1 line): - line 870: *

TODO: Swap back to using Introspector once the proxy class issue with AppEngine is sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java (1 line): - line 38: // TODO: Think about how to integrate this with a schema object (i.e. sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java (1 line): - line 115: // TODO: Remove when we can set the coder inference context. sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Serializer.java (1 line): - line 59: // TODO: It would be ideal to do this for all non-final classes. The sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ZipFiles.java (1 line): - line 81: // TODO: implement size() to try calling entry.getSize()? sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformTreeNode.java (1 line): - line 59: // TODO: track which outputs need to be exported to parent. sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSources.java (1 line): - line 446: *

TODO: Consider changing the API of Reader.ReaderIterator so this adapter wouldn't be sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java (1 line): - line 55: // TODO: These strings are part of the service definition, and sdk/src/main/java/com/google/cloud/dataflow/sdk/PipelineResult.java (1 line): - line 45: // TODO: method to retrieve error messages. sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsIOChannelFactory.java (1 line): - line 78: // TODO It is incorrect to return true here for files with content encoding set to gzip. sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudCounterUtils.java (1 line): - line 56: // TODO: Omit no-op counter updates, for counters whose sdk/src/main/java/com/google/cloud/dataflow/sdk/util/InstanceBuilder.java (1 line): - line 160: // TODO: cache results, to speed repeated type lookups? sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSink.java (1 line): - line 264: // TODO: add support for user-defined sharding function. sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/RawUnionValue.java (1 line): - line 19: // TODO: Think about making this a complete dynamic union by adding sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java (1 line): - line 534: // TODO: We should be able to support custom schemas on POJO fields, but we shouldn't sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterPane.java (1 line): - line 68: // TODO: Consider waiting to read the value until the end of a bundle, since we don't need to sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java (1 line): - line 112: // TODO: Redesign to get rid of the executor and use a dedicated sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/BatchingShuffleEntryReader.java (1 line): - line 93: // TODO: Report API errors to the caller using checked