scheduler/server/stateful_scheduler.go (8 lines): - line 316: killJobCh: make(chan jobKillRequest, 1), // TODO - what should this value be? - line 532: // TODO: make processUpdates on scheduler state wait until an update - line 639: s.stat.Gauge(stats.SchedNumAsyncRunnersGauge).Update(int64(s.asyncRunner.NumRunning())) //TODO remove when done debugging - line 861: // FIXME: seeing panic on closed channel here after killjob(). - line 879: s.stat.Counter(stats.SchedRetriedEndSagaCounter).Inc(1) // TODO errata metric - remove if unused - line 1017: // TODO - we no longer set a node as flaky on failed status. - line 1054: // TODO this may result in closed channel panic due to sending endSaga to sagalog (below) before endTask - line 1256: s.stat.Counter(stats.SchedFailedTaskSerializeCounter).Inc(1) // TODO errata metric - remove if unused scheduler/server/recover_jobs.go (5 lines): - line 24: // TODO: Add metrics for failure rate, this would be something we should alert on. - line 45: // TODO: limit max parallel requests to RecoverSagaState? - line 53: // TODO: Increment counter? A breaking change was made - line 83: // TODO: add metrics for failure rate, this would be something we should alert on - line 87: // TODO: add metrics for fatal failure rate, this would be something we should alert on, this is a bad bug runner/runners/invoke.go (4 lines): - line 96: // TODO opportunity for consolidation with existing timers and metrics as part of larger refactor - line 140: // TODO: we don't want this logic to live here, these decisions should be made at a higher level. - line 397: // TODO: remove when we transition to using only stdlog in run status - line 405: // TODO: remove when we transition to using only stdlog in run status worker/starter/server.go (3 lines): - line 57: //TODO: runner should eventually be extended to support stats, multiple runs, etc. (replacing loop here). 
- line 99: h.stat.Gauge(stats.WorkerEndedCachedRunsGauge).Update(int64(len(processes)) - numActive) // TODO errata metric - remove if unused - line 100: h.stat.Gauge(stats.WorkerTimeSinceLastContactGauge_ms).Update(timeSincelastContact_ms) // TODO errata metric - remove if unused common/stats/stats_names.go (2 lines): - line 409: TODO - this includes runs that are waiting to start - will not be accurate if we go to a - line 416: TODO - understand how/when this gets reset - it's based on the runs in the worker's StatusAll() scheduler/client/locate.go (2 lines): - line 17: // TODO: this will eventually store only the thrift addr and http addr - line 19: // TODO: can we get rid of this and exclusively rely on a Fetcher to find instances? runner/execer/execer.go (2 lines): - line 58: // TODO why not include directly in Execer? - line 67: // TODO when are these valid in what cases? scheduler/api/server.go (2 lines): - line 46: defer h.stat.Latency(stats.SchedServerRunJobLatency_ms).Time().Stop() // TODO errata metric - remove if unused - line 47: h.stat.Counter(stats.SchedServerRunJobCounter).Inc(1) // TODO errata metric - remove if unused scheduler/api/thrift/run_job.go (2 lines): - line 16: // TODO: change to return scoot.NewInvalidRequest() - line 29: return nil, err //TODO: use or delete scoot.NewCanNotScheduleNow() snapshot/db.go (1 line): - line 61: // TODO remove this abstraction, or consolidate it with Filer snapshot/git/gitdb/bundlestore.go (1 line): - line 267: // TODO separate the use cases that need git/repo/stream semantics from things that can be passed scheduler/server/task_scheduler.go (1 line): - line 104: // TODO move assigning tasks to nodes to cluster state to avoid copying clusterState.NodeGroups scheduler/starter/modules.go (1 line): - line 172: // MakeSagaLog - TODO remove saga or refactor it so this function can be moved into saga or sagalog scheduler/server/task_runner.go (1 line): - line 346: 
r.stat.Counter(stats.SchedFailedTaskSerializeCounter).Inc(1) // TODO errata metric - remove if unused snapshot/cli/cli.go (1 line): - line 258: // TODO (dgassaway): this would be better off in a proper library package scheduler/server/job_state.go (1 line): - line 266: // TODO remove before deploying to prod or if this slows staging down too much snapshot/git/gitdb/checkout.go (1 line): - line 146: // TODO - this looks suspicious.... apiserver/main.go (1 line): - line 40: // The same config will be used for both bundlestore and frontend (TODO: frontend). snapshot/store/groupcache_store.go (1 line): - line 22: // TODO: we should consider extending contexts in groupcache lib further to: snapshot/git/gitdb/db.go (1 line): - line 17: // TODO The interfaces and functionality here should be refactored to only use git when necessary. worker/domain/api.go (1 line): - line 18: // TODO: test workerStatus.