in spotify_tensorflow/tfx/tfdv.py [0:0]
def __init__(self,
             schema_path,  # type: Optional[str]
             data_location,  # type: str
             binary_schema=False,  # type: bool
             stats_options=None  # type: Optional[StatsOptions]
             ):
    """
    Load the optional schema and derive the artifact paths under ``data_location``.

    :param schema_path: tf.metadata Schema path. Must be in text or binary format. If this is
                        None, a new schema will be inferred automatically from the statistics.
                        Any falsy value (e.g. an empty string) is treated like None here and
                        leaves ``self.schema`` unset.
    :param data_location: input data dir containing tfrecord files; the schema snapshot,
                          statistics and anomalies file paths are built inside this directory
    :param binary_schema: specifies if the schema is in a binary format
    :param stats_options: tfdv.StatsOptions for statistics generation settings. When omitted
                          (None), a fresh default ``StatsOptions()`` is created for this instance.
    """
    # A `StatsOptions()` default in the signature would be evaluated once, at definition
    # time, and the same object would then be shared by every instance. Build it per call.
    if stats_options is None:
        stats_options = StatsOptions()

    self.data_location = data_location

    # The schema stays None unless a path was supplied.
    self.schema = None
    if schema_path:
        if binary_schema:
            self.schema = parse_schema_file(schema_path)
        else:
            self.schema = parse_schema_txt_file(schema_path)

    # Artifact paths all live directly under the input data directory.
    self.schema_snapshot_path = pjoin(self.data_location, "schema_snapshot.pb")
    self.stats_path = pjoin(self.data_location, "stats.pb")
    self.anomalies_path = pjoin(self.data_location, "anomalies.pb")
    self.stats_options = stats_options