def _create_pipeline():
  """Implements the chicago taxi pipeline with TFX."""
  examples = csv_input(_data_root)
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input=examples)
  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
  # Generates schema based on statistics files.
  infer_schema = SchemaGen(statistics=statistics_gen.outputs['statistics'])
  return pipeline.Pipeline(
      pipeline_name='chicago_taxi_simple',
      pipeline_root=_pipeline_root,
      components=[
          example_gen, statistics_gen, infer_schema
      ],
      enable_cache=True,
      metadata_db_root=_metadata_db_root,
  )
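# For context: a minimal sketch of the imports and local-runner invocation this
# early-TFX snippet assumes (in the source module the imports precede the
# function). Exact import paths varied across 0.x releases, so treat these as
# assumptions; _data_root, _pipeline_root and _metadata_db_root are
# illustrative placeholder paths, not values from the source.
import os

from tfx.components import CsvExampleGen, SchemaGen, StatisticsGen
from tfx.orchestration import pipeline
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner
from tfx.utils.dsl_utils import csv_input

_data_root = os.path.join(os.environ['HOME'], 'taxi', 'data', 'simple')
_pipeline_root = os.path.join(os.environ['HOME'], 'taxi', 'pipelines')
_metadata_db_root = os.path.join(os.environ['HOME'], 'taxi', 'metadata')

# Runs the pipeline locally on Apache Beam's direct runner.
BeamDagRunner().run(_create_pipeline())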
def _create_pipeline():
  """Implements the chicago taxi pipeline with TFX."""
  examples = csv_input(_data_root)
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input_base=examples)
  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(input_data=example_gen.outputs['examples'])
  # Generates schema based on statistics files.
  infer_schema = SchemaGen(stats=statistics_gen.outputs['output'])
  # Performs anomaly detection based on statistics and data schema.
  validate_stats = ExampleValidator(
      stats=statistics_gen.outputs['output'],
      schema=infer_schema.outputs['output'])
  return pipeline.Pipeline(
      pipeline_name='chicago_taxi_simple',
      pipeline_root=_pipeline_root,
      components=[
          example_gen, statistics_gen, infer_schema, validate_stats
      ],
      enable_cache=True,
      metadata_db_root=_metadata_db_root,
  )
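# The two revisions above are the same pipeline against the older and newer
# component APIs: argument names such as input_base/input_data/stats were
# later renamed to input/examples/statistics, and the generic 'output'
# artifact key gained typed names. A one-line sketch of reading the
# validator's result under the newer key names (an assumption; the older
# releases shown here exposed it as outputs['output']):
anomalies_channel = validate_stats.outputs['anomalies']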
def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     metadata_path: Text) -> pipeline.Pipeline:
  """Implements the chicago taxi pipeline with TFX."""
  examples = external_input(data_root)
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input_base=examples)
  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(input_data=example_gen.outputs['examples'])
  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[example_gen, statistics_gen],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path))
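# A sketch of driving the parameterized variant above with the local Beam
# runner. It assumes the snippet's own imports (Text from typing,
# external_input from tfx.utils.dsl_utils, metadata and pipeline from
# tfx.orchestration); the names and paths below are illustrative assumptions.
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner

BeamDagRunner().run(
    _create_pipeline(
        pipeline_name='chicago_taxi_beam',
        pipeline_root='/tmp/pipelines/chicago_taxi_beam',
        data_root='/tmp/data/simple',
        metadata_path='/tmp/metadata/metadata.db'))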
def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     metadata_path: Text) -> pipeline.Pipeline:
  """Implements the chicago taxi pipeline with TFX."""
  examples = external_input(data_root)
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input=examples)
  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
  # Generates schema based on statistics files.
  infer_schema = SchemaGen(statistics=statistics_gen.outputs['statistics'])
  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[example_gen, statistics_gen, infer_schema],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      additional_pipeline_args={},
  )
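# additional_pipeline_args is a pass-through dict for orchestrator-level
# settings; in this era of TFX it was commonly used to forward Apache Beam
# flags, e.g. {'beam_pipeline_args': ['--direct_num_workers=0']} (the key
# name is an assumption for this version; later releases added a dedicated
# beam_pipeline_args parameter on pipeline.Pipeline).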
        pickup_census_tract,
        dropoff_census_tract,
        payment_type,
        company,
        trip_seconds,
        dropoff_community_area,
        tips
      FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`
      ORDER BY trip_start_timestamp
      LIMIT 100000000"""  # 100 Million.

# Brings data into the pipeline or otherwise joins/converts training data.
example_gen = BigQueryExampleGen(query=query)
# Computes statistics over data for visualization and example validation.
statistics_gen = StatisticsGen(input_data=example_gen.outputs.examples)
# Generates schema based on statistics files.
infer_schema = SchemaGen(stats=statistics_gen.outputs.output)
# Performs anomaly detection based on statistics and data schema.
validate_stats = ExampleValidator(
    stats=statistics_gen.outputs.output,
    schema=infer_schema.outputs.output)
# Performs transformations and feature engineering in training and serving.
transform = Transform(
    input_data=example_gen.outputs.examples,
    schema=infer_schema.outputs.output,
    module_file=_taxi_utils)
# Uses user-provided Python function that implements a model using TF-Learn.
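# Transform loads the module_file given above (_taxi_utils) and calls the
# preprocessing_fn defined there. A minimal sketch of that contract, assuming
# tensorflow_transform; the feature names here are illustrative, not the taxi
# example's real feature list:
import tensorflow_transform as tft

def preprocessing_fn(inputs):
  """tf.Transform callback: maps raw feature tensors to transformed features."""
  outputs = {}
  # Scale a numeric column to z-scores; pass the label through unchanged.
  outputs['trip_seconds_scaled'] = tft.scale_to_z_score(inputs['trip_seconds'])
  outputs['tips'] = inputs['tips']
  return outputs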
def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     module_file: Text, serving_model_dir: Text,
                     direct_num_workers: int) -> pipeline.Pipeline:
  """Implements the chicago taxi pipeline with TFX and Kubeflow Pipelines."""
  examples = external_input(data_root)
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input=examples)
  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
  # Generates schema based on statistics files.
  infer_schema = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=False)
  # Performs anomaly detection based on statistics and data schema.
  validate_stats = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=infer_schema.outputs['schema'])
  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=infer_schema.outputs['schema'],
      module_file=module_file)
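# A sketch of compiling the pipeline above for Kubeflow Pipelines. The GCS
# paths and pipeline name are illustrative assumptions; KubeflowDagRunner
# writes a workflow archive that is then uploaded to a KFP cluster.
from tfx.orchestration.kubeflow import kubeflow_dag_runner

runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
    kubeflow_metadata_config=kubeflow_dag_runner
    .get_default_kubeflow_metadata_config())
kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
    _create_pipeline(
        pipeline_name='chicago_taxi_pipeline_kubeflow',
        pipeline_root='gs://my-bucket/tfx/pipelines',
        data_root='gs://my-bucket/tfx/data/simple',
        module_file='gs://my-bucket/tfx/taxi_utils.py',
        serving_model_dir='gs://my-bucket/tfx/serving_model',
        direct_num_workers=0))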
def _create_pipeline():
  """Implements the chicago taxi pipeline with TFX."""
  examples = csv_input(_data_root)
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input_base=examples)
  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(input_data=example_gen.outputs.examples)
  # Generates schema based on statistics files.
  infer_schema = SchemaGen(stats=statistics_gen.outputs.output)
  # Performs anomaly detection based on statistics and data schema.
  validate_stats = ExampleValidator(
      stats=statistics_gen.outputs.output, schema=infer_schema.outputs.output)
  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      input_data=example_gen.outputs.examples,
      schema=infer_schema.outputs.output,
      module_file=_taxi_module_file)
  # Uses user-provided Python function that implements a model using TF-Learn.
  trainer = Trainer(
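      # Continuation sketch for the truncated Trainer call above. In this era
      # of the API it typically continued roughly as follows; the step counts
      # are illustrative assumptions, and trainer_pb2 comes from tfx.proto.
      module_file=_taxi_module_file,
      transformed_examples=transform.outputs.transformed_examples,
      schema=infer_schema.outputs.output,
      transform_output=transform.outputs.transform_output,
      train_args=trainer_pb2.TrainArgs(num_steps=10000),
      eval_args=trainer_pb2.EvalArgs(num_steps=5000))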
def _create_pipeline():
  """Implements the chicago taxi pipeline with TFX and Kubeflow Pipelines."""
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = BigQueryExampleGen(query=_query)
  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(input_data=example_gen.outputs['examples'])
  # Generates schema based on statistics files.
  infer_schema = SchemaGen(stats=statistics_gen.outputs['output'])
  # Performs anomaly detection based on statistics and data schema.
  validate_stats = ExampleValidator(
      stats=statistics_gen.outputs['output'],
      schema=infer_schema.outputs['output'])
  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      input_data=example_gen.outputs['examples'],
      schema=infer_schema.outputs['output'],
      module_file=_taxi_utils)
  # Uses user-provided Python function that implements a model using TF-Learn
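  # BigQueryExampleGen reads through Beam's BigQuery source, so the pipeline
  # must carry GCP flags when it runs; a sketch of the usual Beam arguments
  # (project and bucket names are assumptions):
  #   beam_pipeline_args = [
  #       '--project=my-gcp-project',
  #       '--temp_location=gs://my-bucket/tmp',
  #   ]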