Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Performs anomaly detection based on statistics and data schema.
validate_stats = ExampleValidator(
statistics=statistics_gen.outputs['statistics'],
schema=infer_schema.outputs['schema'])
# Performs transformations and feature engineering in training and serving.
transform = Transform(
examples=example_gen.outputs['examples'],
schema=infer_schema.outputs['schema'],
module_file=module_file)
# Uses user-provided Python function that implements a model using TF-Learn
# to train a model on Google Cloud AI Platform.
trainer = Trainer(
custom_executor_spec=executor_spec.ExecutorClassSpec(
ai_platform_trainer_executor.Executor),
module_file=module_file,
transformed_examples=transform.outputs['transformed_examples'],
schema=infer_schema.outputs['schema'],
transform_graph=transform.outputs['transform_graph'],
train_args=trainer_pb2.TrainArgs(num_steps=10000),
eval_args=trainer_pb2.EvalArgs(num_steps=5000),
custom_config={'ai_platform_training_args': ai_platform_training_args})
# Uses TFMA to compute evaluation statistics over features of a model.
model_analyzer = Evaluator(
examples=example_gen.outputs['examples'],
model_exports=trainer.outputs['model'],
feature_slicing_spec=evaluator_pb2.FeatureSlicingSpec(specs=[
evaluator_pb2.SingleSlicingSpec(
column_for_slicing=['trip_start_hour'])
# Performs anomaly detection based on statistics and data schema.
validate_stats = ExampleValidator(
statistics=statistics_gen.outputs['statistics'],
schema=infer_schema.outputs['schema'])
# Performs transformations and feature engineering in training and serving.
transform = Transform(
examples=example_gen.outputs['examples'],
schema=infer_schema.outputs['schema'],
module_file=module_file)
# Uses user-provided Python function that implements a model using TF-Learn
# to train a model on Google Cloud AI Platform.
trainer = Trainer(
custom_executor_spec=executor_spec.ExecutorClassSpec(
ai_platform_trainer_executor.Executor),
module_file=module_file,
transformed_examples=transform.outputs['transformed_examples'],
schema=infer_schema.outputs['schema'],
transform_graph=transform.outputs['transform_graph'],
train_args=trainer_pb2.TrainArgs(num_steps=10000),
eval_args=trainer_pb2.EvalArgs(num_steps=5000),
custom_config={'ai_platform_training_args': ai_platform_training_args})
# Uses TFMA to compute evaluation statistics over features of a model.
model_analyzer = Evaluator(
examples=example_gen.outputs['examples'],
model_exports=trainer.outputs['model'],
feature_slicing_spec=evaluator_pb2.FeatureSlicingSpec(specs=[
evaluator_pb2.SingleSlicingSpec(
column_for_slicing=['trip_start_hour'])
from tfx import types
from tfx.components.base import executor_spec
from tfx.components.example_gen import component
from tfx.components.example_gen import utils
from tfx.components.example_gen.big_query_example_gen import executor
from tfx.proto import example_gen_pb2
class BigQueryExampleGen(component._QueryBasedExampleGen): # pylint: disable=protected-access
"""Official TFX BigQueryExampleGen component.
The BigQuery examplegen component takes a query, and generates train
and eval examples for downstream components.
"""
EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(executor.Executor)
def __init__(self,
query: Optional[Text] = None,
input_config: Optional[example_gen_pb2.Input] = None,
output_config: Optional[example_gen_pb2.Output] = None,
example_artifacts: Optional[types.Channel] = None,
instance_name: Optional[Text] = None):
"""Constructs a BigQueryExampleGen component.
Args:
query: BigQuery sql string, query result will be treated as a single
split, can be overwritten by input_config.
input_config: An example_gen_pb2.Input instance with Split.pattern as
BigQuery sql string. If set, it overwrites the 'query' arg, and allows
different queries per split. If any field is provided as a
RuntimeParameter, input_config should be constructed as a dict with the
model_analyzer = Evaluator(
examples=example_gen.outputs['examples'],
model_exports=trainer.outputs['model'],
feature_slicing_spec=evaluator_pb2.FeatureSlicingSpec(specs=[
evaluator_pb2.SingleSlicingSpec(
column_for_slicing=['trip_start_hour'])
]))
# Performs quality validation of a candidate model (compared to a baseline).
model_validator = ModelValidator(
examples=example_gen.outputs['examples'], model=trainer.outputs['model'])
# Checks whether the model passed the validation steps and pushes the model
# to Google Cloud AI Platform if check passed.
pusher = Pusher(
custom_executor_spec=executor_spec.ExecutorClassSpec(
ai_platform_pusher_executor.Executor),
model=trainer.outputs['model'],
model_blessing=model_validator.outputs['blessing'],
custom_config={'ai_platform_serving_args': ai_platform_serving_args})
return pipeline.Pipeline(
pipeline_name=pipeline_name,
pipeline_root=pipeline_root,
components=[
example_gen, statistics_gen, infer_schema, validate_stats, transform,
trainer, model_analyzer, model_validator, pusher
],
additional_pipeline_args={
'beam_pipeline_args': beam_pipeline_args,
},
model_analyzer = Evaluator(
examples=example_gen.outputs['examples'],
model_exports=trainer.outputs['model'],
feature_slicing_spec=evaluator_pb2.FeatureSlicingSpec(specs=[
evaluator_pb2.SingleSlicingSpec(
column_for_slicing=['trip_start_hour'])
]))
# Performs quality validation of a candidate model (compared to a baseline).
model_validator = ModelValidator(
examples=example_gen.outputs['examples'], model=trainer.outputs['model'])
# Checks whether the model passed the validation steps and pushes the model
# to Google Cloud AI Platform if check passed.
pusher = Pusher(
custom_executor_spec=executor_spec.ExecutorClassSpec(
ai_platform_pusher_executor.Executor),
model=trainer.outputs['model'],
model_blessing=model_validator.outputs['blessing'],
custom_config={'ai_platform_serving_args': ai_platform_serving_args})
return pipeline.Pipeline(
pipeline_name=pipeline_name,
pipeline_root=pipeline_root,
components=[
example_gen, statistics_gen, infer_schema, validate_stats, transform,
trainer, model_analyzer, model_validator, pusher
],
beam_pipeline_args=beam_pipeline_args,
)
from proto import presto_config_pb2
from tfx import types
from tfx.components.base import executor_spec
from tfx.components.example_gen import component
from tfx.components.example_gen import utils
from tfx.proto import example_gen_pb2
class PrestoExampleGen(component._QueryBasedExampleGen): # pylint: disable=protected-access
"""Official TFX PrestoExampleGen component.
The Presto examplegen component takes a query, connection client
configuration, and generates train and eval examples for downstream components.
"""
EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(executor.Executor)
def __init__(self,
conn_config: presto_config_pb2.PrestoConnConfig,
query: Optional[Text] = None,
input_config: Optional[example_gen_pb2.Input] = None,
output_config: Optional[example_gen_pb2.Output] = None,
example_artifacts: Optional[types.Channel] = None,
instance_name: Optional[Text] = None):
"""Constructs a PrestoExampleGen component.
Args:
conn_config: Parameters for Presto connection client.
query: Presto sql string, query result will be treated as a single split,
can be overwritten by input_config.
input_config: An example_gen_pb2.Input instance with Split.pattern as
Presto sql string. If set, it overwrites the 'query' arg, and allows
'study_best_hparams_path':
ChannelParameter(type=standard_artifacts.HyperParameters),
}
# TODO(b/139281215): these input / output names will be renamed in the future.
# These compatibility aliases are provided for forwards compatibility.
_OUTPUT_COMPATIBILITY_ALIASES = {
'model': 'model_export_path',
'best_hparams': 'study_best_hparams_path',
}
class Tuner(base_component.BaseComponent):
"""A TFX component for model hyperparameter tuning."""
SPEC_CLASS = TunerSpec
EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(executor.Executor)
def __init__(self,
examples: types.Channel = None,
schema: types.Channel = None,
module_file: Optional[Text] = None,
tuner_fn: Optional[Text] = None,
model: Optional[types.Channel] = None,
best_hparams: Optional[types.Channel] = None,
instance_name: Optional[Text] = None):
"""Construct a Tuner component.
Args:
examples: A Channel of 'ExamplesPath' type, serving as the source of
examples that are used in tuning (required). Transformed examples are
not supported yet.
schema: A Channel of 'SchemaPath' type, serving as the schema of training
In a typical TFX pipeline, the SchemaGen component generates a schema which is
consumed by the other pipeline components.
Please see https://www.tensorflow.org/tfx/data_validation for more details.
## Example
# Generates schema based on statistics files.
infer_schema = SchemaGen(statistics=statistics_gen.outputs['statistics'])
"""
# TODO(b/123941608): Update pydoc about how to use a user provided schema
SPEC_CLASS = SchemaGenSpec
EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(executor.Executor)
def __init__(
self,
statistics: Optional[types.Channel] = None,
infer_feature_shape: Optional[Union[bool,
data_types.RuntimeParameter]] = False,
output: Optional[types.Channel] = None,
stats: Optional[types.Channel] = None,
instance_name: Optional[Text] = None):
"""Constructs a SchemaGen component.
Args:
statistics: A Channel of `ExampleStatistics` type (required if spec is not
passed). This should contain at least a `train` split. Other splits are
currently ignored. _required_
infer_feature_shape: Boolean (or RuntimeParameter) value indicating