How to use the lale.datasets.data_schemas.add_schema function in lale

To help you get started, we’ve selected a few lale examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github IBM / lale / lale / datasets / sklearn_to_pandas.py View on Github external
def _bunch_to_df(bunch, schema_X, schema_y, test_size=0.2, random_state=42):
    """Convert a scikit-learn Bunch into schema-annotated pandas splits.

    Splits ``bunch.data``/``bunch.target`` into train and test portions,
    wraps them in pandas objects, and attaches JSON schemas whose
    ``minItems``/``maxItems`` are pinned to the actual row count of each
    split.

    Parameters
    ----------
    bunch : sklearn Bunch with ``data`` and ``target`` attributes
    schema_X : JSON schema for the feature table
    schema_y : JSON schema for the label column
    test_size : fraction of rows held out for testing
    random_state : seed forwarded to ``train_test_split``

    Returns
    -------
    ``(train_X, train_y), (test_X, test_y)`` — schema-annotated
    DataFrames/Series.
    """
    split = train_test_split(
        bunch.data, bunch.target,
        test_size=test_size, random_state=random_state)
    X_train_arr, X_test_arr, y_train_arr, y_test_arr = split
    # Column names come from per-feature schema descriptions when the
    # schema lists features individually; otherwise synthesize x0, x1, ...
    per_feature = schema_X['items']['items']
    if isinstance(per_feature, list):
        columns = [s['description'] for s in per_feature]
    else:
        columns = [f'x{i}' for i in range(schema_X['items']['maxItems'])]
    X_train_df = pd.DataFrame(X_train_arr, columns=columns)
    X_test_df = pd.DataFrame(X_test_arr, columns=columns)
    y_train_df = pd.Series(y_train_arr, name='target')
    y_test_df = pd.Series(y_test_arr, name='target')

    def _sized(schema, nrows):
        # Record the exact row count of this split in the schema.
        return {**schema, 'minItems': nrows, 'maxItems': nrows}

    n_train = X_train_df.shape[0]
    n_test = X_test_df.shape[0]
    add_schema = lale.datasets.data_schemas.add_schema
    train_X = add_schema(X_train_df, _sized(schema_X, n_train))
    test_X = add_schema(X_test_df, _sized(schema_X, n_test))
    train_y = add_schema(y_train_df, _sized(schema_y, n_train))
    test_y = add_schema(y_test_df, _sized(schema_y, n_test))
    return (train_X, train_y), (test_X, test_y)
github IBM / lale / lale / datasets / sklearn_to_pandas.py View on Github external
train_X_arr, test_X_arr, train_y_arr, test_y_arr = train_test_split(
        bunch.data, bunch.target,
        test_size=test_size, random_state=random_state)
    feature_schemas = schema_X['items']['items']
    if isinstance(feature_schemas, list):
        feature_names = [f['description'] for f in feature_schemas]
    else:
        feature_names = [f'x{i}' for i in range(schema_X['items']['maxItems'])]
    train_X_df = pd.DataFrame(train_X_arr, columns=feature_names)
    test_X_df = pd.DataFrame(test_X_arr, columns=feature_names)
    train_y_df = pd.Series(train_y_arr, name='target')
    test_y_df = pd.Series(test_y_arr, name='target')
    train_nrows, test_nrows = train_X_df.shape[0], test_X_df.shape[0]
    train_X = lale.datasets.data_schemas.add_schema(train_X_df, {
        **schema_X, 'minItems': train_nrows, 'maxItems': train_nrows })
    test_X = lale.datasets.data_schemas.add_schema(test_X_df, {
        **schema_X, 'minItems': test_nrows, 'maxItems': test_nrows })
    train_y = lale.datasets.data_schemas.add_schema(train_y_df, {
        **schema_y, 'minItems': train_nrows, 'maxItems': train_nrows })
    test_y = lale.datasets.data_schemas.add_schema(test_y_df, {
        **schema_y, 'minItems': test_nrows, 'maxItems': test_nrows })
    return (train_X, train_y), (test_X, test_y)
github IBM / lale / lale / datasets / openml / openml_datasets.py View on Github external
cols_X = [col for col in col_names if col != target_col]
        X = df_all[cols_X]

    labelencoder = LabelEncoder()
    y = labelencoder.fit_transform(y)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size = test_size, random_state = 0)
    if verbose:
        print(f'training set shapes: X {X_train.shape}, y {y_train.shape}')
        print(f'test set shapes:     X {X_test.shape}, y {y_test.shape}')
    if preprocess:
        from lale.datasets.data_schemas import add_schema
        X_train = add_schema(X_train.astype(np.number), recalc=True)
        y_train = add_schema(y_train.astype(np.int), recalc=True)
        X_test = add_schema(X_test.astype(np.number), recalc=True)
        y_test = add_schema(y_test.astype(np.int), recalc=True)
    else:
        X_train, X_test, y_train, y_test = add_schemas( \
            schema_orig, target_col, X_train, X_test, y_train, y_test)
    return (X_train, y_train), (X_test, y_test)
github IBM / lale / lale / datasets / sklearn_to_pandas.py View on Github external
if isinstance(feature_schemas, list):
        feature_names = [f['description'] for f in feature_schemas]
    else:
        feature_names = [f'x{i}' for i in range(schema_X['items']['maxItems'])]
    train_X_df = pd.DataFrame(train_X_arr, columns=feature_names)
    test_X_df = pd.DataFrame(test_X_arr, columns=feature_names)
    train_y_df = pd.Series(train_y_arr, name='target')
    test_y_df = pd.Series(test_y_arr, name='target')
    train_nrows, test_nrows = train_X_df.shape[0], test_X_df.shape[0]
    train_X = lale.datasets.data_schemas.add_schema(train_X_df, {
        **schema_X, 'minItems': train_nrows, 'maxItems': train_nrows })
    test_X = lale.datasets.data_schemas.add_schema(test_X_df, {
        **schema_X, 'minItems': test_nrows, 'maxItems': test_nrows })
    train_y = lale.datasets.data_schemas.add_schema(train_y_df, {
        **schema_y, 'minItems': train_nrows, 'maxItems': train_nrows })
    test_y = lale.datasets.data_schemas.add_schema(test_y_df, {
        **schema_y, 'minItems': test_nrows, 'maxItems': test_nrows })
    return (train_X, train_y), (test_X, test_y)
github IBM / lale / lale / lib / aif360 / util.py View on Github external
-------
    result : tuple

      - item 0: pandas Dataframe or None, features X

      - item 1: pandas Series or None, labels y
    """
    if 'X' in return_only:
        X = pd.DataFrame(dataset.features, columns=dataset.feature_names)
        result_X = lale.datasets.data_schemas.add_schema(X)
        assert isinstance(result_X, pd.DataFrame), type(result_X)
    else:
        result_X = None
    if 'y' in return_only:
        y = pd.Series(dataset.labels.ravel(), name=dataset.label_names[0])
        result_y = lale.datasets.data_schemas.add_schema(y)
        assert isinstance(result_y, pd.Series), type(result_y)
    else:
        result_y = None
    return result_X, result_y
github IBM / lale / lale / datasets / openml / openml_datasets.py View on Github external
if item_schema['description'].lower() != target_col]
    elem_y = [item_schema for item_schema in schema_orig['items']['items']
              if item_schema['description'].lower() == target_col][0]
    if 'enum' in elem_y:
        elem_y['enum'] = [*range(len(elem_y['enum']))]
    ncols_X = len(elems_X)
    rows_X = {
        **schema_orig['items'],
        'minItems': ncols_X, 'maxItems': ncols_X, 'items': elems_X}
    if 'json_schema' not in pd.DataFrame._internal_names:
        pd.DataFrame._internal_names.append('json_schema')
    nrows_train, nrows_test = len(train_y), len(test_y)
    train_X = add_schema(train_X, {
        **schema_orig,
        'minItems': nrows_train, 'maxItems': nrows_train, 'items': rows_X})
    test_X = add_schema(test_X, {
        **schema_orig,
        'minItems': nrows_test, 'maxItems': nrows_test, 'items': rows_X})
    train_y = add_schema(train_y, {
        **schema_orig,
         'minItems': nrows_train, 'maxItems': nrows_train, 'items': elem_y})
    test_y = add_schema(test_y, {
        **schema_orig,
        'minItems': nrows_test, 'maxItems': nrows_test, 'items': elem_y})
    return train_X, test_X, train_y, test_y
github IBM / lale / lale / datasets / sklearn_to_pandas.py View on Github external
test_size=test_size, random_state=random_state)
    feature_schemas = schema_X['items']['items']
    if isinstance(feature_schemas, list):
        feature_names = [f['description'] for f in feature_schemas]
    else:
        feature_names = [f'x{i}' for i in range(schema_X['items']['maxItems'])]
    train_X_df = pd.DataFrame(train_X_arr, columns=feature_names)
    test_X_df = pd.DataFrame(test_X_arr, columns=feature_names)
    train_y_df = pd.Series(train_y_arr, name='target')
    test_y_df = pd.Series(test_y_arr, name='target')
    train_nrows, test_nrows = train_X_df.shape[0], test_X_df.shape[0]
    train_X = lale.datasets.data_schemas.add_schema(train_X_df, {
        **schema_X, 'minItems': train_nrows, 'maxItems': train_nrows })
    test_X = lale.datasets.data_schemas.add_schema(test_X_df, {
        **schema_X, 'minItems': test_nrows, 'maxItems': test_nrows })
    train_y = lale.datasets.data_schemas.add_schema(train_y_df, {
        **schema_y, 'minItems': train_nrows, 'maxItems': train_nrows })
    test_y = lale.datasets.data_schemas.add_schema(test_y_df, {
        **schema_y, 'minItems': test_nrows, 'maxItems': test_nrows })
    return (train_X, train_y), (test_X, test_y)