How to use the synapseclient.Schema class in synapseclient

To help you get started, we’ve selected a few synapseclient examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ga4gh / cloud-interop-testing / scripts / summarize_submissions_new.py View on Github external
# NOTE(review): truncated snippet — the enclosing function definition and the
# opening `try:` matching the `except IndexError` below are outside this excerpt.
print("Searching for existing 'AllSubmissions' table...")
        # Take the id of the first child table named 'AllSubmissions_Annotated';
        # an IndexError from the [0] lookup (no match) drops into the
        # table-creation branch below.
        schema_id = [t for t in syn.getChildren(project_id, includeTypes=['table'])
                     if t['name'] == 'AllSubmissions_Annotated'][0]['id']
        schema = syn.get(schema_id)
        all_subs_table = syn.tableQuery('select * from {}'.format(schema_id))
        # When row counts match this only prints a notice — the update below
        # still runs. TODO(review): confirm against the full source whether an
        # early return was intended here.
        if all_subs_table.asDataFrame().shape[0] == submission_df.shape[0]:
            print("No new submissions since last update.")
        all_subs_table.schema = schema
        print("Updating 'AllSubmissions' table...")
        update_table = synapseclient.Table(schema, submission_df)
        all_subs_table = _update_syn_table(all_subs_table, update_table, 'objectId')
    except IndexError:
        # No existing table found: infer Synapse column types from the
        # DataFrame and create the table from scratch.
        print("Creating 'AllSubmissions' table...")
        project = syn.get(project_id)
        cols = synapseclient.as_table_columns(submission_df)
        schema = synapseclient.Schema(name='AllSubmissions_Annotated', columns=cols, parent=project)
        all_subs_table = synapseclient.Table(schema, submission_df)
    print("Storing 'AllSubmissions' table...")
    all_subs_table = syn.store(all_subs_table)
github Sage-Bionetworks / synapsePythonClient / tests / integration / test_tables.py View on Github external
def _table_setup(cls):
    """Create a two-column INTEGER test table seeded with two partial rows.

    Stores a Schema named 'PartialRowTest<uuid>' under the module-level test
    project, inserts the rows [1, None] and [None, 2], and returns the
    stored schema.
    """
    columns = [
        Column(name=col_name, columnType='INTEGER')
        for col_name in ('foo', 'bar')
    ]
    table_name = 'PartialRowTest' + str(uuid.uuid4())
    stored_schema = syn.store(Schema(name=table_name, columns=columns, parent=project))
    partial_rows = [[1, None], [None, 2]]
    syn.store(RowSet(schema=stored_schema, rows=[Row(values) for values in partial_rows]))
    return stored_schema
github ga4gh / cloud-interop-testing / scripts / summarize_submissions.py View on Github external
# NOTE(review): truncated snippet — the enclosing function definition and the
# opening `try:` matching the `except IndexError` below are outside this excerpt.
print("Searching for existing 'ValidatedSubmissions' table...")
        # Take the id of the first child table named 'ValidatedSubmissions';
        # an IndexError from the [0] lookup (no match) drops into the
        # table-creation branch below.
        schema_id = [t for t in syn.getChildren(project_id, includeTypes=['table'])
                     if t['name'] == 'ValidatedSubmissions'][0]['id']
        schema = syn.get(schema_id)
        validated_subs_table = syn.tableQuery('select * from {}'.format(schema_id))
        # When row counts match this only prints a notice — the update below
        # still runs. TODO(review): confirm against the full source whether an
        # early return was intended here.
        if validated_subs_table.asDataFrame().shape[0] == valid_df.shape[0]:
            print("No new valid submissions since last update.")
        validated_subs_table.schema = schema
        print("Updating 'ValidatedSubmissions' table...")
        update_table = synapseclient.Table(schema, valid_df)
        validated_subs_table = _update_syn_table(validated_subs_table, update_table, 'objectId')
    except IndexError:
        # No existing table found: infer Synapse column types from the
        # DataFrame and create the table from scratch.
        print("Creating 'ValidatedSubmissions' table...")
        project = syn.get(project_id)
        cols = synapseclient.as_table_columns(valid_df)
        schema = synapseclient.Schema(name='ValidatedSubmissions', columns=cols, parent=project)
        validated_subs_table = synapseclient.Table(schema, valid_df)
    print("Storing 'ValidatedSubmissions' table...")
    validated_subs_table = syn.store(validated_subs_table)
github Sage-Bionetworks / synapsePythonClient / tests / integration / test_tables.py View on Github external
def test_tables_pandas():
    """Round-trip a pandas DataFrame through a Synapse table.

    NOTE(review): truncated snippet — the code following the '# SYNPY-717'
    marker is outside this excerpt, so the final comparison is not visible.
    """
    # create a pandas DataFrame
    df = pd.DataFrame({
        'A': ("foo", "bar", "baz", "qux", "asdf"),
        'B': tuple(0.42*i for i in range(5)),
        'C': (101, 202, 303, 404, 505),
        'D': (False, True, False, True, False),
        # additional data types supported since SYNPY-347
        'int64': tuple(np.int64(range(5))),
        'datetime64': tuple(np.datetime64(d) for d in ['2005-02-01', '2005-02-02', '2005-02-03', '2005-02-04',
                                                       '2005-02-05']),
        'string_': tuple(np.string_(s) for s in ['urgot', 'has', 'dark', 'mysterious', 'past'])})

    # Derive Synapse columns from the DataFrame; cap the first column's width.
    cols = as_table_columns(df)
    cols[0].maximumSize = 20
    schema = Schema(name="Nifty Table", columns=cols, parent=project)

    # store in Synapse
    table = syn.store(Table(schema, df))

    # retrieve the table and verify
    results = syn.tableQuery('select * from %s' % table.schema.id, resultsAs='csv')
    df2 = results.asDataFrame(convert_to_datetime=True)

    # simulate rowId-version rownames for comparison
    df.index = ['%s_1' % i for i in range(1, 6)]

    # for python3 we need to convert from numpy.bytes_ to str or the equivalence comparison fails
    if six.PY3:
        df['string_'] = df['string_'].transform(str)

    # SYNPY-717
github Sage-Bionetworks / synapsePythonClient / tests / integration / test_tables.py View on Github external
def dontruntest_big_csvs():
    """Bulk-load exercise: stream many random rows to a CSV for table upload.

    The 'dontruntest_' prefix presumably keeps this out of normal test
    discovery — TODO(review) confirm against the project's test runner config.
    NOTE(review): truncated snippet — the actual upload after '# upload CSV'
    is outside this excerpt.
    """
    cols = [Column(name='name', columnType='STRING', maximumSize=1000),
            Column(name='foo', columnType='STRING', enumValues=['foo', 'bar', 'bat']),
            Column(name='x', columnType='DOUBLE'),
            Column(name='n', columnType='INTEGER'),
            Column(name='is_bogus', columnType='BOOLEAN')]

    schema1 = syn.store(Schema(name='Big Table', columns=cols, parent=project))

    # write rows to CSV file
    # NamedTemporaryFile is used only to reserve a unique on-disk filename;
    # the file is reopened below with explicit utf-8 encoding for csv.writer.
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        schedule_for_cleanup(temp.name)
        filename = temp.name

    with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
        writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=str(os.linesep))
        writer.writerow([col.name for col in cols])

        # 10 x 100 = 1000 rows of random data matching the column types above
        for i in range(10):
            for j in range(100):
                foo = cols[1].enumValues[random.randint(0, 2)]
                writer.writerow(('Robot ' + str(i*100 + j), foo, random.random()*200.0, random.randint(0, 100),
                                 random.random() >= 0.5))
    # upload CSV
github Sage-Bionetworks / synapseAnnotations / scripts / json2schema.py View on Github external
# NOTE(review): truncated snippet — the argparse setup above this line and the
# tail of the if/elif type-inference chain below are outside this excerpt.
help="JSON is already in Synapse Table Schema format")
    args = parser.parse_args()

    syn = synapseclient.login(silent=True)

    project = syn.get(args.projectId)

    # Python 2-only APIs: urllib.urlopen / urllib.splittype and
    # dict.iteritems (below) do not exist in Python 3.
    f = urllib.urlopen(path2url(args.path))
    data = json.load(f)

    # Derive the schema name from the JSON file's basename, sans extension.
    url_path = urllib.splittype(args.path)[1]
    filename = os.path.split(url_path)[1]
    schema_name = os.path.splitext(filename)[0]

    if args.synapseJSONSchema:
        # JSON is already in Synapse column format; attach it as-is.
        schema = synapseclient.Schema(name=schema_name, parent=project)
        schema.columns_to_store = data
    else:
        # Infer a Synapse column type for each key from the Python types of
        # its listed values.
        cols = []

        for k, v in data.iteritems():

            # Handle null values, assume that they will be strings
            if not v:
                column_type = "STRING"
            elif bool in map(type, v):
                column_type = "BOOLEAN"
            elif int in map(type, v):
                column_type = "INTEGER"
            elif float in map(type, v):
                column_type = "DOUBLE"
            else:
github Sage-Bionetworks / synapseAnnotations / scripts / json2schema.py View on Github external
# NOTE(review): truncated snippet — overlaps the previous excerpt; the
# enclosing loop header above and the dry-run loop body below are outside
# this view.
# Handle null values, assume that they will be strings
            if not v:
                column_type = "STRING"
            elif bool in map(type, v):
                column_type = "BOOLEAN"
            elif int in map(type, v):
                column_type = "INTEGER"
            elif float in map(type, v):
                column_type = "DOUBLE"
            else:
                column_type = "STRING"

            # One Synapse column per JSON key, keeping the listed values as
            # the column's allowed enum values.
            cols.append(synapseclient.Column(name=k, columnType=column_type,
                                             enumValues=v, maximumSize=250))

        schema = synapseclient.Schema(name=schema_name, columns=cols, parent=project)

    if args.dry_run:

        # Normalize the column dicts for printable output: blank
        # description/source fields are added, keys reordered via _key_order.
        schema_as_list = map(dict, schema.columns_to_store)
        new_schema_as_list = []

        _key_order = ['name', 'description', 'columnType', 'maximumSize', 'enumValues']

        for col in schema_as_list:
            col['description'] = ""
            col['source'] = ""

            new_enum_values = []

            for v in col['enumValues']:
github Sage-Bionetworks / synapsePythonClient / synapseutils / copy.py View on Github external
# NOTE(review): truncated snippet — the start of _copyRecursive (signature and
# earlier branches, including where `ent`/`copiedId` are bound) is outside
# this excerpt.
# with the Synapse rest API. Entity views currently also aren't
        # supported
        entities = syn.getChildren(entity, includeTypes=['folder', 'file',
                                                         'table', 'link'])
        # Recurse into each child, threading the accumulated id mapping through.
        for i in entities:
            mapping = _copyRecursive(syn, i['id'], destinationId, mapping=mapping,
                                     skipCopyAnnotations=skipCopyAnnotations, **kwargs)
    elif isinstance(ent, Folder):
        copiedId = _copyFolder(syn, ent.id, destinationId, mapping=mapping, skipCopyAnnotations=skipCopyAnnotations,
                               **kwargs)
    elif isinstance(ent, File) and "file" not in excludeTypes:
        copiedId = _copyFile(syn, ent.id, destinationId, version=version, updateExisting=updateExisting,
                             setProvenance=setProvenance, skipCopyAnnotations=skipCopyAnnotations)
    elif isinstance(ent, Link) and "link" not in excludeTypes:
        copiedId = _copyLink(syn, ent.id, destinationId, updateExisting=updateExisting)
    elif isinstance(ent, Schema) and "table" not in excludeTypes:
        copiedId = _copyTable(syn, ent.id, destinationId, updateExisting=updateExisting)
    # This is currently done because copyLink returns None sometimes
    if copiedId is not None:
        mapping[ent.id] = copiedId
        print("Copied %s to %s" % (ent.id, copiedId))
    else:
        print("%s not copied" % ent.id)
    return mapping