print("Searching for existing 'AllSubmissions' table...")
schema_id = [t for t in syn.getChildren(project_id, includeTypes=['table'])
if t['name'] == 'AllSubmissions_Annotated'][0]['id']
schema = syn.get(schema_id)
all_subs_table = syn.tableQuery('select * from {}'.format(schema_id))
if all_subs_table.asDataFrame().shape[0] == submission_df.shape[0]:
print("No new submissions since last update.")
all_subs_table.schema = schema
print("Updating 'AllSubmissions' table...")
update_table = synapseclient.Table(schema, submission_df)
all_subs_table = _update_syn_table(all_subs_table, update_table, 'objectId')
except IndexError:
print("Creating 'AllSubmissions' table...")
project = syn.get(project_id)
cols = synapseclient.as_table_columns(submission_df)
schema = synapseclient.Schema(name='AllSubmissions_Annotated', columns=cols, parent=project)
all_subs_table = synapseclient.Table(schema, submission_df)
print("Storing 'AllSubmissions' table...")
all_subs_table = syn.store(all_subs_table)
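
# Hypothetical sketch of the _update_syn_table helper called above; its real
# definition is not shown in this snippet. Assumed behavior: append only the
# rows of update_table whose value in the key column is not already present.
def _update_syn_table(syn_table, update_table, update_key):
    orig_df = syn_table.asDataFrame()       # rows already stored in Synapse
    update_df = update_table.asDataFrame()  # candidate rows
    new_rows = update_df[~update_df[update_key].isin(orig_df[update_key])]
    if not new_rows.empty:
        syn.store(synapseclient.Table(syn_table.schema, new_rows))
    return syn.tableQuery('select * from {}'.format(syn_table.schema.id))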

def _table_setup(cls):
    # set up a table
    cols = [Column(name='foo', columnType='INTEGER'), Column(name='bar', columnType='INTEGER')]
    schema = syn.store(Schema(name='PartialRowTest' + str(uuid.uuid4()), columns=cols, parent=project))
    data = [[1, None], [None, 2]]
    syn.store(RowSet(schema=schema, rows=[Row(r) for r in data]))
    return schema
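
# Illustrative follow-up to the fixture above (assumes
# `from synapseclient import PartialRowset` and a `schema` returned by
# _table_setup): patch the NULL cells with a partial row set. The row ids
# and replacement values here are made-up examples.
results = syn.tableQuery('select * from {}'.format(schema.id))
partial_changes = {1: {'bar': 3}, 2: {'foo': 4}}  # rowId -> {column: new value}
syn.store(PartialRowset.from_mapping(partial_changes, results))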
print("Searching for existing 'ValidatedSubmissions' table...")
schema_id = [t for t in syn.getChildren(project_id, includeTypes=['table'])
if t['name'] == 'ValidatedSubmissions'][0]['id']
schema = syn.get(schema_id)
validated_subs_table = syn.tableQuery('select * from {}'.format(schema_id))
if validated_subs_table.asDataFrame().shape[0] == valid_df.shape[0]:
print("No new valid submissions since last update.")
validated_subs_table.schema = schema
print("Updating 'ValidatedSubmissions' table...")
update_table = synapseclient.Table(schema, valid_df)
validated_subs_table = _update_syn_table(validated_subs_table, update_table, 'objectId')
except IndexError:
print("Creating 'ValidatedSubmissions' table...")
project = syn.get(project_id)
cols = synapseclient.as_table_columns(valid_df)
schema = synapseclient.Schema(name='ValidatedSubmissions', columns=cols, parent=project)
validated_subs_table = synapseclient.Table(schema, valid_df)
print("Storing 'ValidatedSubmissions' table...")
validated_subs_table = syn.store(validated_subs_table)
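
# Note: rather than downloading the whole table just to compare row counts,
# the freshness check above could use a count query; Synapse table SQL
# supports count(*):
row_count = syn.tableQuery(
    'select count(*) from {}'.format(schema_id)).asDataFrame().iloc[0, 0]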

def test_tables_pandas():
    # create a pandas DataFrame
    df = pd.DataFrame({
        'A': ("foo", "bar", "baz", "qux", "asdf"),
        'B': tuple(0.42*i for i in range(5)),
        'C': (101, 202, 303, 404, 505),
        'D': (False, True, False, True, False),
        # additional data types supported since SYNPY-347
        'int64': tuple(np.int64(range(5))),
        'datetime64': tuple(np.datetime64(d) for d in ['2005-02-01', '2005-02-02', '2005-02-03',
                                                       '2005-02-04', '2005-02-05']),
        'string_': tuple(np.string_(s) for s in ['urgot', 'has', 'dark', 'mysterious', 'past'])})
    cols = as_table_columns(df)
    cols[0].maximumSize = 20
    schema = Schema(name="Nifty Table", columns=cols, parent=project)

    # store in Synapse
    table = syn.store(Table(schema, df))

    # retrieve the table and verify
    results = syn.tableQuery('select * from %s' % table.schema.id, resultsAs='csv')
    df2 = results.asDataFrame(convert_to_datetime=True)

    # simulate rowId-version rownames for comparison
    df.index = ['%s_1' % i for i in range(1, 6)]

    # for python3 we need to convert from numpy.bytes_ to str or the equivalence comparison fails
    if six.PY3:
        df['string_'] = df['string_'].transform(str)
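
    # The original test presumably ends by checking that the round-tripped
    # frame matches the input; a minimal equivalent check (an assumption,
    # not the verbatim assertion):
    pd.testing.assert_frame_equal(df2, df, check_like=True, check_dtype=False)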

# SYNPY-717
def dontruntest_big_csvs():
    cols = [Column(name='name', columnType='STRING', maximumSize=1000),
            Column(name='foo', columnType='STRING', enumValues=['foo', 'bar', 'bat']),
            Column(name='x', columnType='DOUBLE'),
            Column(name='n', columnType='INTEGER'),
            Column(name='is_bogus', columnType='BOOLEAN')]
    schema1 = syn.store(Schema(name='Big Table', columns=cols, parent=project))

    # write rows to CSV file
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        schedule_for_cleanup(temp.name)
        filename = temp.name
    with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
        writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=str(os.linesep))
        writer.writerow([col.name for col in cols])
        for i in range(10):
            for j in range(100):
                foo = cols[1].enumValues[random.randint(0, 2)]
                writer.writerow(('Robot ' + str(i*100 + j), foo, random.random()*200.0,
                                 random.randint(0, 100), random.random() >= 0.5))

    # upload CSV
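    # A plausible continuation of the upload step (an assumption, though
    # synapseclient's Table() does accept a CSV file path in place of a
    # DataFrame):
    table = syn.store(Table(schema1, filename))
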
help="JSON is already in Synapse Table Schema format")
args = parser.parse_args()

syn = synapseclient.login(silent=True)
project = syn.get(args.projectId)

f = urllib.urlopen(path2url(args.path))
data = json.load(f)

url_path = urllib.splittype(args.path)[1]
filename = os.path.split(url_path)[1]
schema_name = os.path.splitext(filename)[0]

if args.synapseJSONSchema:
    schema = synapseclient.Schema(name=schema_name, parent=project)
    schema.columns_to_store = data
else:
    cols = []
    for k, v in data.iteritems():
        # Handle null values; assume that they will be strings
        if not v:
            column_type = "STRING"
        elif bool in map(type, v):
            column_type = "BOOLEAN"
        elif int in map(type, v):
            column_type = "INTEGER"
        elif float in map(type, v):
            column_type = "DOUBLE"
        else:
column_type = "STRING"
cols.append(synapseclient.Column(name=k, columnType=column_type,
enumValues=v, maximumSize=250))
schema = synapseclient.Schema(name=schema_name, columns=cols, parent=project)
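
# For reference, the else-branch above expects the input JSON to map column
# names to lists of allowed values, e.g. (an illustrative shape, not a file
# from this project):
#   {"diagnosis": ["AD", "MCI", "control"], "visit": [1, 2, 3], "is_case": [true, false]}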

if args.dry_run:
    schema_as_list = map(dict, schema.columns_to_store)
    new_schema_as_list = []
    _key_order = ['name', 'description', 'columnType', 'maximumSize', 'enumValues']
    for col in schema_as_list:
        col['description'] = ""
        col['source'] = ""
        new_enum_values = []
        for v in col['enumValues']:

    # with the Synapse rest API. Entity views currently also aren't
    # supported
    entities = syn.getChildren(entity, includeTypes=['folder', 'file',
                                                     'table', 'link'])
    for i in entities:
        mapping = _copyRecursive(syn, i['id'], destinationId, mapping=mapping,
                                 skipCopyAnnotations=skipCopyAnnotations, **kwargs)
elif isinstance(ent, Folder):
    copiedId = _copyFolder(syn, ent.id, destinationId, mapping=mapping,
                           skipCopyAnnotations=skipCopyAnnotations, **kwargs)
elif isinstance(ent, File) and "file" not in excludeTypes:
    copiedId = _copyFile(syn, ent.id, destinationId, version=version, updateExisting=updateExisting,
                         setProvenance=setProvenance, skipCopyAnnotations=skipCopyAnnotations)
elif isinstance(ent, Link) and "link" not in excludeTypes:
    copiedId = _copyLink(syn, ent.id, destinationId, updateExisting=updateExisting)
elif isinstance(ent, Schema) and "table" not in excludeTypes:
    copiedId = _copyTable(syn, ent.id, destinationId, updateExisting=updateExisting)

# This is currently done because copyLink returns None sometimes
if copiedId is not None:
    mapping[ent.id] = copiedId
    print("Copied %s to %s" % (ent.id, copiedId))
else:
    print("%s not copied" % ent.id)
return mapping
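
# Illustrative call into the fragment above (an assumption about the entry
# point; 'syn123' and 'syn456' are placeholder Synapse IDs): recursively copy
# the children of syn123 into syn456 and collect the old-id -> new-id mapping.
mapping = _copyRecursive(syn, 'syn123', 'syn456', mapping={},
                         skipCopyAnnotations=False)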