# print(dataset.full_length())
# print(len(dataset))
if export == dataset.export_hdf5:
    path = path_hdf5
    export(path, column_names=column_names, byteorder=byteorder, shuffle=shuffle, selection=selection, progress=False)
else:
    path = path_fits
    export(path, column_names=column_names, shuffle=shuffle, selection=selection, progress=False)
    with astropy.io.fits.open(path) as fitsfile:
        # make sure astropy can read the data
        bla = fitsfile[1].data
        try:
            fitsfile.writeto(path_fits_astropy)
        finally:
            os.remove(path_fits_astropy)
compare = vx.open(path)
column_names = column_names or ["x", "y", "f", "z"]
# TODO: does the order matter?
self.assertEqual(sorted(compare.get_column_names()), sorted(column_names + (["random_index"] if shuffle else [])))
for column_name in column_names:
    values = dataset.evaluate(column_name)
    if selection:
        self.assertEqual(sorted(compare.columns[column_name]), sorted(values[dataset.mask]))
    else:
        if shuffle:
            indices = compare.columns["random_index"]
            self.assertEqual(sorted(compare.columns[column_name]), sorted(values[indices]))
        else:
            self.assertEqual(sorted(compare.columns[column_name]), sorted(values[:length]))
compare.close_files()
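# UI export test: drive the (mocked) export dialogs via self.app.export() and compare the result with a direct dataset.export_* call.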
# self.dataset_concat_dup references self.dataset, so set its active_fraction to 1 again
for shuffle in [False, True]:
    for selection in [False, True]:
        for export in [dataset.export_fits, dataset.export_hdf5] if byteorder == ">" else [dataset.export_hdf5]:
            type = "hdf5" if export == dataset.export_hdf5 else "fits"
            if shuffle and selection:
                continue  # TODO: export should fail on this combination
            # print(column_names, byteorder, shuffle, selection, type)
            if export == dataset.export_hdf5:
                path = path_hdf5
                path_ui = path_hdf5_ui
                export(path, column_names=column_names, byteorder=byteorder, shuffle=shuffle, selection=selection)
            else:
                path = path_fits
                path_ui = path_fits_ui
                export(path, column_names=column_names, shuffle=shuffle, selection=selection)
            compare_direct = vx.open(path)
            dialogs.set_choose(1 if selection else 0).then("=<>".index(byteorder))
            # select columns
            dialogs.set_select_many(True, [name in column_names for name in dataset.get_column_names()])
            counter_confirm = CallCounter(return_value=shuffle)
            counter_info = CallCounter()
            dialogs.dialog_confirm = counter_confirm
            dialogs.dialog_info = counter_info
            dialogs.get_path_save = lambda *args: path_ui
            dialogs.ProgressExecution = dialogs.FakeProgressExecution
            import sys
            sys.stdout.flush()
            self.app.export(type=type)
            compare_ui = vx.open(path_ui)
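# CLI helper: print length, name, path and per-column metadata for a dataset given on the command line.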
def stat_main(argv):
    parser = make_stat_parser(argv[0])
    args = parser.parse_args(argv[1:])
    import vaex
    dataset = vaex.open(args.dataset)
    if dataset is None:
        print("Cannot open input: %s" % args.dataset)
        sys.exit(1)
    print("dataset:")
    print(" length: %s" % len(dataset))
    print(" full_length: %s" % dataset.full_length())
    print(" name: %s" % dataset.name)
    print(" path: %s" % dataset.path)
    print(" columns: ")
    desc = dataset.description
    if desc:
        print(" description: %s" % desc)
    for name in dataset.get_column_names():
        print(" - %s: " % name)
        desc = dataset.descriptions.get(name)
        if desc:
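# Standalone Qt entry point: open a dataset and show its variables table.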
def main(argv=sys.argv):
    dataset = vaex.open(argv[1])
    app = QtGui.QApplication(argv)
    table = VariablesTable(None)
    table.set_dataset(dataset)
    table.show()
    table.raise_()
    sys.exit(app.exec_())
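# Same entry point pattern, but showing the columns table instead.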
def main(argv=sys.argv):
    dataset = vaex.open(argv[1])
    app = QtGui.QApplication(argv)
    table = ColumnsTable(None)
    table.set_dataset(dataset)
    table.show()
    table.raise_()
    sys.exit(app.exec_())
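# Open a single file directly, or combine multiple files with vx.open_many.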
def open(self):
    return vx.open_many(self.filenames_vaex) if len(self.filenames_vaex) != 1 else vx.open(self.filenames_vaex[0])
def open(self, path):
    """Open a dataset from path and add it to the UI."""
    logger.debug("open dataset: %r", path)
    if path.startswith("http") or path.startswith("ws"):
        dataset = vaex.open(path, thread_mover=self.call_in_main_thread)
    else:
        dataset = vaex.open(path)
    self.add_recently_opened(path)
    self.dataset_selector.add(dataset)
    return dataset
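# Fragment of the export command: pick the dataset source (TAP, CSV or plain file) based on args.task.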
else:
    return 1
if args.task == "tap":
    dataset = vaex.dataset.DatasetTap(args.tap_url, args.table_name)
    if not args.quiet:
        print("exporting from {tap_url} table name {table_name} to {output}".format(tap_url=args.tap_url, table_name=args.table_name, output=args.output))
if args.task == "csv":
    # dataset = vaex.dataset.DatasetTap(args.tap_url, args.table_name)
    if not args.quiet:
        print("exporting from {input} to {output}".format(input=args.input, output=args.output))
if args.task == "file":
    if args.input[0] == "@":
        inputs = open(args.input[1:]).readlines()
        dataset = vaex.open_many(inputs)
    else:
        dataset = vaex.open(args.input)
    if not args.quiet:
        print("exporting from {input} to {output}".format(input=args.input, output=args.output))
if dataset is None and args.task not in ["csv"]:
    if not args.quiet:
        print("Cannot open input")
    return 1
if dataset:
    dataset.set_active_fraction(args.fraction)
if args.list:
    if not args.quiet:
        print("column names: " + " ".join(dataset.get_column_names()))
else:
    if args.task == "csv":
        row_count = -1  # the header does not count
        with open(args.input) as lines:
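# Rough benchmark script: load a column with vaex and prepare per-thread buffers plus numexpr/theano expressions for timing.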
import numpy as np
import vaex as vx
import numexpr as ne
import vaex.multithreading as mt
import timeit
import math
import vaex.execution
import threading
lock = threading.Lock()
import sys
pool = mt.pool
ds = vx.open("data/Aq-A-2-999-shuffled-10percent.hdf5") if len(sys.argv) == 1 else vx.open(sys.argv[1])
x = ds("x")
xlim = x.minmax()
data = ds.columns["x"]
print(len(data), len(data) // 4, len(data) % 4, math.ceil(float(len(data)) / pool.nthreads))
splits = 10
buf_size = int(1e7)
buf = np.zeros((pool.nthreads, len(data) // pool.nthreads + 10), dtype=np.float64)
print(buf.shape)
import concurrent.futures
import theano.tensor as T
from theano import function
x = T.dvector('x')
z = eval("x**2")
func = function([x], z)
def case_a():
    # executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers)