Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def setUp(self):
    """Build the fixture: ``self.n`` references to ``range(100)`` concatenated."""
    self.n = 5
    self.base = range(100)
    # Derive the number of copies from self.n rather than repeating the
    # literal 5, so the fixture size is defined in exactly one place.
    self.data = ConcatDataset(*[self.base for _ in range(self.n)])
def test_concats_multiple_files(self):
    """Check ``TextDataset`` in ``'concat'`` mode over the same file twice.

    The dataset is compared against ``self.lines`` repeated twice through
    iteration, indexing, ``len()`` and the ``_length`` attribute, and the
    type of the wrapped ``_dataset`` is checked before and after ``map``.
    """
    path = self.fp.name
    single = self.lines
    doubled = single + single
    total = len(single) * 2

    data = TextDataset([path, path], mode='concat')

    # Iteration order. zip() stops at the shorter side, so the total count
    # is verified separately by the length assertions below.
    for actual, wanted in zip(data, doubled):
        self.assertEqual(actual, wanted)

    # Index access must agree with the same expected sequence.
    for index, wanted in enumerate(doubled):
        self.assertEqual(data[index], wanted)

    # Reported sizes and the final element.
    self.assertEqual(len(data), total)
    self.assertEqual(data._length, total)
    self.assertEqual(data[len(data) - 1], single[-1])

    # Type of the wrapped dataset, directly and after an identity map.
    self.assertIsInstance(data._dataset, lineflow.core.ConcatDataset)
    self.assertIsInstance(data.map(lambda x: x)._dataset, TextDataset)
def test_dunder_add(self):
    """Check that chaining ``+`` on ``self.data`` three times gives a ConcatDataset.

    The result must compare equal, as a sequence, to ``self.base`` repeated
    three times.
    """
    combined = self.data + self.data + self.data
    tripled = list(self.base) * 3

    self.assertSequenceEqual(combined, tripled)
    self.assertIsInstance(combined, ConcatDataset)
def __init__(self,
             paths: Union[str, List[str]],
             encoding: str = 'utf-8',
             mode: str = 'zip') -> None:
    """Wrap one or more text files as a dataset.

    Args:
        paths: A single path, or a list of paths.
        encoding: Encoding passed to every ``easyfile.TextFile``.
        mode: How a list of paths is combined: ``'zip'`` wraps the files
            in a ``ZipDataset``, ``'concat'`` in a ``ConcatDataset``.
            Not consulted when ``paths`` is a single string.

    Raises:
        ValueError: If ``paths`` is a list and ``mode`` is neither
            ``'zip'`` nor ``'concat'``.
        TypeError: If ``paths`` is neither a ``str`` nor a ``list``.
    """
    if isinstance(paths, str):
        dataset = easyfile.TextFile(paths, encoding)
    elif isinstance(paths, list):
        # Validate ``mode`` before opening anything, so an invalid mode is
        # reported without touching the files.
        if mode == 'zip':
            combine = ZipDataset
        elif mode == 'concat':
            combine = ConcatDataset
        else:
            raise ValueError(f"only 'zip' and 'concat' are valid for 'mode', but '{mode}' is given.")
        dataset = combine(*[easyfile.TextFile(p, encoding) for p in paths])
    else:
        # Previously this case fell through with ``dataset`` unbound and
        # failed below with an UnboundLocalError; fail with a clear message.
        raise TypeError(
            f"'paths' must be a str or a list of str, but {type(paths).__name__} is given.")
    super().__init__(dataset)
def lineflow_concat(*datasets: DatasetMixin) -> ConcatDataset:
    """Concatenate the given datasets into a single ``ConcatDataset``.

    Args:
        *datasets: The datasets to concatenate, in order. The annotation on
            a ``*args`` parameter describes each individual argument, so it
            is ``DatasetMixin`` rather than ``List[DatasetMixin]``.

    Returns:
        ``ConcatDataset(*datasets)``.
    """
    return ConcatDataset(*datasets)
def __add__(self, other: 'Dataset') -> 'ConcatDataset':
    """Implement ``self + other`` by building a ConcatDataset from both operands.

    ``self`` is passed first and ``other`` second.
    """
    operands = (self, other)
    return ConcatDataset(*operands)