Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
@ocrd_cli_options
def cli_param_dumper(*args, **kwargs): # pylint: disable=unused-argument
    # Print the value passed as the 'parameter' keyword argument to stdout,
    # serialized as JSON. Positional arguments are accepted but ignored.
    # Documented with comments (not a docstring) so that nothing new is
    # attached to the decorated function object.
    parameter = kwargs['parameter']
    serialized = json.dumps(parameter)
    print(serialized)
def cli_dummy_processor(*args, **kwargs):
    # Forward every positional and keyword argument unchanged to
    # ocrd_cli_wrap_processor, with DummyProcessor as its first argument,
    # and return whatever that call returns.
    # Documented with comments (not a docstring) so that nothing new is
    # attached to the function object.
    outcome = ocrd_cli_wrap_processor(DummyProcessor, *args, **kwargs)
    return outcome
def setUp(self):
    """Prepare a bagger and a workspace on a scratch copy of the test asset.

    The 'kant_aufklaerung_1784' asset tree is copied into a freshly created
    temporary directory, and a Workspace is opened on the 'data'
    subdirectory of that copy.

    Attributes set: resolver, bagger, tempdir, bagdir, workspace_dir,
    workspace.
    """
    self.resolver = Resolver()
    self.bagger = WorkspaceBagger(self.resolver)

    # NOTE(review): the temporary directory is not removed in this method;
    # confirm that a matching tearDown cleans up self.tempdir.
    self.tempdir = mkdtemp()
    self.bagdir = join(self.tempdir, 'kant_aufklaerung_1784')
    copytree(assets.path_to('kant_aufklaerung_1784'), self.bagdir)

    self.workspace_dir = join(self.bagdir, 'data')
    # workspace_dir is already a complete path, so it is passed as-is
    # (a single-argument join() would return it unchanged).
    self.workspace = Workspace(self.resolver, directory=self.workspace_dir)
def setUp(self):
    """Open a workspace on the METS file of asset 'SBB0000F29300010000'.

    Attributes set: resolver, workspace.
    """
    mets_url = assets.url_of('SBB0000F29300010000/data/mets.xml')
    resolver = Resolver()
    self.resolver = resolver
    self.workspace = resolver.workspace_from_url(mets_url)
def test_validate_sequence(self):
    """Check that validate_tasks rejects task lists with unknown input groups.

    Two cases are exercised against a workspace created in a temporary
    directory from the 'kant_aufklaerung_1784' METS:

    * a second step reading file group FOO, which the first step does not
      write, and
    * a single step reading file group IN.

    Both are expected to raise an Exception whose message matches the
    corresponding pattern.
    """
    resolver = Resolver()
    with TemporaryDirectory() as tempdir:
        mets_path = assets.path_to('kant_aufklaerung_1784/data/mets.xml')
        workspace = resolver.workspace_from_url(mets_path, dst_dir=tempdir)

        # Parameter file referenced via -p by every task line below.
        params_path = Path(tempdir, 'params.json')
        params_path.write_text('{"param1": true}')

        unknown_second_input = [
            'sample-processor-required-param -I OCR-D-IMG -O OUT1 -p %s' % params_path,
            'sample-processor-required-param -I FOO -O OUT2 -p %s' % params_path,
        ]
        unknown_first_input = [
            'sample-processor-required-param -I IN -O OUT1 -p %s' % params_path,
        ]

        # Parsing is kept inside each assertRaisesRegex context so that the
        # guarded region covers both parsing and validation.
        with self.assertRaisesRegex(Exception, "Input file group not contained in METS or produced by previous steps: FOO'"):
            validate_tasks(list(map(ProcessorTask.parse, unknown_second_input)), workspace)

        with self.assertRaisesRegex(Exception, "Input fileGrp.@USE='IN'. not in METS!"):
            validate_tasks(list(map(ProcessorTask.parse, unknown_first_input)), workspace)
def setUp(self):
    """Reset the backup directory and prepare a scratch workspace copy.

    Any existing BACKUPDIR is deleted first. The 'kant_aufklaerung_1784'
    asset tree is then copied to '<tempdir>/bag' and a Workspace is opened
    on its 'data' subdirectory.

    Attributes set: resolver, bagger, tempdir, bagdir, workspace_dir,
    workspace.
    """
    # Start from a clean slate: drop leftovers under BACKUPDIR.
    if exists(BACKUPDIR):
        rmtree(BACKUPDIR)

    resolver = Resolver()
    self.resolver = resolver
    self.bagger = WorkspaceBagger(resolver)

    scratch_root = mkdtemp()
    bag_root = join(scratch_root, 'bag')
    self.tempdir = scratch_root
    self.bagdir = bag_root
    copytree(assets.path_to('kant_aufklaerung_1784'), bag_root)

    data_dir = join(bag_root, 'data')
    self.workspace_dir = data_dir
    self.workspace = Workspace(resolver, directory=data_dir)
def runTest(self):
    """Run region segmentation, then line segmentation, and save the METS.

    The workspace is created from METS_HEROLD_SMALL in WORKSPACE_DIR. Region
    segmentation reads file group INPUT and writes OCR-D-SEG-BLOCK; line
    segmentation reads OCR-D-SEG-BLOCK and writes OCR-D-SEG-LINE.
    """
    workspace = Resolver().workspace_from_url(METS_HEROLD_SMALL, dst_dir=WORKSPACE_DIR)

    region_step = TesserocrSegmentRegion(
        workspace,
        input_file_grp="INPUT",
        output_file_grp="OCR-D-SEG-BLOCK",
    )
    region_step.process()
    # The METS is not saved between the two steps:
    # workspace.save_mets()

    line_step = TesserocrSegmentLine(
        workspace,
        input_file_grp="OCR-D-SEG-BLOCK",
        output_file_grp="OCR-D-SEG-LINE",
    )
    line_step.process()

    workspace.save_mets()
def runTest(self):
    """Run region and line segmentation, saving the METS after each step.

    The workspace is created from METS_HEROLD_SMALL in WORKSPACE_DIR using a
    resolver constructed with cache_enabled=True. Region segmentation reads
    file group INPUT and writes OCR-D-SEG-BLOCK; line segmentation reads
    OCR-D-SEG-BLOCK and writes OCR-D-SEG-LINE. The recognition step is
    currently disabled.
    """
    caching_resolver = Resolver(cache_enabled=True)
    workspace = caching_resolver.workspace_from_url(METS_HEROLD_SMALL, directory=WORKSPACE_DIR)

    region_segmenter = Tesseract3RegionSegmenter(
        workspace,
        inputGrp="INPUT",
        outputGrp="OCR-D-SEG-BLOCK",
    )
    region_segmenter.process()
    workspace.save_mets()

    line_segmenter = Tesseract3LineSegmenter(
        workspace,
        inputGrp="OCR-D-SEG-BLOCK",
        outputGrp="OCR-D-SEG-LINE",
    )
    line_segmenter.process()
    workspace.save_mets()

    # TODO takes too long
    # Tesseract3Recognizer(workspace, inputGrp="OCR-D-SEG-LINE", outputGrp="OCR-D-OCR-TESS").process()
    workspace.save_mets()
def test_422(self):
    """
    # OCR-D/core#422

    Validate a four-step task chain in which each step reads the file
    group written by the step before it. The test passes when
    validate_tasks raises nothing.
    """
    chained_steps = [
        "sample-processor -I OCR-D-IMG -O OCR-D-SEG-BLOCK",
        "sample-processor -I OCR-D-SEG-BLOCK -O OCR-D-SEG-LINE",
        "sample-processor -I OCR-D-SEG-LINE -O OCR-D-SEG-WORD",
        "sample-processor -I OCR-D-SEG-WORD -O OCR-D-OCR-TESS",
    ]
    resolver = Resolver()
    with TemporaryDirectory() as tempdir:
        mets_path = assets.path_to('kant_aufklaerung_1784/data/mets.xml')
        workspace = resolver.workspace_from_url(mets_path, dst_dir=tempdir)
        tasks = list(map(ProcessorTask.parse, chained_steps))
        validate_tasks(tasks, workspace)
def setUp(self):
self.resolver = Resolver()