How to use the asrtoolkit.file_utils.name_cleaners.basename function in asrtoolkit

To help you get started, we’ve selected a few asrtoolkit examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github finos / greenkey-asrtoolkit / asrtoolkit / data_structures / corpus.py View on Github external
"transcript_file":
                    time_aligned_text(strip_extension(fl) + ".stm"),
                }) for audio_extension in audio_extensions_to_try
                for fl in (get_files(self.location, audio_extension) if self.
                           location else [])
                if (os.path.exists(strip_extension(fl) + ".stm"))
            ]

            # gather all exemplars from /stm and /sph subdirectories if present
            self.exemplars += [
                exemplar({
                    "audio_file":
                    audio_file(fl),
                    "transcript_file":
                    time_aligned_text(self.location + "/stm/" +
                                      basename(strip_extension(fl)) + ".stm"),
                }) for audio_extension in audio_extensions_to_try for fl in
                (get_files(self.location +
                           "/sph/", audio_extension) if self.location else [])
                if (os.path.exists(self.location + "/stm/" +
                                   basename(strip_extension(fl)) + ".stm"))
            ]
github finos / greenkey-asrtoolkit / asrtoolkit / data_structures / corpus.py View on Github external
def validate(self):
        """
        Check that this exemplar's audio and transcript files form a usable pair

        Returns True only when the names produced by
        basename(strip_extension(...)) for both locations are equal and
        os.path.getsize reports a nonzero size for each file; False otherwise
        """
        audio_path = self.audio_file.location
        transcript_path = self.transcript_file.location

        audio_stem = basename(strip_extension(audio_path))
        transcript_stem = basename(strip_extension(transcript_path))

        # Mismatched names fail immediately, before either file is sized
        if not audio_stem == transcript_stem:
            return False

        # An empty audio file fails without looking at the transcript
        if not os.path.getsize(audio_path):
            return False

        # getsize yields an integer; callers are promised a real boolean
        return bool(os.path.getsize(transcript_path))
github finos / greenkey-asrtoolkit / asrtoolkit / data_structures / corpus.py View on Github external
def validate(self):
        """
        Validate the exemplar: the audio and transcript names must agree once
        reduced with basename(strip_extension(...)), and neither file may be
        empty according to os.path.getsize

        Returns a boolean
        """
        audio_location = self.audio_file.location
        transcript_location = self.transcript_file.location

        names_match = basename(strip_extension(audio_location)) == basename(
            strip_extension(transcript_location))

        # `and` short-circuits, so file sizes are only queried when the names
        # match, and the transcript is only sized when the audio is non-empty.
        # The chain evaluates to the last operand reached (possibly an int),
        # hence the explicit conversion to bool
        return bool(names_match and os.path.getsize(audio_location)
                    and os.path.getsize(transcript_location))
github finos / greenkey-asrtoolkit / asrtoolkit / data_structures / corpus.py View on Github external
def prepare_for_training(self, target, sample_rate=16000, nested=False):
        """
        Prepare this exemplar for training under the `target` directory

        The audio file is handed to its own prepare_for_training with the
        requested sample_rate, and the transcript is written out via its
        write method. When `nested` is truthy the audio goes under
        target/sph and the transcript under target/stm; otherwise both are
        placed directly in target.

        Returns a new exemplar built from both results, or None if either
        result is falsy
        """
        # Optional subdirectory components, empty for the flat layout
        audio_subdir, transcript_subdir = (("sph", ), ("stm", )) if nested else ((), ())

        audio_target = os.path.join(target, *audio_subdir,
                                    basename(self.audio_file.location))
        transcript_target = os.path.join(
            target, *transcript_subdir,
            basename(self.transcript_file.location))

        # Audio is prepared first, then the transcript is written regardless
        # of the audio outcome
        prepared_audio = self.audio_file.prepare_for_training(
            audio_target, sample_rate=sample_rate)
        written_transcript = self.transcript_file.write(transcript_target)

        if not (prepared_audio and written_transcript):
            return None

        return exemplar({
            "audio_file": prepared_audio,
            "transcript_file": written_transcript
        })