How to use the srsly.write_jsonl function in srsly

To help you get started, we’ve selected a few srsly examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github explosion / spaCy / tests / regression / test_issue3526.py View on Github external
def test_entity_ruler_from_disk_old_format_safe(patterns, en_vocab):
    """Load an EntityRuler from a patterns-only ``.jsonl`` file on disk.

    The patterns of a ruler built with ``overwrite_ents=True`` are written
    to ``entity_ruler.jsonl`` inside a temporary directory, and a fresh
    ruler is then loaded via ``from_disk`` using the same path *without*
    the ``.jsonl`` suffix. The test checks that every original pattern is
    present after loading, that both rulers have the same length, and that
    the ``overwrite`` attribute of the loaded ruler is not the same object
    as the original's.
    """
    nlp = Language(vocab=en_vocab)
    source_ruler = EntityRuler(nlp, patterns=patterns, overwrite_ents=True)
    with make_tempdir() as tmpdir:
        # Path handed to from_disk: deliberately carries no suffix.
        base_path = tmpdir / "entity_ruler"
        # Only the patterns are serialized, next to base_path as a JSONL file.
        jsonl_path = base_path.with_suffix(".jsonl")
        srsly.write_jsonl(jsonl_path, source_ruler.patterns)
        loaded_ruler = EntityRuler(nlp).from_disk(base_path)
        for expected in source_ruler.patterns:
            assert expected in loaded_ruler.patterns
        assert len(loaded_ruler) == len(source_ruler)
        # NOTE(review): presumably the loaded ruler keeps its default
        # `overwrite` value because no cfg was written -- confirm.
        assert loaded_ruler.overwrite is not source_ruler.overwrite
github explosion / spaCy / spacy / pipeline / entityruler.py View on Github external
            "patterns": lambda p: srsly.write_jsonl(
                p.with_suffix(".jsonl"), self.patterns
            ),
github explosion / sense2vec / sense2vec / prodigy_recipes.py View on Github external
nlp = spacy.load(spacy_model)
    log(f"RECIPE: Loaded spaCy model '{spacy_model}'")
    DB = connect()
    if dataset not in DB:
        msg.fail(f"Can't find dataset '{dataset}'", exits=1)
    examples = DB.get_dataset(dataset)
    terms = set([eg["word"] for eg in examples if eg["answer"] == "accept"])
    if case_sensitive:
        patterns = [[{"text": t.text} for t in nlp.make_doc(term)] for term in terms]
    else:
        terms = set([word.lower() for word in terms])
        patterns = [[{"lower": t.lower_} for t in nlp.make_doc(term)] for term in terms]
    patterns = [{"label": label, "pattern": pattern} for pattern in patterns]
    log(f"RECIPE: Generated {len(patterns)} patterns")
    if not dry:
        srsly.write_jsonl(output_file, patterns)
    return patterns
github explosion / spaCy / spacy / cli / convert.py View on Github external
output_file = Path(output_dir) / Path(input_path.parts[-1]).with_suffix(suffix)
        if file_type == "json":
            srsly.write_json(output_file, data)
        elif file_type == "jsonl":
            srsly.write_jsonl(output_file, data)
        elif file_type == "msg":
            srsly.write_msgpack(output_file, data)
        msg.good(
            "Generated output file ({} documents): {}".format(len(data), output_file)
        )
    else:
        # Print to stdout
        if file_type == "json":
            srsly.write_json("-", data)
        elif file_type == "jsonl":
            srsly.write_jsonl("-", data)
github justindujardin / mathy / libraries / mathy_python / mathy / agents / zero / practice_session.py View on Github external
def save_training_examples(self):
        """Write all collected examples as JSONL into the model directory.

        The examples in ``self.all_examples`` are first written to a local
        temporary file and then copied to
        ``<model_dir>/INPUT_EXAMPLES_FILE_NAME``, where ``model_dir`` comes
        from ``self.runner.config.model_dir``. The model directory is
        created if it does not exist yet.

        Returns:
            The destination file path as a string.

        Raises:
            Whatever the write or copy step raises; the temporary file is
            removed before the exception propagates.
        """
        model_dir = Path(self.runner.config.model_dir)
        # exist_ok=True already makes this a no-op for an existing directory.
        model_dir.mkdir(parents=True, exist_ok=True)

        # Write to local then copy (don't thrash virtual file systems like GCS)
        tmp_fd, tmp_file = tempfile.mkstemp()
        # mkstemp() hands back an *open* OS-level descriptor. The file is
        # written by path below, so close the descriptor right away instead
        # of leaking one per call.
        os.close(tmp_fd)
        try:
            srsly.write_jsonl(tmp_file, self.all_examples)
            out_file = model_dir / INPUT_EXAMPLES_FILE_NAME
            copyfile(tmp_file, str(out_file))
        finally:
            # Always clean up the temp file, even if writing or copying fails.
            os.remove(tmp_file)
        return str(out_file)
github explosion / spaCy / spacy / cli / convert.py View on Github external
input_data,
        n_sents=n_sents,
        seg_sents=seg_sents,
        use_morphology=morphology,
        lang=lang,
        model=model,
        no_print=no_print,
    )
    if output_dir != "-":
        # Export data to a file
        suffix = ".{}".format(file_type)
        output_file = Path(output_dir) / Path(input_path.parts[-1]).with_suffix(suffix)
        if file_type == "json":
            srsly.write_json(output_file, data)
        elif file_type == "jsonl":
            srsly.write_jsonl(output_file, data)
        elif file_type == "msg":
            srsly.write_msgpack(output_file, data)
        msg.good(
            "Generated output file ({} documents): {}".format(len(data), output_file)
        )
    else:
        # Print to stdout
        if file_type == "json":
            srsly.write_json("-", data)
        elif file_type == "jsonl":
            srsly.write_jsonl("-", data)
github explosion / spaCy / spacy / pipeline / entityruler.py View on Github external
DOCS: https://spacy.io/api/entityruler#to_disk
        """
        path = ensure_path(path)
        cfg = {
            "overwrite": self.overwrite,
            "phrase_matcher_attr": self.phrase_matcher_attr,
            "ent_id_sep": self.ent_id_sep,
        }
        serializers = {
            "patterns": lambda p: srsly.write_jsonl(
                p.with_suffix(".jsonl"), self.patterns
            ),
            "cfg": lambda p: srsly.write_json(p, cfg),
        }
        if path.suffix == ".jsonl":  # user wants to save only JSONL
            srsly.write_jsonl(path, self.patterns)
        else:
            to_disk(path, serializers, {})
github microsoft / SkillsExtractorCognitiveSearch / services / skills.py View on Github external
if pattern:
                            label = f"SKILL|{skill_id}"
                            patterns.append({"label": label, "pattern": pattern})

                            for t in split_tokens:
                                if t in skill_name:
                                    patterns.append(
                                        {
                                            "label": label,
                                            "pattern": self._skill_pattern(
                                                skill_name, t
                                            ),
                                        }
                                    )

            srsly.write_jsonl(patterns_path, patterns)
            return patterns
        else:
            patterns = srsly.read_jsonl(patterns_path)
            return patterns