How to use the medcat.cdb.CDB function in medcat

To help you get started, we’ve selected a few medcat examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github CogStack / MedCAT / medcat / cat.py View on Github external
self.train = False
        data = json.load(open(data_path))
        cui_counts = {}

        if test_size == 0:
            test_set = data
            train_set = data
        else:
            train_set, test_set, _, _ = make_mc_train_test(data, self.cdb, test_size=test_size)

        if print_stats:
            self._print_stats(test_set, use_filters=use_filters, use_cui_doc_limit=use_cui_doc_limit, use_overlaps=use_overlaps,
                    use_groups=use_groups)

        if reset_cdb:
            self.cdb = CDB()
            self.spacy_cat.cdb = self.cdb
            self.spacy_cat.cat_ann.cdb = self.cdb

        if reset_cui_count:
            # Get all CUIs
            cuis = []
            for project in train_set['projects']:
                for doc in project['documents']:
                    for ann in doc['annotations']:
                        cuis.append(ann['cui'])
            for cui in set(cuis):
                if cui in self.cdb.cui_count:
                    self.cdb.cui_count[cui] = 10

        # Remove entites that were terminated
        if not never_terminate:
github CogStack / MedCAT / api / api.py View on Github external
from flask import Flask
from medcat.cdb import CDB
from medcat.utils.spacy_pipe import SpacyPipe
from medcat.utils.vocab import Vocab
from medcat.cat import CAT
from flask import request
import os
import json
from spacy import displacy

vocab = Vocab()
cdb = CDB()
cdb.load_dict(os.getenv("CDB_MODEL", '/cat/models/med_ann_norm.dat'))
vocab.load_dict(path=os.getenv("VOCAB_MODEL", '/cat/models/med_ann_norm_dict.dat'))
cat = CAT(cdb, vocab=vocab)
cat.spacy_cat.train = False

app = Flask(__name__)

@app.route('/api_test', methods=['GET', 'POST'])
def api_test():
    if request.method == 'POST':
        return cat.get_json(request.form.get('text'))

    content = get_file('api_test.html')
    return content

@app.route('/doc', methods=['POST'])
github CogStack / MedCAT / medcat / prepare_umls.py View on Github external
def __init__(self, vocab=None, pretrained_cdb=None, tokenizer=None):
        self.vocab = vocab
        if pretrained_cdb is None:
            self.cdb = CDB()
        else:
            self.cdb = pretrained_cdb
        # Build the required spacy pipeline
        self.nlp = SpacyPipe(spacy_split_all, disable=['ner', 'parser'])
        self.nlp.add_punct_tagger(tagger=partial(spacy_tag_punct, skip_stopwords=False))
        # Get the tokenizer
        if tokenizer is not None:
            self.tokenizer = tokenizer
        else:
            self.tokenizer = self._tok #BertTokenizer.from_pretrained('bert-base-uncased')
github CogStack / MedCAT / medcat / prepare_cdb.py View on Github external
def __init__(self, vocab=None, pretrained_cdb=None, word_tokenizer=None):
        self.vocab = vocab
        if pretrained_cdb is None:
            self.cdb = CDB()
        else:
            self.cdb = pretrained_cdb

        # Build the required spacy pipeline
        self.nlp = SpacyPipe(spacy_split_all, disable=['ner', 'parser'])
        self.nlp.add_punct_tagger(tagger=partial(spacy_tag_punct, skip_stopwords=self.SKIP_STOPWORDS))
        # Get the tokenizer
        if word_tokenizer is not None:
            self.tokenizer = word_tokenizer
        else:
            self.tokenizer = self._tok
github CogStack / MedCAT / webapp / webapp / demo / views.py View on Github external
neg_path = os.getenv('NEG_PATH', '/tmp/mc_negated')

try:
    if not os.path.exists(vocab_path):
        vocab_url = os.getenv('VOCAB_URL')
        urlretrieve(vocab_url, vocab_path)

    if not os.path.exists(cdb_path):
        cdb_url = os.getenv('CDB_URL')
        print("*"*399)
        print(cdb_url)
        urlretrieve(cdb_url, cdb_path)

    vocab = Vocab()
    vocab.load_dict(vocab_path)
    cdb = CDB()
    cdb.load_dict(cdb_path)
    mc_negated = MetaCAT(save_dir=neg_path)
    mc_negated.load()
    cat = CAT(cdb=cdb, vocab=vocab, meta_cats=[mc_negated])
    cat.spacy_cat.MIN_ACC = 0.30
    cat.spacy_cat.MIN_ACC_TH = 0.30
    cat.spacy_cat.ACC_ALWAYS = True
except Exception as e:
    print(str(e))

def get_html_and_json(text):
    doc = cat(text)

    a = json.loads(cat.get_json(text))
    for i in range(len(a['entities'])):
        ent = a['entities'][i]
github CogStack / MedCAT / medcat / cat.py View on Github external
""" Given data learns vector embeddings for concepts
        in a suppervised way.

        data_path:  path to data in json format
        """
        self.train = False
        data = json.load(open(data_path))

        if print_stats:
            if test_set:
                self._print_stats(test_set, use_filters=use_filters)
            else:
                self._print_stats(data, use_filters=use_filters)

        if reset_cdb:
            self.cdb = CDB()
            self.spacy_cat.cdb = self.cdb
            self.spacy_cat.cat_ann.cdb = self.cdb

        if reset_cui_count:
            # Get all CUIs
            cuis = []
            for project in data['projects']:
                for doc in project['documents']:
                    for ann in doc['annotations']:
                        cuis.append(ann['cui'])
            for cui in set(cuis):
                if cui in self.cdb.cui_count:
                    self.cdb.cui_count[cui] = 10

        # Remove entites that were terminated
        for project in data['projects']:
github CogStack / MedCAT / api / cogstack_api.py View on Github external
from flask import Flask
from flask import Response
import json
from medcat.cdb import CDB
from medcat.utils.vocab import Vocab
from medcat.cat import CAT
from flask import request
import os


vocab = Vocab()
cdb = CDB()

cdb.load_dict(os.getenv("CDB_MODEL", '/cat/models/med_ann_norm.dat'))
vocab.load_dict(path=os.getenv("VOCAB_MODEL", '/cat/models/med_ann_norm_dict.dat'))
cat = CAT(cdb, vocab=vocab)

cat.spacy_cat.train = False

app = Flask(__name__)


app_name = 'MEDCAT'
app_lang = 'en'
app_version = os.getenv("CAT_VERSION", '0.1.0')


@app.route('/api/info', methods=['GET'])
github CogStack / MedCAT / medcat / utils / helpers.py View on Github external
import json

    use_groups = False
    if groups is not None:
        use_groups = True

    f1s = {}
    ps = {}
    rs = {}
    tps = {}
    fns = {}
    fps = {}
    cui_counts = {}
    examples = {}
    for i in range(cv):
        cdb = CDB()
        cdb.load_dict(cdb_path)
        vocab = Vocab()
        vocab.load_dict(path=vocab_path)
        cat = CAT(cdb, vocab=vocab)
        cat.train = False
        cat.spacy_cat.MIN_ACC = 0.30
        cat.spacy_cat.MIN_ACC_TH = 0.30

        # Add groups if they exist
        if groups is not None:
            for cui in cdb.cui2info.keys():
                if "group" in cdb.cui2info[cui]:
                    del cdb.cui2info[cui]['group']
            groups = json.load(open("./groups.json"))
            for k,v in groups.items():
                for val in v: