# Fragment from MedCAT's CAT class: supervised training on a MedCATtrainer
# export. The method name and signature are reconstructed from the docstring
# of a later revision (shown further below) and from the parameters the body
# uses -- an assumption, since the original snippet starts mid-method.
def train_supervised(self, data_path, test_size=0, print_stats=True,
                     use_filters=False, use_cui_doc_limit=False,
                     use_overlaps=False, use_groups=False, reset_cdb=False,
                     reset_cui_count=False, never_terminate=False):
    self.train = False
    data = json.load(open(data_path))
    cui_counts = {}

    # With no test split requested, evaluate and train on the full dataset
    if test_size == 0:
        test_set = data
        train_set = data
    else:
        train_set, test_set, _, _ = make_mc_train_test(data, self.cdb,
                                                       test_size=test_size)

    if print_stats:
        self._print_stats(test_set, use_filters=use_filters,
                          use_cui_doc_limit=use_cui_doc_limit,
                          use_overlaps=use_overlaps, use_groups=use_groups)

    # Optionally start from a fresh concept database
    if reset_cdb:
        self.cdb = CDB()
        self.spacy_cat.cdb = self.cdb
        self.spacy_cat.cat_ann.cdb = self.cdb

    if reset_cui_count:
        # Get all CUIs annotated in the training set
        cuis = []
        for project in train_set['projects']:
            for doc in project['documents']:
                for ann in doc['annotations']:
                    cuis.append(ann['cui'])
        # Reset each annotated CUI's count to a small fixed value
        for cui in set(cuis):
            if cui in self.cdb.cui_count:
                self.cdb.cui_count[cui] = 10

    # Remove entities that were terminated in the trainer. The loop body is
    # an assumed completion; the original snippet is truncated here.
    if not never_terminate:
        for project in train_set['projects']:
            for doc in project['documents']:
                for ann in doc['annotations']:
                    if ann.get('killed', False):
                        self.unlink_concept_name(ann['cui'], ann['value'])
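# Usage sketch for the method above, assuming it lives on MedCAT's CAT class.
# The model paths and export filename are placeholders, not files shipped
# with MedCAT:
cdb = CDB()
cdb.load_dict('/cat/models/med_ann_norm.dat')
vocab = Vocab()
vocab.load_dict(path='/cat/models/med_ann_norm_dict.dat')
cat = CAT(cdb, vocab=vocab)
cat.train_supervised(data_path='mc_trainer_export.json', test_size=0.2)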
# Flask demo service: annotate free text with MedCAT.
from flask import Flask
from flask import request
from medcat.cdb import CDB
from medcat.utils.spacy_pipe import SpacyPipe
from medcat.utils.vocab import Vocab
from medcat.cat import CAT
from spacy import displacy
import os
import json

vocab = Vocab()
cdb = CDB()
cdb.load_dict(os.getenv("CDB_MODEL", '/cat/models/med_ann_norm.dat'))
vocab.load_dict(path=os.getenv("VOCAB_MODEL", '/cat/models/med_ann_norm_dict.dat'))
cat = CAT(cdb, vocab=vocab)
cat.spacy_cat.train = False  # inference only

app = Flask(__name__)

@app.route('/api_test', methods=['GET', 'POST'])
def api_test():
    if request.method == 'POST':
        return cat.get_json(request.form.get('text'))
    # get_file is a helper defined elsewhere in the original app
    content = get_file('api_test.html')
    return content

@app.route('/doc', methods=['POST'])
def doc():
    # Assumed handler body: the original snippet ends at the decorator.
    # Given the displacy import above, it plausibly rendered the annotated
    # document as HTML.
    return displacy.render(cat(request.form.get('text')), style='ent')
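# To run the service locally (host/port are arbitrary choices), e.g.:
#   curl -X POST -F "text=Patient has diabetes." localhost:5000/api_test
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)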
# Constructor fragment, variant 1 (enclosing class not shown). The imports
# below are the ones the body needs; the module paths follow MedCAT 0.x and
# are an assumption.
from functools import partial
from medcat.preprocessing.tokenizers import spacy_split_all
from medcat.preprocessing.taggers import spacy_tag_punct

def __init__(self, vocab=None, pretrained_cdb=None, tokenizer=None):
    self.vocab = vocab
    if pretrained_cdb is None:
        self.cdb = CDB()
    else:
        self.cdb = pretrained_cdb

    # Build the required spacy pipeline
    self.nlp = SpacyPipe(spacy_split_all, disable=['ner', 'parser'])
    self.nlp.add_punct_tagger(tagger=partial(spacy_tag_punct, skip_stopwords=False))

    # Get the tokenizer
    if tokenizer is not None:
        self.tokenizer = tokenizer
    else:
        self.tokenizer = self._tok  # e.g. BertTokenizer.from_pretrained('bert-base-uncased')
# Constructor fragment, variant 2: same pipeline, but the word-level
# tokenizer is explicit and skip_stopwords comes from a class attribute.
def __init__(self, vocab=None, pretrained_cdb=None, word_tokenizer=None):
    self.vocab = vocab
    if pretrained_cdb is None:
        self.cdb = CDB()
    else:
        self.cdb = pretrained_cdb

    # Build the required spacy pipeline
    self.nlp = SpacyPipe(spacy_split_all, disable=['ner', 'parser'])
    self.nlp.add_punct_tagger(tagger=partial(spacy_tag_punct,
                                             skip_stopwords=self.SKIP_STOPWORDS))

    # Get the tokenizer
    if word_tokenizer is not None:
        self.tokenizer = word_tokenizer
    else:
        self.tokenizer = self._tok
# Download models if missing, then build a CAT with a negation MetaCAT.
# The vocab_path/cdb_path definitions are assumed (mirroring neg_path);
# the original snippet only shows neg_path.
from urllib.request import urlretrieve
from medcat.meta_cat import MetaCAT

vocab_path = os.getenv('VOCAB_PATH', '/tmp/vocab.dat')
cdb_path = os.getenv('CDB_PATH', '/tmp/cdb.dat')
neg_path = os.getenv('NEG_PATH', '/tmp/mc_negated')
try:
    if not os.path.exists(vocab_path):
        vocab_url = os.getenv('VOCAB_URL')
        urlretrieve(vocab_url, vocab_path)

    if not os.path.exists(cdb_path):
        cdb_url = os.getenv('CDB_URL')
        print(cdb_url)  # debug: show where the CDB is fetched from
        urlretrieve(cdb_url, cdb_path)

    vocab = Vocab()
    vocab.load_dict(vocab_path)
    cdb = CDB()
    cdb.load_dict(cdb_path)

    mc_negated = MetaCAT(save_dir=neg_path)
    mc_negated.load()

    cat = CAT(cdb=cdb, vocab=vocab, meta_cats=[mc_negated])
    # Lower the accuracy thresholds so more candidate entities are kept
    cat.spacy_cat.MIN_ACC = 0.30
    cat.spacy_cat.MIN_ACC_TH = 0.30
    cat.spacy_cat.ACC_ALWAYS = True
except Exception as e:
    print(str(e))
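# Once loading succeeds, the CAT instance annotates free text directly.
# The sample sentence is illustrative; 'entities' is the top-level result
# key, as used elsewhere in these snippets:
import json
result = json.loads(cat.get_json("Patient denies chest pain."))
for ent in result['entities']:
    print(ent)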
def get_html_and_json(text):
    # Returns displacy HTML plus the parsed JSON annotations. The loop body
    # and return are an assumed completion; the original is truncated here.
    doc = cat(text)
    a = json.loads(cat.get_json(text))
    for i in range(len(a['entities'])):
        ent = a['entities'][i]
        ent['id'] = i  # illustrative per-entity bookkeeping
    return displacy.render(doc, style='ent'), a
""" Given data learns vector embeddings for concepts
in a suppervised way.
data_path: path to data in json format
"""
self.train = False
data = json.load(open(data_path))
if print_stats:
if test_set:
self._print_stats(test_set, use_filters=use_filters)
else:
self._print_stats(data, use_filters=use_filters)
if reset_cdb:
self.cdb = CDB()
self.spacy_cat.cdb = self.cdb
self.spacy_cat.cat_ann.cdb = self.cdb
if reset_cui_count:
# Get all CUIs
cuis = []
for project in data['projects']:
for doc in project['documents']:
for ann in doc['annotations']:
cuis.append(ann['cui'])
for cui in set(cuis):
if cui in self.cdb.cui_count:
self.cdb.cui_count[cui] = 10
# Remove entites that were terminated
for project in data['projects']:
# A second Flask service, exposing service metadata alongside the model.
from flask import Flask
from flask import Response
from flask import request
from medcat.cdb import CDB
from medcat.utils.vocab import Vocab
from medcat.cat import CAT
import json
import os

vocab = Vocab()
cdb = CDB()
cdb.load_dict(os.getenv("CDB_MODEL", '/cat/models/med_ann_norm.dat'))
vocab.load_dict(path=os.getenv("VOCAB_MODEL", '/cat/models/med_ann_norm_dict.dat'))
cat = CAT(cdb, vocab=vocab)
cat.spacy_cat.train = False

app = Flask(__name__)
app_name = 'MEDCAT'
app_lang = 'en'
app_version = os.getenv("CAT_VERSION", '0.1.0')

@app.route('/api/info', methods=['GET'])
def info():
    # Assumed handler body: the original snippet ends at the decorator. It
    # plausibly returned the metadata defined above.
    return Response(json.dumps({'name': app_name, 'language': app_lang,
                                'version': app_version}),
                    mimetype='application/json')
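# Exercise the endpoint in-process with Flask's built-in test client:
with app.test_client() as client:
    resp = client.get('/api/info')
    print(resp.status_code, resp.get_json())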
# Cross-validation fragment: reload a fresh model for each fold and collect
# per-CUI metrics. cv, cdb_path, vocab_path and groups come from the
# enclosing (not shown) scope.
import json

use_groups = False
if groups is not None:
    use_groups = True

# Per-CUI metric accumulators across folds
f1s = {}
ps = {}
rs = {}
tps = {}
fns = {}
fps = {}
cui_counts = {}
examples = {}

for i in range(cv):
    cdb = CDB()
    cdb.load_dict(cdb_path)
    vocab = Vocab()
    vocab.load_dict(path=vocab_path)
    cat = CAT(cdb, vocab=vocab)
    cat.train = False
    cat.spacy_cat.MIN_ACC = 0.30
    cat.spacy_cat.MIN_ACC_TH = 0.30

    # Add groups if they exist: clear any stale group labels, then reload
    # the mapping from disk
    if groups is not None:
        for cui in cdb.cui2info.keys():
            if "group" in cdb.cui2info[cui]:
                del cdb.cui2info[cui]['group']
        groups = json.load(open("./groups.json"))
        for k, v in groups.items():
            for val in v:
                # Assumed completion: the original snippet is truncated here
                if val in cdb.cui2info:
                    cdb.cui2info[val]['group'] = k
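# Illustrative shape of groups.json: a group name mapped to its member CUIs.
# The CUIs below are examples, not taken from the snippet:
example_groups = {
    "diabetes": ["C0011849", "C0011860"],
    "hypertension": ["C0020538"],
}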