import json
import logging
import os
import sys

# load_config, parse_config, find_node, load_index, save_index and
# create_index_directory are project helpers defined elsewhere in this repo.


def load_pbg_config():
    """Read the config.json file and build a config schema object from it.

    Returns:
        [object] -- config schema object for the JSON file
    """
    try:
        initialise_config()
        logging.info(CHECKPOINT_DIRECTORY)
        pbg_config_path = os.path.join(
            CHECKPOINT_DIRECTORY, GLOBAL_CONFIG["PBG_CONFIG_NAME"]
        )
        with open(pbg_config_path) as f:
            # the with-block closes the file; the redundant f.close() is dropped
            pbg_config = json.load(f)
        pbg_config = parse_config(pbg_config)
        return pbg_config
    except Exception as e:
        logging.info("Could not convert to pbg format")
        logging.info(e, exc_info=True)
        sys.exit(e)
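
# Usage sketch: load_pbg_config() returns a schema object built from the JSON
# at <CHECKPOINT_DIRECTORY>/<PBG_CONFIG_NAME>. A PyTorch-BigGraph config
# typically carries fields like the ones below; the values are illustrative
# placeholders, not taken from this project's actual file:
#
# {
#   "entity_path": "data",
#   "checkpoint_path": "checkpoints",
#   "entities": {"user": {"num_partitions": 1}},
#   "relations": [{"name": "follows", "lhs": "user", "rhs": "user"}],
#   "dimension": 200
# }
#
# pbg_config = load_pbg_config()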
def search_in_index(index_filename, query_entity_embedding, neighbors):
    # `neighbors` (the number of nearest neighbours to return) must be passed
    # in explicitly: it is not defined at module level, so relying on it as an
    # implicit global was a bug in the original snippet.
    try:
        index_path = os.path.join(CHECKPOINT_DIRECTORY, "index", index_filename)
        index = load_index(index_path)
        distances, indices = index.search(query_entity_embedding, neighbors)
        return distances, indices
    except Exception as e:
        logging.info(f"{e}", exc_info=True)
        return None, None  # keep a consistent arity for callers on failure
def find_entity_data(entity_id):
    """Read entity_dictionary.json (the ids of all nodes) to locate the index of an entity.

    Arguments:
        entity_id {[str]} -- id of the node to be searched

    Returns:
        [dict] -- dict specifying the partition number, the index of the
        entity, and the file it lives in
    """
    try:
        entity = find_node(entity_id)
        logging.info(f"ENTITY FOUND : {entity}")
        entity_type = entity["entity_type"]
        entity_id = str(entity["entity_id"])
        with open(os.path.join(DATA_DIRECTORY, "entity_dictionary.json"), "r") as f:
            all_entity_dictionary = json.load(f)
        entity_dictionaries = [
            ent
            for ent in all_entity_dictionary["all_entities"]
            if ent["entity_type"] == entity_type
        ]  # all the dictionaries whose entity label matches
        entity_dictionary = [
            entity_dict
            for entity_dict in entity_dictionaries
            if entity_id in entity_dict["entity_ids"]
        ][0]
        entity_index = entity_dictionary["entity_ids"].index(entity_id)
        partition_number = int(entity_dictionary["partition_number"])
        entity_file = entity_dictionary["entity_file"]
        # The original snippet is truncated mid-return; returning the three
        # values computed above matches what the docstring promises.
        return dict(
            partition_number=partition_number,
            entity_index=entity_index,
            entity_file=entity_file,
        )
    except Exception as e:
        logging.info(f"{e}", exc_info=True)
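
# Sketch (assumed) of the entity_dictionary.json layout implied by the
# lookups above; the field names come from the code, the values are made up:
#
# {
#   "all_entities": [
#     {
#       "entity_type": "user",
#       "partition_number": "0",
#       "entity_file": "user_0",
#       "entity_ids": ["1001", "1002", "1003"]
#     }
#   ]
# }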
def initialise_config():
    # This fragment assigns the module-level paths used by the functions
    # above. The enclosing def and the extra names on the global statement are
    # an assumption: the original snippet only declared `global FILENAMES`,
    # but the other names are read as module-level globals elsewhere here.
    global FILENAMES, GLOBAL_CONFIG, DATA_DIRECTORY, CHECKPOINT_DIRECTORY
    GLOBAL_CONFIG = load_config("GLOBAL_CONFIG")
    FILENAMES = {
        "train": os.path.join(
            os.getcwd(),
            GLOBAL_CONFIG["PROJECT_NAME"],
            GLOBAL_CONFIG["DATA_DIRECTORY"],
            GLOBAL_CONFIG["TSV_FILE_NAME"] + ".tsv",
        )
    }  # path to the tsv file with the training data
    DATA_DIRECTORY = os.path.join(
        os.getcwd(), GLOBAL_CONFIG["PROJECT_NAME"], GLOBAL_CONFIG["DATA_DIRECTORY"]
    )
    CHECKPOINT_DIRECTORY = os.path.join(
        os.getcwd(),
        GLOBAL_CONFIG["PROJECT_NAME"],
        GLOBAL_CONFIG["CHECKPOINT_DIRECTORY"],
    )
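
# Sketch of the configuration that load_config("GLOBAL_CONFIG") is expected
# to return, inferred from the keys read above; the values are placeholders:
#
# GLOBAL_CONFIG = {
#     "PROJECT_NAME": "my_graph_project",
#     "DATA_DIRECTORY": "data",
#     "CHECKPOINT_DIRECTORY": "checkpoints",
#     "TSV_FILE_NAME": "train_edges",
#     "PBG_CONFIG_NAME": "config.json",
# }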
def create_indexes():
    try:
        initialise_config()
        logging.info(
            "-------------------------CHECKING FOR INDEXES------------------------"
        )
        create_index_directory()
        with open(os.path.join(DATA_DIRECTORY, "entity_dictionary.json"), "r") as f:
            all_entity_dictionary = json.load(f)
        for ent in all_entity_dictionary["all_entities"]:
            try:
                partition_number = ent["partition_number"]
                entity_type = ent["entity_type"]
                save_index(entity_type, partition_number)
            except Exception as e:
                logging.info(f"error in index creation: {e}", exc_info=True)
                continue
        logging.info("Done")
    except Exception as e:
        logging.info(f"error in index creation: {e}", exc_info=True)
def map_back_to_entities(entity_file_list, search_result, neighbors):
    count = 1
    all_similar_ents = list()
    for result in search_result:
        # result[-1] is the row's position in the flattened search output;
        # integer-dividing by `neighbors` recovers which entity file it came from.
        entity_file_list_index = int(result[-1] / neighbors)
        similar_entity_index = int(result[0])
        similar_entity_distance = result[1]
        if similar_entity_distance == 0:
            continue  # distance 0 is the query entity itself; skip it
        entity_filename = (
            f"entity_names_{entity_file_list[entity_file_list_index]}.json"
        )
        entity_filepath = os.path.join(DATA_DIRECTORY, entity_filename)
        with open(entity_filepath, "r") as f:  # close the file instead of leaking it
            node_list = json.load(f)
        similar_entity_id = node_list[similar_entity_index]
        similar_entity = find_node(similar_entity_id)
        similar_entity["distance"] = similar_entity_distance
        count += 1
        all_similar_ents.append(similar_entity)
        if count == neighbors:
            break
    return all_similar_ents
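
# End-to-end sketch of how these pieces appear to fit together. The shape of
# `search_result` is inferred from the indexing above: each row carries the
# neighbour's position within its entity file (result[0]), its distance
# (result[1]) and the row's position in the flattened output (result[-1]).
# All names, filenames and sizes here are illustrative assumptions.
#
# import numpy as np
#
# query = np.random.rand(1, 200).astype("float32")
# entity_files = ["user_0", "user_1"]  # one index per partition (assumed naming)
#
# search_result, position = [], 0
# for part, entity_file in enumerate(entity_files):
#     distances, indices = search_in_index(f"index_user_{part}", query, neighbors=10)
#     for idx, dist in zip(indices[0], distances[0]):
#         search_result.append((int(idx), float(dist), position))
#         position += 1
#
# similar = map_back_to_entities(entity_files, search_result, neighbors=10)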