# Split up current query matchbook to recover filters
# (assumed pattern: split on commas directly followed by a new keyword)
matchbook_splitter = re.compile(r"(?!\'),(?=\w)")
if chunks > 1:
    if len(props) / chunks < 2:
        chunks = len(props) + 1
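# Illustrative split of an assumed AFLUX matchbook string:
#     matchbook_splitter.split("species('Si','O'),nspecies(2)")
#     -> ["species('Si','O')", "nspecies(2)"]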
query_error = False
for chunk in grouper(props, (len(props) // chunks) + 1):
logger.debug('Requesting property chunk {} with {} records'.format(chunks, k))
props_to_request = list(set(c for c in chunk if c is not None))
# Exclude the orderby keyword if it is not requested in this chunk.
# If it is included, remove it from the requested properties to avoid
# duplicating it in the request URI.
orderby_prop = None
orderby_str = None
for prop in props_to_request:
if orderby_kw.lstrip('$').startswith(prop):  # tolerate leading '$' (descending-sort marker)
if orderby_kw.startswith('$'):
orderby_str = orderby_kw[1:]
else:
orderby_str = orderby_kw
orderby_prop = prop
break
logger.debug('Properties to select: {}'.format(
    'all' if not config_['select']
    else config_['select']))
if config_['select']:
kws_to_chunk = config_['select']
else:
kws_to_chunk = self.keywords
k = config_['k']
filter_vals = config_['filter']
chunk_idx = 0
chunk_size = 5
total_chunks = (len(kws_to_chunk) + chunk_size - 1) // chunk_size
for chunk in grouper(kws_to_chunk, chunk_size):
chunk_idx += 1
logger.debug("Property chunk {} of {}".format(chunk_idx, total_chunks))
props = [getattr(AFLOW_KWS, c) for c in chunk if c is not None]
if len(props) == 0:
continue
data_query = self._get_query_obj(config_['catalog'], k,
config_['exclude'], filter_vals)
data_query.select(*props)
success = False
while not success:
try:
for entry in data_query:
yield entry, config_['targets']
success = True
except ValueError:
    if data_query.N == 0:  # Empty query: nothing to yield, stop retrying
        break
from maggma.builders import Builder
import numpy as np
from propnet.core.graph import Graph
from propnet import ureg
# noinspection PyUnresolvedReferences
import propnet.models
from propnet.core.registry import Registry
import warnings
import logging
warnings.warn("The correlation_with_mp module is deprecated. Use the correlation module instead.",
DeprecationWarning)
logger = logging.getLogger(__name__)
class CorrelationBuilder(Builder):
"""
A class to calculate the correlation between properties derived by or used in propnet
using a suite of regression tools. Uses the Builder architecture for optional parallel
processing of data.
Note: serialization of builder does not work with custom correlation functions, although
interactive use does support them.
"""
# TODO: Add these symbols to propnet so we don't have to bring them in explicitly?
MP_QUERY_PROPS = ["piezo.eij_max", "elasticity.universal_anisotropy",
"diel.poly_electronic", "total_magnetization", "efermi",
"magnetism.total_magnetization_normalized_vol"]
PROPNET_PROPS = [v.name for v in Registry("symbols").values()
if (v.category == 'property' and v.shape == 1)]
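    # The "custom correlation functions" noted above are assumed to be plain
    # callables taking two equal-length value arrays and returning a scalar, e.g.:
    #
    #     def abs_spearman(x, y):
    #         from scipy.stats import spearmanr
    #         return abs(spearmanr(x, y).correlation)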
from maggma.builders import Builder
from itertools import combinations_with_replacement
import numpy as np
import json
from minepy import MINE
from collections import defaultdict
from propnet.symbols import DEFAULT_SYMBOLS
from propnet import ureg
import random
class MicBuilder(Builder):
def __init__(self, propnet_store, mp_store, correlation_store, out_file, **kwargs):
self.propnet_store = propnet_store
self.mp_store = mp_store
self.correlation_store = correlation_store
self.out_file = out_file
super(MicBuilder, self).__init__(sources=[propnet_store, mp_store],
targets=[correlation_store],
**kwargs)
def get_items(self):
data = defaultdict(dict)
propnet_props = [v.name for v in DEFAULT_SYMBOLS.values()
if (v.category == 'property' and v.shape == 1)]
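        # How minepy's MINE scores a property pair (x_vals/y_vals are
        # illustrative equal-length value arrays):
        #
        #     m = MINE()
        #     m.compute_score(x_vals, y_vals)
        #     mic = m.mic()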
from propnet.dbtools.aflow_ingester_defaults import default_query_configs, default_files_to_ingest
from aflow.keywords import load as kw_load, reset as kw_reset
from aflow import K as AFLOW_KWS
from maggma.builders import Builder
from maggma.utils import grouper
from monty.json import jsanitize
from pymongo import UpdateOne
import logging
import time
import datetime
from urllib.error import HTTPError
logger = logging.getLogger(__name__)
class AflowIngester(Builder):
"""
Builds MongoDB collections from AFLOW data using the AFLOW and AFLUX web APIs.
"""
_available_kws = dict()
"""Contains supported keywords in the AFLUX schema
"""
kw_load(_available_kws)
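    # Sketch of the AFLUX query pattern this builder drives via the aflow
    # package (catalog, filter, and batch size below are illustrative):
    #
    #     from aflow import search, K
    #     query = search(catalog='icsd', batch_size=512).filter(
    #         K.nspecies == 2).select(K.auid, K.compound)
    #     for entry in query:
    #         ...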
def __init__(self, data_target, auid_target=None,
keywords=None, query_configs=None,
files_to_ingest=None, filter_null_properties=False,
**kwargs):
"""
Initialize the database builder.
Args:
from maggma.builders import Builder
from itertools import combinations_with_replacement
import numpy as np
import json
from collections import defaultdict
from propnet.symbols import DEFAULT_SYMBOLS
from propnet.core.graph import Graph
from propnet import ureg
import logging
import re
logger = logging.getLogger(__name__)
class CorrelationBuilder(Builder):
"""
A class to calculate the correlation between properties derived by or used in propnet
using a suite of regression tools. Uses the Builder architecture for optional parallel
processing of data.
Note: serialization of builder does not work with custom correlation functions, although
interactive use does support them.
"""
def __init__(self, propnet_store, mp_store,
correlation_store, out_file=None,
funcs='linlsq', **kwargs):
"""
Constructor for the correlation builder.
Args:
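        # Construction sketch (stores are placeholders; the function names
        # passed to 'funcs' are illustrative):
        #
        #     builder = CorrelationBuilder(propnet_store, mp_store,
        #                                  correlation_store,
        #                                  out_file='correlations.json',
        #                                  funcs=['linlsq', 'mic'])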
from maggma.builders import Builder
import numpy as np
import json
from collections import defaultdict
from propnet.core.graph import Graph
from propnet import ureg
import logging
import re
# noinspection PyUnresolvedReferences
import propnet.models
from propnet.core.registry import Registry
logger = logging.getLogger(__name__)
class CorrelationBuilder(Builder):
"""
A class to calculate the correlation between properties derived by or used in propnet
using a suite of regression tools. Uses the Builder architecture for optional parallel
processing of data.
Note: serialization of builder does not work with custom correlation functions, although
interactive use does support them.
"""
PROPNET_PROPS = [v.name for v in Registry("symbols").values()
if (v.category == 'property' and v.shape == 1)]
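    # Sketch of a 'linlsq'-style score, assuming it is the R^2 of a linear
    # least-squares fit between two property value arrays x and y:
    #
    #     slope, intercept = np.polyfit(x, y, 1)
    #     r_squared = 1 - np.var(y - (slope * x + intercept)) / np.var(y)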
def __init__(self, propnet_store,
correlation_store, out_file=None,
funcs='linlsq', props=None,
sample_size=None, from_quantity_db=True,
                 **kwargs):
        # Minimal constructor body, assuming straightforward attribute storage
        self.propnet_store = propnet_store
        self.correlation_store = correlation_store
        self.out_file = out_file
        self.funcs = funcs
        self.props = props or self.PROPNET_PROPS
        self.sample_size = sample_size
        self.from_quantity_db = from_quantity_db
        super(CorrelationBuilder, self).__init__(sources=[propnet_store],
                                                 targets=[correlation_store],
                                                 **kwargs)
from maggma.builders import Builder
from maggma.utils import grouper
from pymongo import InsertOne
import pydash
from itertools import chain
from propnet import ureg
from propnet.core.registry import Registry
# noinspection PyUnresolvedReferences
import propnet.symbols
class SeparationBuilder(Builder):
"""
Converts old-style propnet database into separate quantity-centered
and materials-centered databases.
"""
def __init__(self, propnet_store, quantity_store, material_store=None,
criteria=None, props=None, chunk_size=100, insert_only=False):
"""
Args:
propnet_store (Mongolike Store): old-style propnet store
quantity_store (Mongolike Store): store for quantities
material_store (Mongolike Store): store for materials
criteria (dict): JSON-style criteria for MongoDB find() query
**kwargs: arguments to Builder parent class
"""
def get_items(self):
# Borrowed from MapBuilder
keys = self.propnet_store.distinct('task_id', criteria=self.criteria)
containers = self.props + ['inputs']
self.total = len(keys)
for chunked_keys in grouper(keys, self.chunk_size, None):
chunked_keys = list(filter(None.__ne__, chunked_keys))
for doc in list(
self.propnet_store.query(
criteria={'task_id': {
"$in": chunked_keys
}},
properties=containers + ['task_id'],
)):
yield doc
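        # Note: grouper() pads the final chunk with the fillvalue (None), which
        # is why the None entries are filtered back out above, e.g.:
        #     list(grouper([1, 2, 3], 2, None))  ->  [(1, 2), (3, None)]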
from maggma.stores import MongograntStore
import pybel  # Open Babel; on openbabel>=3, use 'from openbabel import pybel'


def process_sdf_file(filename):
mp_pubchem = MongograntStore("rw:knowhere.lbl.gov/mp_pubchem", "mp_pubchem",
key="pubchem_id")
mp_pubchem.connect()
coll = mp_pubchem.collection
skipped = 0
pubchem_molecules = []
for i, mol in enumerate(pybel.readfile('sdf', filename)):
try:
pubchem_id = int(mol.data['PUBCHEM_COMPOUND_CID'])
xyz = mol.write(format="xyz")
data = {'pubchem_id': pubchem_id,
'xyz': xyz}
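            # 'keys' and 'key_map' are assumed to be module-level mappings of SDF
            # data fields to output document fields (defined elsewhere in this file)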
for key in keys:
if key in mol.data:
data[key_map[key]] = mol.data[key]
            pubchem_molecules.append(data)
        except (KeyError, ValueError):
            # Count and skip molecules missing required fields
            skipped += 1