Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
import os
import os.path
import sys, re
import time
from datetime import datetime
import biothings, config
biothings.config_for_app(config)
from config import DATA_ARCHIVE_ROOT, logger as logging
from biothings.hub.dataload.dumper import FTPDumper
class DBSNPDumper(FTPDumper):
SRC_NAME = "dbsnp"
SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
FTP_HOST = 'ftp.ncbi.nlm.nih.gov'
CWD_DIR = '/snp/latest_release/JSON'
VERSIONS_DIR = '/snp/archive'
FILE_RE = 'refsnp-chr*.json.bz2'
MAX_PARALLEL_DUMP = 10
SCHEDULE = "0 9 * * *"
def set_release(self):
import os
import os.path
import sys
import time
import biothings, config
biothings.config_for_app(config)
from config import DATA_ARCHIVE_ROOT
from biothings.hub.dataload.dumper import ManualDumper
from biothings.utils.common import unzipall
class UMLSDumper(ManualDumper):
    """Manual dumper declaration for the ``umls`` data source."""

    # Short source identifier; also reused below as the archive sub-folder name.
    SRC_NAME = "umls"
    # Folder for this source, located directly under DATA_ARCHIVE_ROOT.
    SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
    #VERSION = '2020-4-7'
import os
import os.path
import sys
import time
import biothings, config
biothings.config_for_app(config)
from config import DATA_ARCHIVE_ROOT
from biothings.hub.dataload.dumper import ManualDumper
from biothings.utils.common import unzipall
class UMLSDumper(ManualDumper):
SRC_NAME = "umls"
SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
def __init__(self, *args, **kwargs):
super(UMLSDumper,self).__init__(*args,**kwargs)
self.logger.info("""
Assuming manual download from: https://www.nlm.nih.gov/research/umls/licensedcontent/umlsknowledgesources.html
- umls-2017AA-full.zip
import os
import os.path
import sys
import time
from datetime import datetime
import biothings, config
biothings.config_for_app(config)
from config import DATA_ARCHIVE_ROOT, logger as logging
from biothings.hub.dataload.dumper import FTPDumper, FilesystemDumper
from biothings.utils.hub_db import get_src_dump
class UniprotDumper(FTPDumper):
SRC_NAME = "uniprot"
SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
FTP_HOST = 'ftp.uniprot.org'
CWD_DIR = '/pub/databases/uniprot/current_release/knowledgebase/idmapping'
SCHEDULE = "30 7 * * *"
def get_newest_info(self):
import os
import sys
import time
import ftplib
import re
import pandas as pd
import biothings, config
biothings.config_for_app(config)
from config import DATA_ARCHIVE_ROOT
from biothings.hub.dataload.dumper import FTPDumper, DumperException
from biothings.utils.common import gunzipall
class SiderDumper(FTPDumper):
SRC_NAME = "sider"
SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
FTP_HOST = 'xi.embl.de'
CWD_DIR = '/SIDER'
SCHEDULE = "0 12 * * *"
def get_release(self):
# only dir with dates
import os
import os.path
import sys
import time
from datetime import datetime
import biothings, config
biothings.config_for_app(config)
from config import DATA_ARCHIVE_ROOT, logger as logging
from biothings.hub.dataload.dumper import FTPDumper
class EntrezGeneDumper(FTPDumper):
    """FTP dumper declaration for the ``entrez`` data source."""

    SRC_NAME = "entrez"
    # Folder for this source, located directly under DATA_ARCHIVE_ROOT.
    SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
    FTP_HOST = 'ftp.ncbi.nih.gov'
    CWD_DIR = '/gene/DATA'
    # NOTE(review): looks like a cron expression -- confirm against the hub scheduler.
    SCHEDULE = "0 22 * * 6"

    def get_newest_info(self):
        """Send an ``MDTM`` command for ``gene_info.gz`` through ``self.client``.

        NOTE(review): only the query itself is visible here; the reply is
        stored in a local and not used any further in this excerpt, so the
        remainder of the method may be missing -- confirm against the full file.
        """
        # A single file is queried; the other files are assumed to match it.
        mdtm_reply = self.client.sendcmd("MDTM gene_info.gz")
import os
import os.path
import sys
import time
import biothings, config
biothings.config_for_app(config)
from config import DATA_ARCHIVE_ROOT
from biothings.hub.dataload.dumper import ManualDumper
class CADDDumper(ManualDumper):
SRC_NAME = "cadd"
SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
def __init__(self, *args, **kwargs):
super(CADDDumper,self).__init__(*args,**kwargs)
self.logger.info("""
Assuming manual download from: http://cadd.gs.washington.edu
of files (.tsv.gz and .tsv.gz.tbi) looking like:
- HumanExome-12v1-1_A_inclAn"no"
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import os.path
import time
from datetime import datetime
from ftplib import FTP
import biothings, config
biothings.config_for_app(config)
from biothings.utils.common import ask, timesofar, safewfile, setup_logfile
from biothings.utils.hipchat import hipchat_msg
from biothings.utils.mongo import get_src_dump
from config import DATA_ARCHIVE_ROOT, logger as logging
# Current local date as YYYYMMDD, computed once when the module is loaded.
timestamp = time.strftime('%Y%m%d')
# Date-stamped destination folder under the data archive root.
DATA_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, 'by_resources/exac', timestamp)

FTP_SERVER = 'ftp.broadinstitute.org'

# All listed files share one directory, so the prefix is spelled out once.
_CONSTRAINT_DIR = 'pub/ExAC_release/current/functional_gene_constraint'
DATAFILES_PATH = [
    _CONSTRAINT_DIR + '/' + filename
    for filename in (
        'fordist_cleaned_exac_nonTCGA_z_pli_rec_null_data.txt',
        'fordist_cleaned_exac_r03_march16_z_pli_rec_null_data.txt',
        'fordist_cleaned_nonpsych_z_pli_rec_null_data.txt',
    )
]
import os
import os.path
import sys
import time
import biothings, config
biothings.config_for_app(config)
from config import DATA_ARCHIVE_ROOT
from biothings.hub.dataload.dumper import ManualDumper
class EMVDumper(ManualDumper):
    """Manual dumper declaration for the ``emv`` data source."""

    SRC_NAME = "emv"
    # Folder for this source, located directly under DATA_ARCHIVE_ROOT.
    SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)

    def __init__(self, *args, **kwargs):
        """Initialise the parent dumper, then log where the files come from.

        All positional and keyword arguments are forwarded unchanged to the
        parent class initialiser.
        """
        super(EMVDumper, self).__init__(*args, **kwargs)
        # The message text is kept flush-left so the logged string is unchanged.
        download_hint = """
Assuming manual download from: http://geneticslab.emory.edu/emvclass/emvclass.php
- EmVClass.*.csv
"""
        self.logger.info(download_hint)
import os
import os.path
import sys
import time
import biothings, config
biothings.config_for_app(config)
from config import DATA_ARCHIVE_ROOT, logger as logging
from biothings.hub.dataload.dumper import FTPDumper
class HomologeneDumper(FTPDumper):
SRC_NAME = "homologene"
SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
FTP_HOST = 'ftp.ncbi.nih.gov'
CWD_DIR = '/pub/HomoloGene/current'
SCHEDULE = "0 6 * * *"
def get_newest_info(self):
rel = None