How to use the invenio.bibtask.write_message function in invenio

To help you get started, we’ve selected a few examples of invenio.bibtask.write_message, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: inspirehep / inspire / bibtasklets / bst_webcoll_postprocess.py (View on GitHub)
def bst_webcoll_postprocess(recids=[]):
    """Parse recids to POST to remote server to alert that records are visible."""
    if isinstance(recids, str):
        recids = recids.split(",")
    cache = get_redis()
    cached_ids = cache.get("webcoll_pending_recids") or []
    if cached_ids and not cached_ids == "[]":
        if isinstance(cached_ids, str):
            cached_ids = eval(cached_ids)
        recids += cached_ids

    if not CFG_WEBCOLL_POST_REQUEST_URL:
        write_message("CFG_WEBCOLL_POST_REQUEST_URL is not set.")
        return

    if recids and len(recids) > 0 and not recids == "[]":
        write_message("Going to POST callback to {0}: {1} (total: {2})".format(
            CFG_WEBCOLL_POST_REQUEST_URL,
            recids[:10],
            len(recids))
        )
        cache.set("webcoll_pending_recids", recids)
        session = requests.Session()
        try:
            addapter = requests.adapters.HTTPAdapter(max_retries=3)
            session.mount(CFG_WEBCOLL_POST_REQUEST_URL, addapter)
            response = session.post(CFG_WEBCOLL_POST_REQUEST_URL,
                                    data={'recids': recids})
        except Exception as err:
Source: inspirehep / inspire / bibtasklets / bst_consyn_harvest.py (View on GitHub)
try:
                submit_records_via_ftp(filepath)
                filename = filepath.split('/')[-1]
                body.append("\t%s (%s records)" % (filename, batch_size))
            except:
                _errors_detected.append(Exception(
                    "Failed to upload %s to FTP server" % filepath)
                )
                write_message("Failed to upload %s to FTP server" % filepath)
    else:
        body += ['\tFiles ready for upload:']
        for filename in files_to_upload:
            body.append("\t%s (%s records)" % (filename, batch_size))
    if files_to_upload:
        body = '\n'.join(body)
        write_message(subject)
        write_message(body)
        if submit:
            if submit_records_via_mail(subject, body, CFG_CONSYNHARVEST_EMAIL):
                write_message("Mail sent to %r" % (CFG_CONSYNHARVEST_EMAIL,))
            else:
                write_message("ERROR: Cannot send mail.")
    else:
        write_message("No new files!")
Source: inveniosoftware / invenio / modules / bibrank / lib / bibrank_tag_based_indexer.py (View on GitHub)
"""
    if not dates:
        dates = (get_lastupdated(rank_method_code), '')
    if dates[0] is None:
        dates = ("0000-00-00 00:00:00", '')
    query = """SELECT b.id FROM bibrec AS b WHERE b.modification_date >= %s"""
    if dates[1]:
        query += " and b.modification_date <= %s"
    query += " ORDER BY b.id ASC"""
    if dates[1]:
        res = run_sql(query, (dates[0], dates[1]))
    else:
        res = run_sql(query, (dates[0], ))
    alist = create_range_list([row[0] for row in res])
    if not alist:
        write_message("No new records added since last time method was run")
    return alist
Source: inveniosoftware / invenio / modules / docextract / lib / refextract_task.py (View on GitHub)
if _arxiv:
        overwrite = True
    else:
        overwrite = not task_get_option('no-overwrite')

    try:
        record = extract_references_from_record(recid)
        msg = "Extracted references for %s" % recid
        safe_to_extract = True
        if overwrite:
            write_message("%s (overwrite)" % msg)
        else:
            write_message(msg)
            if not check_record_for_refextract(recid):
                write_message('Record not safe for re-extraction, skipping')
                safe_to_extract = False

        if safe_to_extract:
            records.append(record)
            # Create a RT ticket if necessary
            if task_get_option('new') or task_get_option('create-ticket'):
                create_ticket(recid, bibcatalog_system)
    except FullTextNotAvailable:
        write_message("No full text available for %s" % recid)
Source: inveniosoftware / invenio / modules / bibsort / lib / bibsort_engine.py (View on GitHub)
def _get_values_from_marc_tag(tag, recids):
    '''Finds the value for a specific tag'''
    digits = tag[0:2]
    try:
        intdigits = int(digits)
        if intdigits < 0 or intdigits > 99:
            raise ValueError
    except ValueError:
        # invalid tag value asked for
        write_message('You have asked for an invalid tag value ' \
                      '[tag=%s; value=%s].' %(tag, intdigits), verbose=5)
        return []
    bx = "bib%sx" % digits
    bibx = "bibrec_bib%sx" % digits
    max_recid = get_max_recid()

    if len(recids) == 1:
        to_append = '= %s'
        query_params = [recids.tolist()[0]]

    elif len(recids) < max_recid/3:
        # if we have fewer than one third of the records,
        # use IN
        # This really depends on how large the repository is.
        to_append = 'IN %s'
        query_params = [tuple(recids)]
Source: inspirehep / inspire / bibtasklets / bst_align_hepnames_and_bais.py (View on GitHub)
tag = 'BAI'
        elif tag == 'extid:INSPIREID':
            tag = 'INSPIRE'
        elif tag == 'extid:ORCID':
            tag = 'ORCID'
        elif tag == 'extid:KAKEN':
            tag = 'KAKEN'
        elif tag == 'uid':
            tag = 'UID'
        else:
            continue
        data = data.strip()
        if personid not in ret:
            ret[personid] = {'personid': personid}
        if tag in ret[personid]:
            write_message("ERROR: http://old.inspirehep.net/author/profile/{personid} has invalid IDs".format(personid=personid), stream=sys.stderr)
            continue
        ret[personid][tag] = data.upper()
        if tag == 'BAI':
            ret[personid]['ORIGINAL_BAI'] = data
    return ret.values()
Source: inspirehep / inspire / bibtasklets / bst_consyn_harvest.py (View on GitHub)
_errors_detected.append(e)
                error_trace = traceback.format_exc()
                # Some error happened, let's gracefully quit
                results[full_xml_filepath] = (StatusCodes.CONVERSION_ERROR,
                                              error_trace)
                write_message('Error converting:'
                              ' \n {0}'.format(error_trace))
                continue
            with open(new_full_xml_filepath, "w") as marcfile:
                marcfile.write(converted_xml)
            results[full_xml_filepath] = (StatusCodes.OK,
                                          new_full_xml_filepath)
        else:
            results[full_xml_filepath] = (StatusCodes.DOCTYPE_WRONG,
                                          doctype)
            write_message("Doctype not interesting: {0}".format(doctype))
    return results
Source: inveniosoftware / invenio / modules / bibupload / lib / bibupload.py (View on GitHub)
write_message("('%s', '%s', '%s') not inserted because: '%s'." % (doctype, newname, urls, e), stream=sys.stderr)
                    raise StandardError
                for (url, format, description, comment, flags, timestamp) in urls:
                    assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, timestamp, pretend=pretend))
            elif mode == 'replace_or_insert': # to be thought as correct_or_insert
                for bibdoc in bibrecdocs.list_bibdocs():
                    if bibdoc.get_docname() == docname:
                        if doctype not in ('PURGE', 'DELETE', 'EXPUNGE', 'REVERT', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE'):
                            if newname != docname:
                                try:
                                    if not pretend:
                                        bibdoc.change_name(newname)
                                        ## Let's refresh the list of bibdocs.
                                        bibrecdocs.build_bibdoc_list()
                                except StandardError, e:
                                    write_message(e, stream=sys.stderr)
                                    raise
                found_bibdoc = False
                for bibdoc in bibrecdocs.list_bibdocs():
                    if bibdoc.get_docname() == newname:
                        found_bibdoc = True
                        if doctype == 'PURGE':
                            if not pretend:
                                bibdoc.purge()
                        elif doctype == 'DELETE':
                            if not pretend:
                                bibdoc.delete()
                        elif doctype == 'EXPUNGE':
                            if not pretend:
                                bibdoc.expunge()
                        elif doctype == 'FIX-ALL':
                            if not pretend:
Source: inveniosoftware / invenio / modules / bibrank / lib / bibrank_word_indexer.py (View on GitHub)
term_docs = deserialize_via_marshal(hitlist)
                if term_docs.has_key("Gi"):
                    Gi[t] = term_docs["Gi"][1]
                elif len(term_docs) == 1:
                    Gi[t] = 1
                else:
                    Fi = 0
                    Gi[t] = 1
                    for (j, tf) in term_docs.iteritems():
                        Fi += tf[0]
                    for (j, tf) in term_docs.iteritems():
                        if tf[0] != Fi:
                            Gi[t] = Gi[t] + ((float(tf[0]) / Fi) * math.log(float(tf[0]) / Fi) / math.log(2)) / math.log(N)
            write_message("Phase 3: ......processed %s/%s terms" % ((i+5000>len(terms) and len(terms) or (i+5000)), len(terms)))
            i += 5000
        write_message("Phase 3: Finished getting approximate importance of all affected terms")

    write_message("Phase 4: Calculating normalization value for all affected records and updating %sR" % table[:-1])
    records = Nj.keys()
    i = 0
    while i < len(records):
        #Calculating the normalization value for each document, and adding the Gi value to each term in each document.
        docs_terms = get_from_reverse_index(records, i, (i + 5000), table)
        for (j, termlist) in docs_terms:
            doc_terms = deserialize_via_marshal(termlist)
            try:
                for (t, tf) in doc_terms.iteritems():
                    if Gi.has_key(t):
                        Nj[j] = Nj.get(j, 0) + math.pow(Gi[t] * (1 + math.log(tf[0])), 2)
                        Git = int(math.floor(Gi[t]*100))
                        if Git >= 0:
                            Git += 1
Source: inveniosoftware / invenio / modules / oaiharvest / lib / oai_harvest_daemon.py (View on GitHub)
twodates = None
                        return twodates
                else:
                    write_message("Dates have invalid format, not "
                        "'yyyy-mm-dd:yyyy-mm-dd'")
                    twodates = None
                    return twodates
            ## final check: date1 must be earlier than date2
            date1 = str(twodates[0]) + " 01:00:00"
            date2 = str(twodates[1]) + " 01:00:00"
            if compare_timestamps_with_tolerance(date1, date2) != -1:
                write_message("First date must be before second date.")
                twodates = None
                return twodates
        else:
            write_message("Dates have invalid format, not "
                "'yyyy-mm-dd:yyyy-mm-dd'")
            twodates = None
    else:
        twodates = None
    return twodates