Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_bytes():
    """Logging a raw ``bytes`` payload (and ``None``) must not raise."""
    logzero.reset_default_logger()
    tmpfile = tempfile.NamedTemporaryFile()
    try:
        log = logzero.setup_logger(logfile=tmpfile.name)
        payload = os.urandom(20)
        log.debug(payload)
        log.debug(None)
        # Content-of-logfile assertion was intentionally disabled upstream;
        # this test only checks that logging bytes does not blow up.
    finally:
        tmpfile.close()
        time.sleep(3)
# --- Fragment: interior of an unseen method (a trainer, judging by the
# train/validation/test split names). Indentation was lost in extraction. ---
# get the loggers ready
# One log file per split under self.save_dir (a pathlib-like object,
# given the joinpath calls).
self.train_log_filename = self.save_dir.joinpath("train.log")
self.validation_log_filename = self.save_dir.joinpath("validation.log")
self.test_log_filename = self.save_dir.joinpath("test.log")
# Independent logzero loggers, each writing INFO+ records to its own file.
# NOTE(review): logfile receives a Path object here — confirm the installed
# logzero version accepts Path as well as str.
self.train_logger = logzero.setup_logger(
name="train-logger", logfile=self.train_log_filename, level=logging.INFO
)
self.validation_logger = logzero.setup_logger(
name="valid-logger",
logfile=self.validation_log_filename,
level=logging.INFO,
)
self.test_logger = logzero.setup_logger(
name="test-logger", logfile=self.test_log_filename, level=logging.INFO
)
# Sanity-check that the plateau LR-scheduler's mode matches the metric
# being tracked, warning on the two mismatched combinations.
if self.lr_scheduler_is_plateau:
    # Tracking loss (lower is better) while the scheduler maximizes.
    if self.best_track_value == "loss" and self.lr_scheduler.mode == "max":
        self.msg_printer.warn(
            "You are optimizing loss and lr schedule mode is max instead of min"
        )
    # BUG FIX: `and` binds tighter than `or`, so the original condition
    # parsed as `macro_fscore or (fscore and mode == "min")` and warned for
    # macro_fscore regardless of the scheduler mode. Group the metric test
    # so the mode check applies to both metrics. Also dropped a stray `f`
    # prefix on a placeholder-free string (message unchanged).
    if (
        self.best_track_value in ("macro_fscore", "fscore")
        and self.lr_scheduler.mode == "min"
    ):
        self.msg_printer.warn(
            "You are optimizing for macro_fscore and lr scheduler mode is min instead of max"
        )
import logzero
import logging
# Accepted level names, kept for backward compatibility with any external
# users of this constant.
_LOG_LEVELS_STR = ['INFO', 'WARNING', 'ERROR', 'DEBUG']

# Direct name -> logging-constant mapping; replaces the original fragile
# pair of parallel lists that had to be kept index-aligned by hand.
_LOG_LEVELS = {
    'INFO': logging.INFO,
    'WARNING': logging.WARNING,
    'ERROR': logging.ERROR,
    'DEBUG': logging.DEBUG,
}

def log_leveller(log_level_str):
    """Translate a level name ('INFO', 'WARNING', 'ERROR', 'DEBUG') into the
    matching ``logging`` module constant.

    Raises:
        ValueError: if *log_level_str* is not one of the four known names
            (the original ``list.index`` lookup also raised ValueError).
    """
    try:
        return _LOG_LEVELS[log_level_str]
    except KeyError:
        raise ValueError(f"unknown log level: {log_level_str!r}") from None
# Minimal console format: colorized level name (logzero's LogFormatter fills
# %(color)s / %(end_color)s) followed by the message.
log_format = ("%(color)s%(levelname)s:%(end_color)s %(message)s")
formatter = logzero.LogFormatter(fmt=log_format)
# create a default logger
# Module-level default logger using the custom formatter above.
log = logzero.setup_logger(formatter=formatter)
def get_logger(filename):
    """Return a logger named after *filename* that formats records with the
    project's CustomFormatter."""
    fmt = CustomFormatter()
    return setup_logger(name=filename, formatter=fmt)
# --- Fragment: interior of an unseen __init__ (a SciWING SectLabel client,
# judging by the DATA_FILE_URLS keys and SciWINGInteract). Indentation was
# lost in extraction. ---
# Remote locations of the train/dev/test data splits.
self.train_data_url = DATA_FILE_URLS["SECT_LABEL_TRAIN_FILE"]
self.dev_data_url = DATA_FILE_URLS["SECT_LABEL_DEV_FILE"]
self.test_data_url = DATA_FILE_URLS["SECT_LABEL_TEST_FILE"]
# Console printer used for warnings and as the no-logfile fallback below.
self.msg_printer = wasabi.Printer()
# Fetch data if missing, then build the pipeline pieces in dependency
# order (all helpers defined elsewhere in the class).
self._download_if_required()
self.data_manager = self._get_data()
self.hparams = self._get_hparams()
self.model = self._get_model()
self.infer = self._get_infer_client()
self.cli_interact = SciWINGInteract(self.infer)
self.log_file = log_file
# File logging is optional: with a logfile we get a real logger, otherwise
# the wasabi printer stands in. NOTE(review): Printer and Logger do not
# share the full logging API — confirm callers only use methods both have.
if log_file:
self.logger = setup_logger(
"sectlabel_logger", logfile=self.log_file, level=logging.INFO
)
else:
self.logger = self.msg_printer
# --- Fragment: tail of a try/except whose `try:` header lies outside this
# view; `logger` and `lvl` are bound by the enclosing (unseen) code. ---
# try logzero first, so user gets nice colored logs
import logzero  # type: ignore
# TODO meh, default formatter shorthands logging levels making it harder to search errors..
except ModuleNotFoundError:
# logzero unavailable: warn, then wire a plain stdlib StreamHandler by hand.
import warnings
warnings.warn("You might want to install 'logzero' for nice colored logs")
# ugh. why does it have to be so verbose?
logger.setLevel(lvl)
ch = logging.StreamHandler()
ch.setLevel(lvl)
FMT = '[%(levelname)s %(name)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
ch.setFormatter(logging.Formatter(FMT))
logger.addHandler(ch)
else:
# Import succeeded: let logzero configure the handler/format for `logger`.
logzero.setup_logger(logger.name, level=lvl)
# --- Fragment: tail of a try/except whose `try:` header lies outside this
# view; `logger` and `lvl` are bound by the enclosing (unseen) code. ---
# try logzero first, so user gets nice colored logs
import logzero  # type: ignore
# TODO meh, default formatter shorthands logging levels making it harder to search errors..
except ModuleNotFoundError:
# logzero unavailable: warn, then wire a plain stdlib StreamHandler by hand.
import warnings
warnings.warn("You might want to install 'logzero' for nice colored logs")
# ugh. why does it have to be so verbose?
logger.setLevel(lvl)
ch = logging.StreamHandler()
ch.setLevel(lvl)
FMT = '[%(levelname)s %(name)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
ch.setFormatter(logging.Formatter(FMT))
logger.addHandler(ch)
else:
# Import succeeded: let logzero configure the handler/format for `logger`.
logzero.setup_logger(logger.name, level=lvl)
# --- Fragment: tail of a try/except whose `try:` header lies outside this
# view; `logger` and `lvl` are bound by the enclosing (unseen) code. ---
# try logzero first, so user gets nice colored logs
import logzero  # type: ignore
# TODO meh, default formatter shorthands logging levels making it harder to search errors..
except ModuleNotFoundError:
# logzero unavailable: warn, then wire a plain stdlib StreamHandler by hand.
import warnings
warnings.warn("You might want to install 'logzero' for nice colored logs")
# ugh. why does it have to be so verbose?
logger.setLevel(lvl)
ch = logging.StreamHandler()
ch.setLevel(lvl)
FMT = '[%(levelname)s %(name)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
ch.setFormatter(logging.Formatter(FMT))
logger.addHandler(ch)
else:
# Import succeeded: let logzero configure the handler/format for `logger`.
logzero.setup_logger(logger.name, level=lvl)
there is no hyperparameter tuning.
"""
import os
import re
import logging
from joblib import cpu_count
from string import punctuation
from logzero import setup_logger
from nltk.corpus import stopwords
from gensim.models import Phrases
from gensim.models import Word2Vec
from gensim.models.phrases import Phraser
from gensim.models.word2vec import LineSentence
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
# Module-level logger: INFO and above also go to 'word2vec.log'.
# (PEP 8 E251: no spaces around '=' in keyword arguments.)
logger = setup_logger(name=__name__, logfile='word2vec.log', level=logging.INFO)
# NOTE(review): this definition is truncated in this view — only the
# corpus-loading preamble is visible; the rest of the body lies beyond
# the fragment boundary. Indentation was lost in extraction.
def main():
# -------------------------------------------------------------------------------
# Parameters
# the script will most likely work if we swap the TEXTS variable
# with any iterable of text (where one element represents a document,
# and the whole iterable is the corpus)
newsgroups_train = fetch_20newsgroups(subset = 'train')
TEXTS = newsgroups_train.data
# a set of stopwords built-in to various packages
# we can always expand this set for the
# problem that we are working on, here we also included
# python built-in string punctuation mark
import re
import argparse
from pathlib import Path
import logzero
from logzero import logger
from gensim.models.word2vec import LineSentence, Word2Vec
# Register loggers under gensim's internal module names so word2vec /
# base_any2vec log records are handled by logzero too.
logger_word2vec = logzero.setup_logger(name='gensim.models.word2vec')
logger_base_any2vec = logzero.setup_logger(name='gensim.models.base_any2vec')
# Matches '##...##' entity markers: non-greedy, no '#' inside the span.
regex_entity = re.compile(r'##[^#]+?##')
# NOTE(review): this definition is truncated in this view — the Word2Vec(...)
# call continues past the last visible line. Indentation was lost in
# extraction.
def main(args):
# Create the output directory (and parents) if it does not exist yet.
output_dir = Path(args.output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
# Destinations for the word / entity / combined vector dumps.
word_vectors_file = output_dir / 'word_vectors.txt'
entity_vectors_file = output_dir / 'entity_vectors.txt'
all_vectors_file = output_dir / 'all_vectors.txt'
logger.info('training the model')
# NOTE(review): `size=` is the gensim<4 keyword (renamed `vector_size` in
# gensim 4) — confirm the pinned gensim version.
model = Word2Vec(sentences=LineSentence(args.corpus_file),
size=args.embed_size,
window=args.window_size,