# (scraper banner, commented out so the file stays parseable)
# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): chunk boundary — this "]" closes a list literal whose opening
# bracket lies outside the visible region of the file.
]
# Apply the labeling functions on split 0 and verify the resulting label counts.
labeler.update(split=0, lfs=[stg_temp_lfs_2, ce_v_max_lfs], parallelism=PARALLEL)
assert session.query(Label).count() == 6478
assert session.query(LabelKey).count() == 16
L_train = labeler.get_label_matrices(train_cands)
assert L_train[0].shape == (3493, 16)
# Fit a generative label model (presumably Snorkel's LabelModel — confirm) and
# compute probabilistic training labels (marginals) for each candidate.
gen_model = LabelModel()
gen_model.fit(L_train=L_train[0], n_epochs=500, log_freq=100)
train_marginals = gen_model.predict_proba(L_train[0])
# Keep only candidates whose marginals are not (near-)uniform: a max-min spread
# below 1e-6 means the label model expressed no preference for that candidate.
diffs = train_marginals.max(axis=1) - train_marginals.min(axis=1)
train_idxs = np.where(diffs > 1e-6)[0]
# Training dataloader over the filtered candidates, supervised by the marginals.
train_dataloader = EmmentalDataLoader(
task_to_label_dict={ATTRIBUTE: "labels"},
dataset=FonduerDataset(
ATTRIBUTE,
train_cands[0],
F_train[0],
emb_layer.word2id,
train_marginals,
train_idxs,
),
split="train",
batch_size=100,
shuffle=True,
)
# Reset and re-initialize Emmental's global state before the next test scenario.
emmental.Meta.reset()
emmental.init(fonduer.Meta.log_path)
emmental.Meta.init()
# Set the random seed.
set_random_seed(2)
# Two synthetic tasks: task1 has 20 examples of 2 features, task2 has 30 of 3.
task1 = "task1"
x1 = np.random.rand(20, 2)
y1 = torch.from_numpy(np.random.rand(20))
task2 = "task2"
x2 = np.random.rand(30, 3)
y2 = torch.from_numpy(np.random.rand(30))
# One train dataloader per task, batch size 10 each.
dataloaders = [
EmmentalDataLoader(
task_to_label_dict={task_name: "label"},
dataset=EmmentalDataset(
name=task_name, X_dict={"feature": x}, Y_dict={"label": y}
),
split="train",
batch_size=10,
shuffle=True,
)
for task_name, x, y in [(task1, x1, y1), (task2, x2, y2)]
]
# Round-robin over both loaders: 20/10 + 30/10 = 2 + 3 = 5 batches total.
scheduler = RoundRobinScheduler()
assert scheduler.get_num_batches(dataloaders) == 5
# NOTE(review): splice artifact — "batch_task_names = [" is immediately
# followed by what looks like the tail of an EmmentalDataset(...) call from a
# different chunk; the intended list contents are missing from this view.
batch_task_names = [
name="synthetic", X_dict={"data": X_test}, Y_dict={"label1": Y1_test}
)
test_dataset2 = EmmentalDataset(
name="synthetic", X_dict={"data": X_test}, Y_dict={"label2": Y2_test}
)
# Train/valid/test dataloaders for task1, all batch size 10.
task_to_label_dict = {"task1": "label1"}
train_dataloader1 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=train_dataset1,
split="train",
batch_size=10,
)
dev_dataloader1 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=dev_dataset1,
split="valid",
batch_size=10,
)
test_dataloader1 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=test_dataset1,
split="test",
batch_size=10,
)
task_to_label_dict = {"task2": "label2"}
# NOTE(review): splice artifact — this EmmentalDataLoader call is fused with a
# list of ragged tensors from a different chunk (the x1 list whose "]" closes
# below); the dataloader's remaining kwargs are missing from this view.
train_dataloader2 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
torch.Tensor([1, 2, 3, 4, 5]),
torch.Tensor([1, 2, 3, 4]),
torch.Tensor([1, 2, 3]),
torch.Tensor([1, 2]),
torch.Tensor([1]),
]
y2 = torch.Tensor([1, 1, 1, 1, 1])
# Dataset with two ragged features; the dataloader is expected to zero-pad
# each batch to the longest sequence in it (see the asserts below).
dataset = EmmentalDataset(
X_dict={"data1": x1, "data2": x2},
Y_dict={"label1": y1, "label2": y2},
name="new_data",
)
dataloader1 = EmmentalDataLoader(
task_to_label_dict={"task1": "label1"},
dataset=dataset,
split="train",
batch_size=2,
)
x_batch, y_batch = next(iter(dataloader1))
# Check if the dataloader is correctly constructed
assert dataloader1.task_to_label_dict == {"task1": "label1"}
assert dataloader1.split == "train"
# First batch of 2: shorter rows are right-padded with zeros.
assert torch.equal(x_batch["data1"], torch.Tensor([[1, 0], [1, 2]]))
assert torch.equal(
x_batch["data2"], torch.Tensor([[1, 2, 3, 4, 5], [1, 2, 3, 4, 0]])
)
assert torch.equal(y_batch["label1"], torch.Tensor([0, 0]))
# Train/valid/test dataloaders for task1, all batch size 10.
task_to_label_dict = {"task1": "label1"}
train_dataloader1 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=train_dataset1,
split="train",
batch_size=10,
)
dev_dataloader1 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=dev_dataset1,
split="valid",
batch_size=10,
)
test_dataloader1 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=test_dataset1,
split="test",
batch_size=10,
)
# Same three splits for task2.
task_to_label_dict = {"task2": "label2"}
train_dataloader2 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
split="train",
batch_size=10,
)
dev_dataloader2 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
# NOTE(review): splice artifact — the dev_dataloader2 call above is fused with
# the dataset definitions below; its closing ")" and remaining kwargs are
# missing from this view.
dev_dataset2 = EmmentalDataset(
name="synthetic", X_dict={"data": X_dev}, Y_dict={"label2": Y2_dev}
)
test_dataset1 = EmmentalDataset(
name="synthetic", X_dict={"data": X_test}, Y_dict={"label1": Y1_test}
)
test_dataset2 = EmmentalDataset(
name="synthetic", X_dict={"data": X_test}, Y_dict={"label2": Y2_test}
)
# Train/valid/test dataloaders for task1, all batch size 10.
task_to_label_dict = {"task1": "label1"}
train_dataloader1 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=train_dataset1,
split="train",
batch_size=10,
)
dev_dataloader1 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=dev_dataset1,
split="valid",
batch_size=10,
)
test_dataloader1 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=test_dataset1,
split="test",
batch_size=10,
# NOTE(review): splice artifact — the test_dataloader1 call above is missing
# its closing ")"; a duplicate dev/test dataloader run from another chunk
# begins immediately below.
dev_dataloader1 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=dev_dataset1,
split="valid",
batch_size=10,
)
test_dataloader1 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=test_dataset1,
split="test",
batch_size=10,
)
# Same three splits for task2 (test split shuffled here, unlike the others).
task_to_label_dict = {"task2": "label2"}
train_dataloader2 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=train_dataset2,
split="train",
batch_size=10,
)
dev_dataloader2 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=dev_dataset2,
split="valid",
batch_size=10,
)
test_dataloader2 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=test_dataset2,
split="test",
batch_size=10,
shuffle=True,
)
# Build the Fonduer classification task(s) and train an EmmentalModel on them.
tasks = create_task(
ATTRIBUTE, 2, F_train[0].shape[1], 2, emb_layer, model="LogisticRegression"
)
model = EmmentalModel(name=f"{ATTRIBUTE}_task")
for task in tasks:
model.add_task(task)
emmental_learner = EmmentalLearner()
emmental_learner.learn(model, [train_dataloader])
# Unshuffled test dataloader (2 here appears to be the number of classes —
# TODO confirm against FonduerDataset's signature).
test_dataloader = EmmentalDataLoader(
task_to_label_dict={ATTRIBUTE: "labels"},
dataset=FonduerDataset(
ATTRIBUTE, test_cands[0], F_test[0], emb_layer.word2id, 2
),
split="test",
batch_size=100,
shuffle=False,
)
# Predict and keep candidates whose TRUE-class probability exceeds 0.6.
test_preds = model.predict(test_dataloader, return_preds=True)
positive = np.where(np.array(test_preds["probs"][ATTRIBUTE])[:, TRUE] > 0.6)
true_pred = [test_cands[0][_] for _ in positive[0]]
# Security note: pickle.load executes arbitrary code from the file — fine for
# a trusted test fixture, never for untrusted input.
pickle_file = "tests/data/parts_by_doc_dict.pkl"
with open(pickle_file, "rb") as f:
parts_by_doc = pickle.load(f)
# NOTE(review): splice artifact — the two lines below are leftover kwargs and
# closing paren of a dataloader call from a different chunk.
batch_size=2,
)
# Inspect the first padded batch from dataloader1 (defined in another chunk).
x_batch, y_batch = next(iter(dataloader1))
# Check if the dataloader is correctly constructed
assert dataloader1.task_to_label_dict == {"task1": "label1"}
assert dataloader1.split == "train"
# Shorter rows are right-padded with zeros up to the batch's longest row.
assert torch.equal(x_batch["data1"], torch.Tensor([[1, 0], [1, 2]]))
assert torch.equal(
x_batch["data2"], torch.Tensor([[1, 2, 3, 4, 5], [1, 2, 3, 4, 0]])
)
assert torch.equal(y_batch["label1"], torch.Tensor([0, 0]))
assert torch.equal(y_batch["label2"], torch.Tensor([1, 1]))
# Second dataloader over the same dataset, batch size 3 this time.
dataloader2 = EmmentalDataLoader(
task_to_label_dict={"task2": "label2"},
dataset=dataset,
split="test",
batch_size=3,
)
x_batch, y_batch = next(iter(dataloader2))
# Check if the dataloader with differet batch size is correctly constructed
assert dataloader2.task_to_label_dict == {"task2": "label2"}
assert dataloader2.split == "test"
assert torch.equal(
x_batch["data1"], torch.Tensor([[1, 0, 0], [1, 2, 0], [1, 2, 3]])
)
assert torch.equal(
x_batch["data2"],
# NOTE(review): splice artifact — the assert above is cut off mid-call; the
# task2 dataloader run below comes from a different chunk.
task_to_label_dict = {"task2": "label2"}
train_dataloader2 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=train_dataset2,
split="train",
batch_size=10,
)
dev_dataloader2 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=dev_dataset2,
split="valid",
batch_size=10,
)
test_dataloader2 = EmmentalDataLoader(
task_to_label_dict=task_to_label_dict,
dataset=test_dataset2,
split="test",
batch_size=10,
)
# Create task
def ce_loss(task_name, immediate_ouput_dict, Y, active):
    """Cross-entropy loss over the active (labeled) rows of a task's logits.

    Args:
        task_name: Task identifier; the prediction head module is looked up
            as ``f"{task_name}_pred_head"``.
        immediate_ouput_dict: Mapping from module name to its output list;
            element 0 holds the logits. (Parameter name keeps the upstream
            framework's historical "ouput" spelling — do not rename.)
        Y: Gold labels; flattened with ``view(-1)`` before indexing.
        active: Index/mask selecting the rows that carry a label.

    Returns:
        Scalar cross-entropy loss over the selected rows.
    """
    head_name = f"{task_name}_pred_head"
    logits = immediate_ouput_dict[head_name][0]
    targets = Y.view(-1)
    return F.cross_entropy(logits[active], targets[active])
def output(task_name, immediate_ouput_dict):
    """Return class probabilities for a task by softmaxing its head's logits.

    Args:
        task_name: Task identifier; the prediction head module is looked up
            as ``f"{task_name}_pred_head"``.
        immediate_ouput_dict: Mapping from module name to its output list;
            element 0 holds the logits. (Parameter name keeps the upstream
            framework's historical "ouput" spelling — do not rename.)

    Returns:
        Softmax over dim 1 of the head's logits.
    """
    head_name = f"{task_name}_pred_head"
    logits = immediate_ouput_dict[head_name][0]
    return F.softmax(logits, dim=1)