num_foreground = foreground_idxs.sum()
# Build one-hot classification targets: only foreground anchors get a 1 at their gt class.
gt_classes_target = torch.zeros_like(pred_class_logits)
gt_classes_target[foreground_idxs, gt_classes[foreground_idxs]] = 1
# logits loss
loss_cls = sigmoid_focal_loss_jit(
pred_class_logits[valid_idxs],
gt_classes_target[valid_idxs],
alpha=self.focal_loss_alpha,
gamma=self.focal_loss_gamma,
reduction="sum",
) / max(1, num_foreground)
# regression loss
loss_box_reg = smooth_l1_loss(
pred_anchor_deltas[foreground_idxs],
gt_anchors_deltas[foreground_idxs],
beta=self.smooth_l1_loss_beta,
reduction="sum",
) / max(1, num_foreground)
return {"loss_cls": loss_cls, "loss_box_reg": loss_box_reg}
loss_cls = (
sigmoid_focal_loss_star_jit(
pred_logits[gt_valid_inds],
gt_classes_target[gt_valid_inds],
alpha=self.focal_loss_alpha,
gamma=self.focal_loss_gamma,
reduction="sum",
)
/ loss_normalizer
)
if num_fg == 0:
    # No foreground samples: return a zero box loss that still references pred_deltas,
    # so the computation graph stays connected.
    loss_box_reg = pred_deltas.sum() * 0
else:
    loss_box_reg = (
        smooth_l1_loss(pred_deltas[gt_fg_inds], gt_deltas, beta=0.0, reduction="sum")
        / loss_normalizer
    )
losses = {"loss_cls": loss_cls, "loss_box_reg": loss_box_reg}
# mask prediction
if self.mask_on:
    loss_mask = 0
    for lvl in range(self.num_levels):
        cur_level_factor = 2 ** lvl if self.bipyramid_on else 1
        for anc in range(self.num_anchors):
            cur_gt_mask_inds = gt_mask_inds[lvl][anc]
            if cur_gt_mask_inds is None:
                # No ground-truth masks for this (level, anchor): add a zero term that
                # still references the prediction tensor.
                loss_mask += pred_masks[lvl][anc][0, 0, 0, 0] * 0
            else:
                cur_mask_size = self.mask_sizes[anc] * cur_level_factor
                # TODO maybe there are numerical issues when mask sizes are large
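# --- Illustration (not part of the snippet above): sigmoid_focal_loss_star_jit used above
# is the "Focal Loss*" variant shipped alongside sigmoid_focal_loss_jit in fvcore.nn; both
# share the same signature, so they can be compared directly on dummy data. The values
# below are placeholders, not the configuration used by the model above.
import torch
from fvcore.nn import sigmoid_focal_loss_jit, sigmoid_focal_loss_star_jit

logits = torch.randn(16, 5)
targets = (torch.rand(16, 5) > 0.8).float()
fl = sigmoid_focal_loss_jit(logits, targets, alpha=0.25, gamma=2.0, reduction="sum")
fl_star = sigmoid_focal_loss_star_jit(logits, targets, alpha=0.25, gamma=1.0, reduction="sum")
print(fl, fl_star)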
gt_anchor_deltas (Tensor): shape (N, box_dim), row i represents ground-truth
box2box transform targets (dx, dy, dw, dh) or (dx, dy, dw, dh, da) that map anchor i to
its matched ground-truth box.
pred_objectness_logits (Tensor): shape (N,), each element is a predicted objectness
logit.
pred_anchor_deltas (Tensor): shape (N, box_dim), each row is a predicted box2box
transform (dx, dy, dw, dh) or (dx, dy, dw, dh, da).
smooth_l1_beta (float): The transition point between L1 and L2 loss in
the smooth L1 loss function. When set to 0, the loss becomes L1. When
set to +inf, the loss becomes a constant 0.
Returns:
objectness_loss, localization_loss, both unnormalized (summed over samples).
"""
pos_masks = gt_objectness_logits == 1  # anchors labeled as object (positives)
localization_loss = smooth_l1_loss(
pred_anchor_deltas[pos_masks], gt_anchor_deltas[pos_masks], smooth_l1_beta, reduction="sum"
)
valid_masks = gt_objectness_logits >= 0  # drop anchors labeled -1 ("ignore")
objectness_loss = F.binary_cross_entropy_with_logits(
pred_objectness_logits[valid_masks],
gt_objectness_logits[valid_masks].to(torch.float32),
reduction="sum",
)
return objectness_loss, localization_loss
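# --- Illustration (not part of the function above): a quick check of the smooth_l1_beta
# behavior described in the docstring, assuming the fvcore.nn smooth_l1_loss that
# detectron2 builds on.
import torch
from fvcore.nn import smooth_l1_loss

x = torch.tensor([0.5, -2.0])
y = torch.zeros(2)
print(smooth_l1_loss(x, y, beta=0.0, reduction="sum"))  # 2.5 -> beta=0 is plain L1
print(smooth_l1_loss(x, y, beta=1e8, reduction="sum"))  # ~2.1e-08 -> huge beta drives the loss toward 0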
# Empty fg_inds produces a valid loss of zero here because the sum reduction is used
# below (a mean reduction over zero elements would be 0/0
# and would produce a nan loss).
fg_inds = torch.nonzero((self.gt_classes >= 0) & (self.gt_classes < bg_class_ind)).squeeze(1)
if cls_agnostic_bbox_reg:
    # pred_proposal_deltas only corresponds to foreground class for agnostic
    gt_class_cols = torch.arange(box_dim, device=device)
else:
    fg_gt_classes = self.gt_classes[fg_inds]
    # pred_proposal_deltas for class k are located in columns [b * k : b * k + b],
    # where b is the dimension of box representation (4 or 5).
    # Note that compared to Detectron1, we do not perform bounding box regression
    # for background classes.
    gt_class_cols = box_dim * fg_gt_classes[:, None] + torch.arange(box_dim, device=device)
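    # Worked example: with box_dim = 4 and fg_gt_classes = [2, 0], gt_class_cols is
    #   [[ 8,  9, 10, 11],
    #    [ 0,  1,  2,  3]]
    # i.e. one row of column indices per foreground proposal, selecting that class's deltas.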
loss_box_reg = smooth_l1_loss(
self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols],
gt_proposal_deltas[fg_inds],
self.smooth_l1_beta,
reduction="sum",
)
# The loss is normalized using the total number of regions (R), not the number
# of foreground regions even though the box regression loss is only defined on
# foreground regions. Why? Because doing so gives equal training influence to
# each foreground example. To see how, consider two different minibatches:
# (1) Contains a single foreground region
# (2) Contains 100 foreground regions
# If we normalize by the number of foreground regions, the single example in
# minibatch (1) will be given 100 times as much influence as each foreground
# example in minibatch (2). Normalizing by the total number of regions, R,
# means that the single example in minibatch (1) and each of the 100 examples
# in minibatch (2) are given equal influence.
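# The normalization described above is not shown in this snippet; a minimal sketch,
# assuming self.gt_classes holds one label per region so that R == self.gt_classes.numel():
loss_box_reg = loss_box_reg / self.gt_classes.numel()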