ranking
ranking copied to clipboard
Custom lambda weights: no scores in `pair_weights`, implementing lambda weights based on custom ranking metric
I am trying to implement a lambda weights class based on a custom ranking metric.
The paper I am reading defines lambda weights as depending on scores,
$\lambda_{ij} = \frac{-\sigma}{1 + \exp(\sigma (s_i - s_j))} \, |\Delta_{NDCG}|$
where $s_i$ and $s_j$ are scores.
(equation (6) from paper https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf)
The `pair_weights`
function in tfr.keras.losses.losses_impl._LambdaWeight
that I am subclassing only gets labels and ranks as inputs.
I am obviously missing something. Could you help me clarify this? Example custom lambda weights class (should work with any ranking metric, here used with MAP@12):
# mapk : Source: https://github.com/benhamner/Metrics
def mapk_fn(actual, predicted):
    """Return `metrics.mapk` at k=12 for a single (actual, predicted) pair.

    Both arguments are wrapped into one-element lists before being handed to
    `metrics.mapk`, so the caller passes the items of one query directly.

    Args:
        actual: the relevant items of one query.
        predicted: the predicted items of the same query, in ranked order.

    Returns:
        Whatever `metrics.mapk` returns for the single wrapped query.
    """
    single_actual = [actual]
    single_predicted = [predicted]
    return metrics.mapk(actual=single_actual, predicted=single_predicted, k=12)
class RankingMetricLambdaWeight(tfr.keras.losses.losses_impl._LambdaWeight):
    """Ranking metric based lambda weight with cached values of the metric.

    At construction time a lookup table is filled with
    ``|metric(swapped) - metric(ordered)|`` for every supported number of
    positive samples and every pair of rank positions, where ``swapped`` is the
    ideal ordering with the two positions exchanged. `pair_weights` then only
    gathers from that table.
    """

    def __init__(
        self,
        ranking_metric_fn,
        n_ranks,
        max_positive_samples,
        min_positive_samples=0,
        positive_fn=None,
        **kwargs
    ):
        """Ranking-metric based lambda weights based on cached metric values.

        Args:
            ranking_metric_fn: function(actual, predicted) returning the metric
                value; it is called with keyword arguments `actual` and
                `predicted`, both arrays of item ids.
            n_ranks: number of ranks to cache, higher ranks are truncated to
                this value during evaluation.
            max_positive_samples: max number of positive samples to build the
                cache for; higher numbers are truncated to this value and this
                value is truncated to n_ranks.
            min_positive_samples: min number of positive samples to build the
                cache for; lower counts are clamped to this value during
                evaluation.
            positive_fn: predicate applied to the labels to decide which items
                count as positive. Defaults to
                `tfr.keras.losses.utils.is_greater_equal_1`.
            **kwargs: forwarded to the base class constructor.

        In case the metric does not depend on the number of positive samples,
        set max_positive_samples=0.

        Raises:
            ValueError: if min_positive_samples is larger than
                max_positive_samples after truncation to n_ranks.
        """
        super().__init__(**kwargs)
        max_positive_samples = min(max_positive_samples, n_ranks)
        if min_positive_samples > max_positive_samples:
            # Without this check the table would have a zero or negative
            # leading dimension and fail later with an unrelated message.
            raise ValueError(
                "min_positive_samples must not exceed "
                "min(max_positive_samples, n_ranks): got "
                f"{min_positive_samples} > {max_positive_samples}"
            )
        self._ranking_metric_fn = ranking_metric_fn
        self._n_ranks = n_ranks
        self._max_positive_samples = max_positive_samples
        self._min_positive_samples = min_positive_samples
        self._positive_fn = positive_fn or tfr.keras.losses.utils.is_greater_equal_1
        self._count_positive_samples = (
            self._max_positive_samples - self._min_positive_samples + 1
        )
        self._map_lambda_weights = tf.convert_to_tensor(self._build_weight_table())

    def _build_weight_table(self):
        """Return the float32 array of |metric delta| per (n_positive, i, j)."""
        table = np.zeros(
            shape=(self._count_positive_samples, self._n_ranks, self._n_ranks),
            dtype=np.float32,
        )
        # Items are identified by n_ranks, n_ranks - 1, ..., 1; listing them in
        # that order is the reference ("correct") ranking.
        ideal_order = np.arange(self._n_ranks)[::-1] + 1
        for n_positive in range(
            self._min_positive_samples, self._max_positive_samples + 1
        ):
            # The first n_positive items of the reference ranking are relevant.
            relevant = ideal_order[:n_positive]
            metric_ordered = self._ranking_metric_fn(
                actual=relevant,
                predicted=ideal_order,
            )
            row = n_positive - self._min_positive_samples
            for i in range(self._n_ranks):
                # Swapping i with j is the same permutation as swapping j with
                # i, so only j > i is evaluated and mirrored; swapping a
                # position with itself changes nothing, so the diagonal stays 0.
                for j in range(i + 1, self._n_ranks):
                    swapped = ideal_order.copy()
                    swapped[i], swapped[j] = swapped[j], swapped[i]
                    metric_swapped = self._ranking_metric_fn(
                        actual=relevant,
                        predicted=swapped,
                    )
                    delta = np.abs(metric_swapped - metric_ordered)
                    table[row, i, j] = delta
                    table[row, j, i] = delta
        return table

    def get_config(self):
        """Return the constructor arguments of this instance as a dict."""
        return {
            "ranking_metric_fn": self._ranking_metric_fn,
            "n_ranks": self._n_ranks,
            "max_positive_samples": self._max_positive_samples,
            "min_positive_samples": self._min_positive_samples,
            "positive_fn": self._positive_fn,
        }

    def pair_weights(self, labels, ranks):
        """See `_LambdaWeight`.

        Returns lambda weights associated with given metrics.
        Lambda weights are fetched from pre-calculated table.

        Args:
            labels: label tensor; assumed to be [batch, list_size] -- TODO
                confirm against the base class contract.
            ranks: rank tensor of the same shape as `labels`; treated as
                1-based, since `ranks - 1` is used as the table index.

        Returns:
            The gathered weights multiplied by the float32 valid-pair mask.
        """
        with tf.compat.v1.name_scope(name="map_lambda_weight"):
            tfr.keras.losses.losses_impl._check_tensor_shapes([labels, ranks])
            (
                valid_pair,
                labels,
            ) = tfr.keras.losses.losses_impl._get_valid_pairs_and_clean_labels(labels)
            # dimensions: 0=batch, 1,2: index in rank
            binary_labels = tf.cast(self._positive_fn(labels), dtype=tf.float32)
            # Clamp the per-row positive count to the cached range on BOTH
            # sides: a count below min_positive_samples would otherwise yield a
            # negative gather index.
            n_positive = tf.clip_by_value(
                tf.math.count_nonzero(binary_labels, axis=1),
                self._min_positive_samples,
                self._max_positive_samples,
            )
            # Before gather: clamp rank to the number of cached ranks, then
            # shift to 0-based table indices.
            rank_index = tf.math.minimum(ranks, self._n_ranks) - 1
            # Get weights based on num positive per row
            lambda_weights = tf.gather(
                params=self._map_lambda_weights,
                indices=n_positive - self._min_positive_samples,
                axis=0,
            )
            # Get weights based on ranks, each row depends on n_positive in
            # that row: first pick the table rows for the items' ranks, then
            # the table columns.
            by_first_rank = tf.gather(
                params=lambda_weights,
                indices=rank_index,
                batch_dims=1,
                axis=-2,
            )
            lw = tf.gather(
                params=by_first_rank,
                indices=rank_index,
                axis=-1,
                batch_dims=1,
            )
            # NOTE(review): no score-dependent multiplier
            # -sigma / (1 + exp(sigma * (s_i - s_j))) is applied here, because
            # this method receives only labels and ranks. Presumably that
            # factor is supplied by the gradient of the pairwise loss these
            # weights are multiplied into -- confirm against the TF-Ranking
            # loss implementation.
            return lw * tf.cast(valid_pair, dtype=tf.float32)
# Usage example: MAP@12 based lambda weights plugged into a pairwise hinge loss.
# max_positive_samples=12 is larger than n_ranks=5, so the constructor
# truncates it to 5.
lambda_weight = RankingMetricLambdaWeight(
    ranking_metric_fn=mapk_fn,
    n_ranks=5,
    max_positive_samples=12,
    min_positive_samples=1,
)
loss_obj_map12 = tfr.keras.losses.PairwiseHingeLoss(
    reduction=tf.losses.Reduction.NONE,
    lambda_weight=lambda_weight,
)