ranking icon indicating copy to clipboard operation
ranking copied to clipboard

Custom lambda weights: no scores in `pair_weights`, implementing lambda weights based on custom ranking metric

Open matbb opened this issue 2 years ago • 0 comments

I am trying to implement a custom lambda weights class based on a ranking metric. The paper I am reading defines the lambda weights as depending on the scores: $\lambda_{ij} = \frac{-\sigma}{1 + \exp(\sigma (s_i - s_j))} |\Delta_{NDCG}|$, where $s_i$ and $s_j$ are the scores (equation (6) from the paper https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf). However, the `pair_weights` function in `tfr.keras.losses.losses_impl._LambdaWeight`, which I am subclassing, only gets labels and ranks as inputs.

I am obviously missing something. Could you help me clarify this? Example custom lambda weights class (should work with any ranking metric; here it is used with MAP@12):

# mapk : Source: https://github.com/benhamner/Metrics
def mapk_fn(actual, predicted):
    """Return the MAP@12 value of a single ranked list.

    `metrics.mapk` is called with `actual` and `predicted` each wrapped in a
    one-element list and with the cutoff fixed at k=12.

    Args:
        actual: relevant items of the single list.
        predicted: predicted items of the single list, in ranked order.

    Returns:
        Whatever `metrics.mapk` returns for this one-list batch.
    """
    return metrics.mapk(actual=[actual], predicted=[predicted], k=12)

class RankingMetricLambdaWeight(tfr.keras.losses.losses_impl._LambdaWeight):
    """Ranking metric based lambda weight with cached values of the metric.

    For every supported number of positive samples, a table of
    |metric(swapped) - metric(ordered)| is pre-computed for each pair of rank
    positions. `pair_weights` only looks values up in that table.
    """

    def __init__(
        self,
        ranking_metric_fn,
        n_ranks,
        max_positive_samples,
        min_positive_samples=0,
        positive_fn=None,
        **kwargs
    ):
        """Ranking-metric based lambda weights based on cached metric values.

        Args:
            ranking_metric_fn: function(actual, predicted) returning the metric value
                actual, predicted: lists of ranks
            n_ranks: number of ranks to cache, higher ranks are truncated to this value during evaluation
            max_positive_samples: max number of positive samples to build cache for, higher numbers are truncated to this value and this value is truncated to n_ranks
            min_positive_samples: min number of positive samples to build cache for,
                lower numbers are raised to this value during evaluation.
                In case the metric does not depend on the number of positive samples, set max_positive_samples=0
            positive_fn: function applied to the labels to decide which samples
                count as positive; defaults to
                `tfr.keras.losses.utils.is_greater_equal_1`.
            **kwargs: forwarded unchanged to the base class constructor.

        Raises:
            ValueError: if `min_positive_samples` is negative or larger than
                `max_positive_samples` after truncation to `n_ranks`.
        """
        super().__init__(**kwargs)

        max_positive_samples = min(max_positive_samples, n_ranks)
        if not 0 <= min_positive_samples <= max_positive_samples:
            # Without this check the cache below would be empty (or NumPy would
            # complain about a negative dimension) and lookups would fail later.
            raise ValueError(
                "min_positive_samples must satisfy "
                "0 <= min_positive_samples <= min(max_positive_samples, n_ranks); "
                f"got min_positive_samples={min_positive_samples}, "
                f"max_positive_samples={max_positive_samples}"
            )

        self._ranking_metric_fn = ranking_metric_fn
        self._n_ranks = n_ranks
        self._max_positive_samples = max_positive_samples
        self._min_positive_samples = min_positive_samples
        self._positive_fn = positive_fn or tfr.keras.losses.utils.is_greater_equal_1

        self._count_positive_samples = self._max_positive_samples - self._min_positive_samples + 1

        # table[p, i, j]: |metric change| when positions i and j of the ideal
        # ordering are swapped, for (p + min_positive_samples) positive samples.
        table = np.zeros(
            shape=(self._count_positive_samples, self._n_ranks, self._n_ranks), dtype=np.float32
        )

        # Item ids n_ranks, n_ranks - 1, ..., 1. Plain Python lists are used so
        # that the metric function receives lists, as documented above.
        ideal_order = list(range(self._n_ranks, 0, -1))

        for n_positive in range(self._min_positive_samples, self._max_positive_samples + 1):
            # The first n_positive items of the ideal ordering are the relevant ones.
            relevant = ideal_order[:n_positive]
            metric_ordered = ranking_metric_fn(
                actual=relevant,
                predicted=list(ideal_order),
            )
            row = n_positive - self._min_positive_samples
            for i in range(self._n_ranks):
                # Swapping (i, j) and (j, i) gives the same list, and swapping
                # i with itself changes nothing, so only j > i is evaluated;
                # the diagonal keeps its initial value of 0.
                for j in range(i + 1, self._n_ranks):
                    swapped = list(ideal_order)
                    swapped[i], swapped[j] = swapped[j], swapped[i]
                    metric_swapped = ranking_metric_fn(
                        actual=relevant,
                        predicted=swapped,
                    )
                    delta = np.abs(metric_swapped - metric_ordered)
                    table[row, i, j] = delta
                    table[row, j, i] = delta

        self._map_lambda_weights = tf.convert_to_tensor(table)

    def get_config(self):
        """Return the constructor arguments of this object as a dict.

        `max_positive_samples` is the value after truncation to `n_ranks`.
        The two callables are returned as-is.
        """
        return {
            "ranking_metric_fn": self._ranking_metric_fn,
            "n_ranks": self._n_ranks,
            "max_positive_samples": self._max_positive_samples,
            "min_positive_samples": self._min_positive_samples,
            "positive_fn": self._positive_fn,
        }

    def pair_weights(self, labels, ranks):
        """See `_LambdaWeight`.

        Returns lambda weights associated with given metrics.
        Lambda weights are fetched from pre-calculated table.

        Args:
            labels: label tensor; dimension 0 is treated as the batch and
                dimension 1 as the position in the list.
            ranks: tensor of 1-based ranks with the same layout as `labels`;
                values above `n_ranks` are clamped to `n_ranks`.

        Returns:
            The looked-up weight for every pair of positions of every list,
            multiplied by the valid-pair mask.
        """

        def gather_lambda_weights(ranks, lambda_weights):
            # Select table rows by the rank of the first item of each pair ...
            by_first = tf.gather(
                params=lambda_weights,
                indices=ranks - 1,
                batch_dims=1,
                axis=-2,
            )
            # ... then table columns by the rank of the second item.
            return tf.gather(
                params=by_first,
                indices=ranks - 1,
                axis=-1,
                batch_dims=1,
            )

        with tf.compat.v1.name_scope(name="map_lambda_weight"):
            tfr.keras.losses.losses_impl._check_tensor_shapes([labels, ranks])
            (
                valid_pair,
                labels,
            ) = tfr.keras.losses.losses_impl._get_valid_pairs_and_clean_labels(labels)
            # dimensions: 0=batch, 1,2: index in rank
            binary_labels = tf.cast(self._positive_fn(labels), dtype=tf.float32)
            # Clamp the positive count into the cached range on BOTH sides: a
            # list with fewer positives than min_positive_samples would
            # otherwise produce a negative gather index.
            n_positive = tf.math.maximum(
                tf.math.minimum(
                    tf.math.count_nonzero(binary_labels, axis=1),
                    self._max_positive_samples,
                ),
                self._min_positive_samples,
            )

            # Before gather: clamp rank to max samples
            ranks = tf.math.minimum(ranks, self._n_ranks)
            # Get weights based on num positive per row
            lambda_weights = tf.gather(
                params=self._map_lambda_weights,
                indices=n_positive - self._min_positive_samples,
                axis=0,
            )
            # Get weights based on ranks, each row depends on n_positive in that row
            lw = gather_lambda_weights(
                ranks=ranks,
                lambda_weights=lambda_weights,
            )
            # The score-dependent multiplier
            #   -sigma / (1 + exp(sigma * (s_i - s_j)))
            # cannot be computed here because this method receives only labels
            # and ranks, not scores.
            # NOTE(review): presumably that factor comes from the gradient of
            # the pairwise loss these weights are multiplied into - confirm.

            return lw * tf.cast(valid_pair, dtype=tf.float32)


# Usage example:

# max_positive_samples=12 exceeds n_ranks=5, so the constructor truncates it to 5.
lambda_weight = RankingMetricLambdaWeight(
    ranking_metric_fn=mapk_fn,
    n_ranks=5,
    max_positive_samples=12,
    min_positive_samples=1,
)
# Pairwise hinge loss weighted by the cached MAP@12 swap deltas; no reduction is applied.
loss_obj_map12 = tfr.keras.losses.PairwiseHingeLoss(
    lambda_weight=lambda_weight,
    reduction=tf.losses.Reduction.NONE,
)

matbb avatar Jun 01 '22 14:06 matbb