sumeval
sumeval copied to clipboard
ROUGE-L for summary-level
Hi. I have a feature request regarding ROUGE-L calculation over multiple sentences. There are two ways to calculate ROUGE-L: (1) sentence-level and (2) summary-level. See also: https://github.com/google-research/google-research/tree/master/rouge#two-flavors-of-rouge-l
In pythonrouge, if we put each sentence on its own line, we get summary-level ROUGE-L.
If we instead pass the sentences concatenated into a single sentence, we get sentence-level ROUGE-L.
Google's rouge implementation supports both types of ROUGE-L (rougeL and rougeLsum).
From my experiments, sumeval supports only sentence-level ROUGE-L. Is that correct? If so, do you have a plan to implement a summary-level option?
Here is the test code to compare the results:
import numpy as np
import pytest
from pythonrouge.pythonrouge import Pythonrouge
from rouge_score.rouge_scorer import RougeScorer
from sumeval.metrics.rouge import RougeCalculator
# Candidate summary used by the test below: three lowercased sentences with
# punctuation already separated by spaces, one list element per sentence.
SUMMARY = [
    "the unusual format has been captured in a series of photographs "
    "by visual journalist anna erickson .",
    "meet seattle 's rolling neighborhood of rvs , "
    "where each unassuming vehicle is a capsule home .",
    "meet bud dodson , 57 , and welcome to his home : "
    "an rv in seattle 's sodo where he watches over the parking lot "
    "in exchange for a spot",
]
# Reference abstract used by the test below: three lowercased sentences with
# punctuation already separated by spaces, one list element per sentence.
ABSTRACT = [
    "around 30 people live a floating life "
    "in seattle 's sodo ( south of downtown ) area in their rvs",
    "there is one parking lot in particular where the owner lets them "
    "act as watchmen in exchange for a spot to live",
    "visual journalist anna erickson , who photographed the community , "
    "said they are just grateful to have a home",
]
@pytest.mark.parametrize("stemming", [True, False])
def test_rouge_mutilple_sentences(stemming):
    """Compare ROUGE F-scores from pythonrouge, sumeval and rouge_score.

    The same three-sentence summary/abstract pair is scored by all three
    libraries, and the scores are printed for inspection.

    Two checks are made, each to 5 decimal places:

    1. rouge_score's ``rouge1``/``rouge2``/``rougeL`` against sumeval's
       ``rouge_n(n=1)``/``rouge_n(n=2)``/``rouge_l``. A mismatch is
       tolerated when ``stemming`` is true (see the linked sumeval issue).
    2. pythonrouge's ``ROUGE-1-F``/``ROUGE-2-F``/``ROUGE-L-F`` against
       rouge_score's ``rouge1``/``rouge2``/``rougeLsum``.

    Args:
        stemming: Whether each library is asked to apply stemming.

    Raises:
        AssertionError: If a comparison fails (except check 1 with stemming).
    """
    # pythonrouge takes each document as a list of sentences; passing a list
    # means every sentence ends up on its own line.
    summ = SUMMARY
    abst = ABSTRACT
    pythonrouge = Pythonrouge(
        summary_file_exist=False,
        summary=[summ],
        reference=[[abst]],
        n_gram=2,
        ROUGE_SU4=False,
        ROUGE_L=True,
        recall_only=False,
        stemming=stemming,
        stopwords=False,
        length_limit=False,
    )
    res = pythonrouge.calc_score()
    pythonrouge_rouge_1 = res["ROUGE-1-F"]
    pythonrouge_rouge_2 = res["ROUGE-2-F"]
    pythonrouge_rouge_l = res["ROUGE-L-F"]
    print(f"pythonrouge_rouge_1={pythonrouge_rouge_1}")
    print(f"pythonrouge_rouge_2={pythonrouge_rouge_2}")
    print(f"pythonrouge_rouge_l={pythonrouge_rouge_l}")

    # rouge_score takes the sentences joined with "\n".
    # sumeval is given the same joined text; it does not seem to care how the
    # sentences are concatenated (unconfirmed).
    abst = "\n".join(ABSTRACT)
    summ = "\n".join(SUMMARY)

    rouge_calculator = RougeCalculator(stopwords=False, stemming=stemming, lang="en")
    rouge_calculator_rouge_1 = rouge_calculator.rouge_n(summ, abst, n=1)
    rouge_calculator_rouge_2 = rouge_calculator.rouge_n(summ, abst, n=2)
    rouge_calculator_rouge_l = rouge_calculator.rouge_l(summ, abst)
    print(f"rouge_calculator_rouge_1={rouge_calculator_rouge_1}")
    print(f"rouge_calculator_rouge_2={rouge_calculator_rouge_2}")
    print(f"rouge_calculator_rouge_l={rouge_calculator_rouge_l}")

    rouge_scorer = RougeScorer(
        ["rouge1", "rouge2", "rougeL", "rougeLsum"], use_stemmer=stemming
    )
    res = rouge_scorer.score(abst, summ)
    rouge_scorer_rouge_1 = res["rouge1"].fmeasure
    rouge_scorer_rouge_2 = res["rouge2"].fmeasure
    rouge_scorer_rouge_l = res["rougeL"].fmeasure
    rouge_scorer_rouge_lsum = res["rougeLsum"].fmeasure
    print(f"rouge_scorer_rouge_1={rouge_scorer_rouge_1}")
    print(f"rouge_scorer_rouge_2={rouge_scorer_rouge_2}")
    print(f"rouge_scorer_rouge_l={rouge_scorer_rouge_l}")
    print(f"rouge_scorer_rouge_lsum={rouge_scorer_rouge_lsum}")

    # Check 1: rouge_score (sentence-level rougeL) vs. sumeval.
    try:
        np.testing.assert_array_almost_equal(
            np.array(
                [
                    rouge_scorer_rouge_1,
                    rouge_scorer_rouge_2,
                    rouge_scorer_rouge_l,
                ]
            ),
            np.array(
                [
                    rouge_calculator_rouge_1,
                    rouge_calculator_rouge_2,
                    rouge_calculator_rouge_l,
                ]
            ),
            decimal=5,
        )
    except AssertionError:
        # With stemming enabled sumeval yields different values, so the
        # mismatch is tolerated in that case only:
        # https://github.com/chakki-works/sumeval/issues/20
        if not stemming:
            raise

    # Check 2: pythonrouge vs. rouge_score (summary-level rougeLsum).
    np.testing.assert_array_almost_equal(
        np.array(
            [
                pythonrouge_rouge_1,
                pythonrouge_rouge_2,
                pythonrouge_rouge_l,
            ]
        ),
        np.array(
            [
                rouge_scorer_rouge_1,
                rouge_scorer_rouge_2,
                rouge_scorer_rouge_lsum,
            ]
        ),
        decimal=5,
    )