scancode-toolkit
scancode-toolkit copied to clipboard
Lines with license texts are wrongly grouped together, which decreases score/coverage
The following text from the linux kernel at https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/sound/drivers/opl3/opl3_lib.c:
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) by Jaroslav Kysela <perex@perex.cz>,
* Hannu Savolainen 1993-1996,
* Rob Hooft
*
* Routines for control of AdLib FM cards (OPL2/OPL3/OPL4 chips)
*
* Most if code is ported from OSS/Lite.
*/
#include <sound/opl3.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/ioport.h>
#include <sound/minors.h>
#include "opl3_voice.h"
MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>, Hannu Savolainen 1993-1996, Rob Hooft");
MODULE_DESCRIPTION("Routines for control of AdLib FM cards (OPL2/OPL3/OPL4 chips)");
MODULE_LICENSE("GPL");
is grouped together into a single match, even though the trailing MODULE_LICENSE("GPL"); should be placed in a separate query for license detection rather than being grouped with the license notice above and the lines of code in between.
The detection:
"detected_license_expression": "gpl-2.0-plus",
"detected_license_expression_spdx": "GPL-2.0-or-later",
"license_detections": [
{
"license_expression": "gpl-2.0-plus",
"license_expression_spdx": "GPL-2.0-or-later",
"matches": [
{
"license_expression": "gpl-2.0-plus",
"spdx_license_expression": "GPL-2.0-or-later",
"from_file": "linux-4.19.64/sound/drivers/opl3/opl3_lib.c",
"start_line": 10,
"end_line": 38,
"matcher": "3-seq",
"score": 93.44,
"matched_length": 114,
"match_coverage": 93.44,
"rule_relevance": 100,
"rule_identifier": "gpl-2.0-plus_592.RULE",
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0-plus_592.RULE",
"matched_text": " * This program is free software; you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation; either version 2 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program; if not, write to the Free Software\n * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA\n *\n */\n\n#include <sound/opl3.h>\n#include <linux/io.h>\n#include <linux/delay.h>\n#include <linux/module.h>\n#include <linux/init.h>\n#include <linux/slab.h>\n#include <linux/ioport.h>\n#include <sound/minors.h>\n#include \"opl3_voice.h\"\n\nMODULE_AUTHOR(\"Jaroslav Kysela <[email protected]>, Hannu Savolainen 1993-1996, Rob Hooft\");\nMODULE_DESCRIPTION(\"Routines for control of AdLib FM cards (OPL2/OPL3/OPL4 chips)\");\nMODULE_LICENSE(\"GPL\");",
"matched_text_diagnostics": "This program is free software; you can redistribute it and/or modify\n * it under the terms of the GNU General Public License as published by\n * the Free Software Foundation; either version 2 of the License, or\n * (at your option) any later version.\n *\n * This program is distributed in the hope that it will be useful,\n * but WITHOUT ANY WARRANTY; without even the implied warranty of\n * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n * GNU General Public License for more details.\n *\n * You should have received a copy of the GNU General Public License\n * along with this program; if not, write to the Free Software\n * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA\n *\n */\n\n#[include] <[sound]/[opl3].[h]>\n#[include] <[linux]/[io].[h]>\n#[include] <[linux]/[delay].[h]>\n#[include] <[linux]/[module].[h]>\n#[include] <[linux]/[init].[h]>\n#[include] <[linux]/[slab].[h]>\n#[include] <[linux]/[ioport].[h]>\n#[include] <[sound]/[minors].[h]>\n#[include] \"[opl3]_[voice].[h]\"\n\n[MODULE]_[AUTHOR](\"[Jaroslav] [Kysela] <[perex]@[perex].[cz]>, [Hannu] [Savolainen] [1993]-[1996], [Rob] [Hooft]\");\n[MODULE]_[DESCRIPTION](\"[Routines] [for] [control] [of] [AdLib] [FM] [cards] ([OPL2]/[OPL3]/[OPL4] [chips])\");\n[MODULE]_[LICENSE](\"GPL\");"
}
],
"detection_log": ["imperfect-match-coverage"],
"identifier": "gpl_2_0_plus-d60ad0ff-b7c5-b235-fbf8-80e2f095ded3"
}
],
The grouping, together with the non-license lines of code included in the match, makes this look like a detection issue, when it is really an issue in the chunking/query-creation pre-processing step.