From a0011b1173dbbf0a259481985e3879576495a521 Mon Sep 17 00:00:00 2001 From: Jono Yang <jyang@nexb.com> Date: Tue, 24 May 2022 13:09:31 -0700 Subject: [PATCH] Normalize holders from package data #2972 * Update expected test results Signed-off-by: Jono Yang <jyang@nexb.com> --- src/summarycode/summarizer.py | 9 ++++++--- .../data/summary/single_file/single_file.expected.json | 2 +- .../use_holder_from_package_resource.expected.json | 9 +++++++-- .../tallies/copyright_tallies/tallies2.expected.json | 2 +- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/summarycode/summarizer.py b/src/summarycode/summarizer.py index 7428623cc27..2a45ad74dfa 100644 --- a/src/summarycode/summarizer.py +++ b/src/summarycode/summarizer.py @@ -213,13 +213,13 @@ def get_primary_language(programming_language_tallies): def get_origin_info_from_top_level_packages(top_level_packages, codebase): """ - Return a 3-tuple containing the strings of declared license expression, - copyright holder, and primary programming language from a + Return a 3-tuple containing the declared license expression string, a list + of copyright holders, and primary programming language string from a ``top_level_packages`` list of detected top-level packages mapping and a ``codebase``. 
""" if not top_level_packages: - return '', '', '' + return '', [], '' license_expressions = [] programming_languages = [] @@ -272,6 +272,9 @@ def get_origin_info_from_top_level_packages(top_level_packages, codebase): continue holders = [h['holder'] for h in key_file_resource.holders] declared_holders.extend(holders) + # Normalize holder names before collecting them + # This allows us to properly remove declared holders from `other_holders` later + declared_holders = [canonical_holder(h) for h in declared_holders] declared_holders = unique(declared_holders) # Programming language diff --git a/tests/summarycode/data/summary/single_file/single_file.expected.json b/tests/summarycode/data/summary/single_file/single_file.expected.json index d8404f98e06..2621e0b4e3f 100644 --- a/tests/summarycode/data/summary/single_file/single_file.expected.json +++ b/tests/summarycode/data/summary/single_file/single_file.expected.json @@ -12,7 +12,7 @@ "conflicting_license_categories": false, "ambiguous_compound_licensing": false }, - "declared_holder": "Mort Bay Consulting Pty. Ltd. (Australia) and others, Sun Microsystems, Inc.", + "declared_holder": "Mort Bay Consulting Pty. Ltd. 
(Australia) and others, Sun Microsystems", "primary_language": "", "other_license_expressions": [], "other_holders": [], diff --git a/tests/summarycode/data/summary/use_holder_from_package_resource/use_holder_from_package_resource.expected.json b/tests/summarycode/data/summary/use_holder_from_package_resource/use_holder_from_package_resource.expected.json index 892933e6a86..3f4b46cc57c 100644 --- a/tests/summarycode/data/summary/use_holder_from_package_resource/use_holder_from_package_resource.expected.json +++ b/tests/summarycode/data/summary/use_holder_from_package_resource/use_holder_from_package_resource.expected.json @@ -75,10 +75,15 @@ "conflicting_license_categories": false, "ambiguous_compound_licensing": false }, - "declared_holder": "Example Corporation, Google Inc., Fraunhofer FKIE", + "declared_holder": "Google, Fraunhofer FKIE", "primary_language": "Python", "other_license_expressions": [], - "other_holders": [], + "other_holders": [ + { + "value": "Example Corporation", + "count": 1 + } + ], "other_languages": [] }, "files": [ diff --git a/tests/summarycode/data/tallies/copyright_tallies/tallies2.expected.json b/tests/summarycode/data/tallies/copyright_tallies/tallies2.expected.json index adad73c3ffe..1d9bc4fdc9e 100644 --- a/tests/summarycode/data/tallies/copyright_tallies/tallies2.expected.json +++ b/tests/summarycode/data/tallies/copyright_tallies/tallies2.expected.json @@ -16,7 +16,7 @@ "count": 1 }, { - "value": "Sun Microsystems, Inc.", + "value": "Sun Microsystems", "count": 1 } ],