From a0011b1173dbbf0a259481985e3879576495a521 Mon Sep 17 00:00:00 2001
From: Jono Yang <jyang@nexb.com>
Date: Tue, 24 May 2022 13:09:31 -0700
Subject: [PATCH] Normalize holders from package data #2972

    * Update expected test results

Signed-off-by: Jono Yang <jyang@nexb.com>
---
 src/summarycode/summarizer.py                            | 9 ++++++---
 .../data/summary/single_file/single_file.expected.json   | 2 +-
 .../use_holder_from_package_resource.expected.json       | 9 +++++++--
 .../tallies/copyright_tallies/tallies2.expected.json     | 2 +-
 4 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/src/summarycode/summarizer.py b/src/summarycode/summarizer.py
index 7428623cc27..2a45ad74dfa 100644
--- a/src/summarycode/summarizer.py
+++ b/src/summarycode/summarizer.py
@@ -213,13 +213,13 @@ def get_primary_language(programming_language_tallies):
 
 def get_origin_info_from_top_level_packages(top_level_packages, codebase):
     """
-    Return a 3-tuple containing the strings of declared license expression,
-    copyright holder, and primary programming language from a
+    Return a 3-tuple containing the declared license expression string, a list
+    of copyright holders, and the primary programming language string from a
     ``top_level_packages`` list of detected top-level packages mapping and a
     ``codebase``.
     """
     if not top_level_packages:
-        return '', '', ''
+        return '', [], ''
 
     license_expressions = []
     programming_languages = []
@@ -272,6 +272,9 @@ def get_origin_info_from_top_level_packages(top_level_packages, codebase):
                     continue
                 holders = [h['holder'] for h in key_file_resource.holders]
                 declared_holders.extend(holders)
+    # Normalize holder names before de-duplicating them.
+    # This allows us to properly remove declared holders from `other_holders` later.
+    declared_holders = [canonical_holder(h) for h in declared_holders]
     declared_holders = unique(declared_holders)
 
     # Programming language
diff --git a/tests/summarycode/data/summary/single_file/single_file.expected.json b/tests/summarycode/data/summary/single_file/single_file.expected.json
index d8404f98e06..2621e0b4e3f 100644
--- a/tests/summarycode/data/summary/single_file/single_file.expected.json
+++ b/tests/summarycode/data/summary/single_file/single_file.expected.json
@@ -12,7 +12,7 @@
       "conflicting_license_categories": false,
       "ambiguous_compound_licensing": false
     },
-    "declared_holder": "Mort Bay Consulting Pty. Ltd. (Australia) and others, Sun Microsystems, Inc.",
+    "declared_holder": "Mort Bay Consulting Pty. Ltd. (Australia) and others, Sun Microsystems",
     "primary_language": "",
     "other_license_expressions": [],
     "other_holders": [],
diff --git a/tests/summarycode/data/summary/use_holder_from_package_resource/use_holder_from_package_resource.expected.json b/tests/summarycode/data/summary/use_holder_from_package_resource/use_holder_from_package_resource.expected.json
index 892933e6a86..3f4b46cc57c 100644
--- a/tests/summarycode/data/summary/use_holder_from_package_resource/use_holder_from_package_resource.expected.json
+++ b/tests/summarycode/data/summary/use_holder_from_package_resource/use_holder_from_package_resource.expected.json
@@ -75,10 +75,15 @@
       "conflicting_license_categories": false,
       "ambiguous_compound_licensing": false
     },
-    "declared_holder": "Example Corporation, Google Inc., Fraunhofer FKIE",
+    "declared_holder": "Google, Fraunhofer FKIE",
     "primary_language": "Python",
     "other_license_expressions": [],
-    "other_holders": [],
+    "other_holders": [
+      {
+        "value": "Example Corporation",
+        "count": 1
+      }
+    ],
     "other_languages": []
   },
   "files": [
diff --git a/tests/summarycode/data/tallies/copyright_tallies/tallies2.expected.json b/tests/summarycode/data/tallies/copyright_tallies/tallies2.expected.json
index adad73c3ffe..1d9bc4fdc9e 100644
--- a/tests/summarycode/data/tallies/copyright_tallies/tallies2.expected.json
+++ b/tests/summarycode/data/tallies/copyright_tallies/tallies2.expected.json
@@ -16,7 +16,7 @@
         "count": 1
       },
       {
-        "value": "Sun Microsystems, Inc.",
+        "value": "Sun Microsystems",
         "count": 1
       }
     ],