Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consider only copyrights in summary #2972 #2974

Merged
merged 6 commits into from
Jun 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
74 changes: 37 additions & 37 deletions src/summarycode/copyright_tallies.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,49 +485,50 @@ def filter_junk(texts):


COMMON_NAMES = {
'3dfxinteractiveinc.': '3dfx Interactive, Inc.',
'3dfxinteractiveinc.': '3dfx Interactive',

'cern': 'CERN - European Organization for Nuclear Research',

'ciscosystemsinc': 'Cisco Systems, Inc.',
'ciscosystems': 'Cisco Systems, Inc.',
'cisco': 'Cisco Systems, Inc.',
'ciscosystemsinc': 'Cisco Systems',
'ciscosystems': 'Cisco Systems',
'cisco': 'Cisco Systems',

'daisy': 'Daisy Ltd.',
'daisy': 'Daisy',
'daisyltd': 'Daisy',

'fsf': 'Free Software Foundation, Inc.',
'freesoftwarefoundation': 'Free Software Foundation, Inc.',
'freesoftwarefoundationinc': 'Free Software Foundation, Inc.',
'thefreesoftwarefoundation': 'Free Software Foundation, Inc.',
'thefreesoftwarefoundationinc': 'Free Software Foundation, Inc.',
'fsf': 'Free Software Foundation',
'freesoftwarefoundation': 'Free Software Foundation',
'freesoftwarefoundationinc': 'Free Software Foundation',
'thefreesoftwarefoundation': 'Free Software Foundation',
'thefreesoftwarefoundationinc': 'Free Software Foundation',

'hp': 'Hewlett-Packard, Inc.',
'hewlettpackard': 'Hewlett-Packard, Inc.',
'hewlettpackardco': 'Hewlett-Packard, Inc.',
'hpcompany': 'Hewlett-Packard, Inc.',
'hpdevelopmentcompanylp': 'Hewlett-Packard, Inc.',
'hpdevelopmentcompany': 'Hewlett-Packard, Inc.',
'hewlettpackardcompany': 'Hewlett-Packard, Inc.',
'hp': 'Hewlett-Packard',
'hewlettpackard': 'Hewlett-Packard',
'hewlettpackardco': 'Hewlett-Packard',
'hpcompany': 'Hewlett-Packard',
'hpdevelopmentcompanylp': 'Hewlett-Packard',
'hpdevelopmentcompany': 'Hewlett-Packard',
'hewlettpackardcompany': 'Hewlett-Packard',

'theandroidopensourceproject': 'The Android Open Source Project, Inc.',
'androidopensourceproject': 'The Android Open Source Project, Inc.',
'theandroidopensourceproject': 'Android Open Source Project',
'androidopensourceproject': 'Android Open Source Project',

'ibm': 'IBM Corporation',
'ibm': 'IBM',

'redhat': 'Red Hat, Inc.',
'redhatinc': 'Red Hat, Inc.',
'redhat': 'Red Hat',
'redhatinc': 'Red Hat',

'softwareinthepublicinterest': 'Software in the Public Interest, Inc.',
'spiinc': 'Software in the Public Interest, Inc.',
'softwareinthepublicinterest': 'Software in the Public Interest',
'spiinc': 'Software in the Public Interest',

'suse': 'SuSE, Inc.',
'suseinc': 'SuSE, Inc.',
'suse': 'SuSE',
'suseinc': 'SuSE',

'sunmicrosystems': 'Sun Microsystems, Inc.',
'sunmicrosystemsinc': 'Sun Microsystems, Inc.',
'sunmicro': 'Sun Microsystems, Inc.',
'sunmicrosystems': 'Sun Microsystems',
'sunmicrosystemsinc': 'Sun Microsystems',
'sunmicro': 'Sun Microsystems',

'thaiopensourcesoftwarecenter': 'Thai Open Source Software Center Ltd.',
'thaiopensourcesoftwarecenter': 'Thai Open Source Software Center',

'apachefoundation': 'The Apache Software Foundation',
'apachegroup': 'The Apache Software Foundation',
Expand All @@ -540,20 +541,19 @@ def filter_junk(texts):

'regentsoftheuniversityofcalifornia': 'The Regents of the University of California',

# 'mit': 'the Massachusetts Institute of Technology',

'borland': 'Borland Corp.',
'borland': 'Borland',
'borlandcorp': 'Borland',

'microsoft': 'Microsoft',
'microsoftcorp': 'Microsoft',
'microsoftinc': 'Microsoft',
'microsoftcorporation': 'Microsoft',

'google': 'Google Inc.',
'googlellc': 'Google Inc.',
'googleinc': 'Google Inc.',
'google': 'Google',
'googlellc': 'Google',
'googleinc': 'Google',

'intel': 'Intel Corporation',
'intel': 'Intel',
}

# Remove everything except letters and numbers
Expand Down
41 changes: 25 additions & 16 deletions src/summarycode/summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from cluecode.copyrights import CopyrightDetector
from packagedcode.utils import combine_expressions
from packagedcode import models
from summarycode.copyright_tallies import canonical_holder
from summarycode.score import compute_license_score
from summarycode.score import get_field_values_from_codebase_resources
from summarycode.score import unique
Expand Down Expand Up @@ -167,7 +168,7 @@ def get_declared_holders(codebase, holders_tallies):
codebase, 'holders', key_files_only=True
)
entry_by_key_file_holders = {
fingerprints.generate(entry['holder']): entry
fingerprints.generate(canonical_holder(entry['holder'])): entry
for entry in key_file_holders
if entry['holder']
}
Expand Down Expand Up @@ -212,25 +213,24 @@ def get_primary_language(programming_language_tallies):

def get_origin_info_from_top_level_packages(top_level_packages, codebase):
"""
Return a 3-tuple containing the strings of declared license expression,
copyright holder, and primary programming language from a
Return a 3-tuple containing the declared license expression string, a list
of copyright holders, and the primary programming language string from a
``top_level_packages`` list of detected top-level packages mapping and a
``codebase``.
"""
if not top_level_packages:
return '', '', ''
return '', [], ''

license_expressions = []
programming_languages = []
copyrights = []
parties = []

for package_mapping in top_level_packages:
package = models.Package.from_dict(package_mapping)
# we are only interested in key packages
if not is_key_package(package, codebase):
continue

top_level_packages = [
models.Package.from_dict(package_mapping)
for package_mapping in top_level_packages
]
key_file_packages = [p for p in top_level_packages if is_key_package(p, codebase)]
for package in key_file_packages:
license_expression = package.license_expression
if license_expression:
license_expressions.append(license_expression)
Expand All @@ -243,8 +243,6 @@ def get_origin_info_from_top_level_packages(top_level_packages, codebase):
if copyright_statement:
copyrights.append(copyright_statement)

parties.extend(package.parties or [])

# Combine license expressions
unique_license_expressions = unique(license_expressions)
combined_declared_license_expression = combine_expressions(
Expand All @@ -263,9 +261,20 @@ def get_origin_info_from_top_level_packages(top_level_packages, codebase):
declared_holders = []
if holders:
declared_holders = holders
elif parties:
declared_holders = [party.name for party in parties or []]

else:
# If the package data does not contain an explicit copyright, check the
# key files where the package data was detected from and see if there
# are any holder detections that can be used.
for package in key_file_packages:
for datafile_path in package.datafile_paths:
key_file_resource = codebase.get_resource(path=datafile_path)
if not key_file_resource:
continue
holders = [h['holder'] for h in key_file_resource.holders]
declared_holders.extend(holders)
# Normalize holder names before collecting them
# This allows us to properly remove declared holders from `other_holders` later
declared_holders = [canonical_holder(h) for h in declared_holders]
declared_holders = unique(declared_holders)

# Programming language
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ holders:
- Free Software Foundation, Inc.
- the Free Software Foundation
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 2
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/afferogplv1-AfferoGPLv.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ holders:
holders_summary:
- value: Affero Inc.
count: 2
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/afferogplv3-AfferoGPLv.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
4 changes: 2 additions & 2 deletions tests/cluecode/data/copyrights/android_c-c.c.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ holders:
- The Android Open Source Project
- Colin Percival
holders_summary:
- value: Colin Percival
- value: Android Open Source Project
count: 1
- value: The Android Open Source Project, Inc.
- value: Colin Percival
count: 1
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ holders_summary:
count: 2
- value: Board of Trustees of the University of Illinois
count: 1
- value: Cisco Systems, Inc.
- value: Cisco Systems
count: 1
- value: Eric Haines
count: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ holders:
holders_summary:
- value: Bigelow & Holmes
count: 1
- value: Sun Microsystems, Inc.
- value: Sun Microsystems
count: 1
4 changes: 2 additions & 2 deletions tests/cluecode/data/copyrights/colin_android-bsdiff_c.c.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ holders:
- The Android Open Source Project
- Colin Percival
holders_summary:
- value: Colin Percival
- value: Android Open Source Project
count: 1
- value: The Android Open Source Project, Inc.
- value: Colin Percival
count: 1
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/complex_notice-NOTICE.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ holders:
holders_summary:
- value: David Schultz
count: 9
- value: Sun Microsystems, Inc.
- value: Sun Microsystems
count: 4
- value: Mike Barcroft
count: 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ holders:
holders_summary:
- value: IBM Corporation
count: 3
- value: Sun Microsystems, Inc.
- value: Sun Microsystems
count: 3
- value: Lotus Development Corporation
count: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ holders:
- Daisy Ltd.
- Daisy
holders_summary:
- value: Daisy Ltd.
- value: Daisy
count: 2
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/copytest/aosp.txt.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ copyrights:
holders:
- Android Open Source Project
holders_summary:
- value: The Android Open Source Project, Inc.
- value: Android Open Source Project
count: 1
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ holders:
- Hewlett-Packard Co.
- Hewlett-Packard Co.
holders_summary:
- value: Hewlett-Packard, Inc.
- value: Hewlett-Packard
count: 3
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ holders:
- Free Software Foundation, Inc.
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 11
- value: Colin Plumb
count: 2
Expand Down
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/ecosv2_0-eCosv.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ copyrights:
holders:
- Red Hat, Inc.
holders_summary:
- value: Red Hat, Inc.
- value: Red Hat
count: 1
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/ed-ed.copyright.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ holders_summary:
count: 1
- value: Antonio Diaz Diaz
count: 1
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
- value: James Troup
count: 1
4 changes: 2 additions & 2 deletions tests/cluecode/data/copyrights/esmertec_java-java.java.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ holders:
- Esmertec AG
- The Android Open Source Project
holders_summary:
- value: Esmertec AG
- value: Android Open Source Project
count: 1
- value: The Android Open Source Project, Inc.
- value: Esmertec AG
count: 1
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/fsf_py-999_py.py.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/gfdlv1_2-GFDLv.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/gfdlv1_3-GFDLv.3.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
Loading