Skip to content

Commit

Permalink
Add script to report rules
Browse files Browse the repository at this point in the history
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Sep 3, 2021
1 parent 3be07db commit a6c2cdb
Showing 1 changed file with 198 additions and 0 deletions.
198 changes: 198 additions & 0 deletions etc/scripts/licenses/report_license_rules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/scancode-toolkit for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import io

import click
import csv

from commoncode.cliutils import PluggableCommandLineOption
from licensedcode.models import load_licenses
from licensedcode.models import load_rules


LICENSES_FIELDNAMES = [
'key', 'short_name', 'name', 'category', 'owner', 'text', 'words_count', 'notes',
'minimum_coverage', 'homepage_url', 'is_exception', 'language', 'is_unknown',
'spdx_license_key', 'text_urls', 'other_urls', 'standard_notice',
'license_filename', 'faq_url', 'ignorable_authors',
'ignorable_copyrights', 'ignorable_holders', 'ignorable_urls', 'ignorable_emails',
'osi_license_key', 'osi_url', 'other_spdx_license_keys',
]


RULES_FIELDNAMES = [
'identifier', 'license_expression', 'relevance', 'text', 'words_count', 'category',
'is_false_positive', 'is_license_text', 'is_license_notice', 'is_license_tag',
'is_license_reference', 'is_license_intro', 'has_unknown', 'only_known_words',
'notes', 'referenced_filenames', 'minimum_coverage', 'ignorable_copyrights',
'ignorable_holders', 'ignorable_authors', 'ignorable_urls', 'ignorable_emails',
]


def write_data_to_csv(data, output_csv, fieldnames):

with open(output_csv,'w',encoding='utf-8-sig',newline='') as f:
w = csv.DictWriter(f,fieldnames=fieldnames)
w.writeheader()

for entry in data:
w.writerow(entry)


def filter_by_attribute(data, attribute, expected):

filtered_output = []
for entry in data:
if entry.get(attribute, 'None') == expected:
filtered_output.append(entry)

return filtered_output

def flatten_output(data):

if not isinstance(data, list):
return

output = []
for entry in data:
if not isinstance(entry, dict):
continue

output_entry = {}
for key, value in entry.items():
entry_value = None
if value is None:
continue
elif isinstance(value, list):
entry_value = ' '.join(value)
elif not isinstance(value, str):
entry_value = repr(value)
elif not entry_value:
entry_value = value

output_entry[key] = entry_value

output.append(output_entry)

return output

@click.command()
@click.option('-l', '--licenses',
type=click.Path(dir_okay=False, writable=True, readable=False),
default=None,
metavar='FILE',
help='Write all Licenses data to the csv FILE.',
cls=PluggableCommandLineOption
)
@click.option('-r', '--rules',
type=click.Path(dir_okay=False, writable=True, readable=False),
default=None,
metavar='FILE',
help='Write all Rules data to the csv FILE.',
cls=PluggableCommandLineOption,
)
@click.option('-c', '--category',
type=str,
default=None,
metavar='STRING',
help='An optional filter to only output licenses/rules of this category'
'. Example STRING: `permissive`.',
cls=PluggableCommandLineOption,
)
@click.option('-k', '--license-key',
type=str,
default=None,
metavar='STRING',
help='An optional filter to only output licenses/rules which has this license key.'
'Example STRING: `mit`.',
cls=PluggableCommandLineOption,
)
@click.option('-t', '--with-text',
is_flag=True,
default=False,
help='Also include the license/rules texts.'
'Note that this increases the file size significantly.',
cls=PluggableCommandLineOption,
)
@click.help_option('-h', '--help')
def cli(licenses, rules, category, license_key, with_text):
"""
Write Licenses/Rules from scancode into a CSV file with all details.
Output can be optionally filtered by category/license-key.
"""
licenses_output = []
rules_output = []

licenses_data = load_licenses()

if licenses:
for license in licenses_data.values():
license_data = license.to_dict()
if with_text:
license_data['text'] = license.text
license_data['is_unknown'] = license.is_unknown
license_data['words_count'] = len(license.text)
licenses_output.append(license_data)

if category:
licenses_output = filter_by_attribute(
data=licenses_output,
attribute='category',
expected=category
)

if license_key:
licenses_output = filter_by_attribute(
data=licenses_output,
attribute='key',
expected=license_key,
)

licenses_output = flatten_output(data=licenses_output)
write_data_to_csv(data=licenses_output, output_csv=licenses, fieldnames=LICENSES_FIELDNAMES)


if rules:
rules_data = list(load_rules())
for rule in rules_data:
rule_data = rule.to_dict()
rule_data['identifier'] = rule.identifier
rule_data['referenced_filenames'] = rule.referenced_filenames
if with_text:
rule_data['text'] = rule.text()
rule_data['has_unknown'] = rule.has_unknown
rule_data['words_count'] = len(rule.text())
try:
rule_data['category'] = licenses_data[rule_data['license_expression']].category
except KeyError:
pass
rules_output.append(rule_data)

if category:
rules_output = filter_by_attribute(
data=rules_output,
attribute='category',
expected=category
)

if license_key:
rules_output = filter_by_attribute(
data=rules_output,
attribute='license_expression',
expected=license_key,
)

rules_output = flatten_output(rules_output)
write_data_to_csv(data=rules_output, output_csv=rules, fieldnames=RULES_FIELDNAMES)


if __name__ == '__main__':
cli()

0 comments on commit a6c2cdb

Please sign in to comment.