From c26b40bc4b7c2e9405bf4d5d13fb1a050decaef5 Mon Sep 17 00:00:00 2001 From: Adrian Ostrowski Date: Sun, 22 Jan 2023 18:53:33 +0100 Subject: [PATCH] Support flexible years in licenses (#23) (#59) --- README.md | 15 +++ pre_commit_hooks/insert_license.py | 110 ++++++++++++++++-- tests/insert_license_test.py | 72 ++++++++---- ...y_formatted_stale_year_range_in_license.py | 6 + .../module_with_stale_year_in_license.py | 6 + ...module_with_stale_year_range_in_license.py | 6 + .../module_with_year_range_in_license.py | 6 + 7 files changed, 190 insertions(+), 31 deletions(-) create mode 100644 tests/resources/module_with_badly_formatted_stale_year_range_in_license.py create mode 100644 tests/resources/module_with_stale_year_in_license.py create mode 100644 tests/resources/module_with_stale_year_range_in_license.py create mode 100644 tests/resources/module_with_year_range_in_license.py diff --git a/README.md b/README.md index 2ec4e61..255e048 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,7 @@ into separate repos: - src/license_header.txt # defaults to: LICENSE.txt - --comment-style - // # defaults to: # + - --use-current-year ``` ### insert-license @@ -84,6 +85,20 @@ In case you want to remove the comment headers introduced by 3. Remove the `--remove-header` arg and update your `LICENSE.txt` ; 4. Re-run the hook on all your files. +#### Handling years flexibly + +You can add `--use-current-year` to change how the hook treats years in the +headers: + +- When inserting a header, the current year will always be inserted + regardless of the year listed in the license file. +- When modifying a file that already has a header, the hook will ensure the + current year is listed in the header by using a range. For instance, + `2015` or `2015-2018` would get updated to `2015-2023` in the year 2023. +- When removing headers, the licenses will be removed regardless of the + years they contain -- as if they used the year currently present in the + license file. + #### Fuzzy license matching In some cases your license files can contain several slightly different diff --git a/pre_commit_hooks/insert_license.py b/pre_commit_hooks/insert_license.py index 33019ab..539d281 100755 --- a/pre_commit_hooks/insert_license.py +++ b/pre_commit_hooks/insert_license.py @@ -5,6 +5,8 @@ import collections import re import sys +from datetime import datetime +from typing import Sequence from fuzzywuzzy import fuzz @@ -12,7 +14,7 @@ " license used in the project.") FUZZY_MATCH_TODO_INSTRUCTIONS = ( " Delete the inconsistent license and above line" - " and rerun pre-commit to insert a good license." ) + " and rerun pre-commit to insert a good license.") FUZZY_MATCH_EXTRA_LINES_TO_CHECK = 3 SKIP_LICENSE_INSERTION_COMMENT = "SKIP LICENSE INSERTION" @@ -56,6 +58,11 @@ def main(argv=None): parser.add_argument('--insert-license-after-regex', default="", help="Insert license after line matching regex (ex: '^<\\?php$')") parser.add_argument('--remove-header', action='store_true') + parser.add_argument( + "--use-current-year", + action="store_true", + help=("Allow past years and ranges of years in headers. Use the current year in inserted and updated licenses."), + ) args = parser.parse_args(argv) license_info = get_license_info(args) @@ -78,6 +85,13 @@ def main(argv=None): return 0 +def _replace_year_in_license_with_current(plain_license: list[str]): + current_year = datetime.now().year + for i, line in enumerate(plain_license): + plain_license[i] = re.sub(r"\b\d{4}\b", str(current_year), line) + return plain_license + + def get_license_info(args) -> LicenseInfo: comment_start, comment_end = None, None comment_prefix = args.comment_style.replace('\\t', '\t') @@ -86,6 +100,10 @@ def get_license_info(args) -> LicenseInfo: comment_start, comment_prefix, comment_end = comment_prefix.split('|') with open(args.license_filepath, encoding='utf8') as license_file: plain_license = license_file.readlines() + + if args.use_current_year: + plain_license = _replace_year_in_license_with_current(plain_license) + prefixed_license = [f'{comment_prefix}{extra_space if line.strip() else ""}{line}' for line in plain_license] eol = '\r\n' if prefixed_license[0][-2:] == '\r\n' else '\n' @@ -138,7 +156,8 @@ def process_files(args, changed_files, todo_files, license_info: LicenseInfo): license_header_index = find_license_header_index( src_file_content=src_file_content, license_info=license_info, - top_lines_count=args.detect_license_in_X_top_lines) + top_lines_count=args.detect_license_in_X_top_lines, + match_years_strictly=not args.use_current_year) fuzzy_match_header_index = None if args.fuzzy_match_generates_todo and license_header_index is None: fuzzy_match_header_index = fuzzy_find_license_header_index( @@ -150,6 +169,7 @@ def process_files(args, changed_files, todo_files, license_info: LicenseInfo): ) if license_header_index is not None: if license_found(remove_header=args.remove_header, + update_year_range=args.use_current_year, license_header_index=license_header_index, license_info=license_info, src_file_content=src_file_content, @@ -235,7 +255,53 @@ def license_not_found( # pylint: disable=too-many-arguments return False -def license_found(remove_header, license_header_index, license_info, src_file_content, src_filepath, encoding): # pylint: disable=too-many-arguments +# a year, then optionally a dash (with optional spaces before and after), and another year, surrounded by word boundaries +_YEAR_RANGE_PATTERN = re.compile(r"\b\d{4}(?: *- *\d{2,4})?\b") + + +def try_update_year_range( + src_file_content: list[str], + license_header_index: int, +) -> tuple[Sequence[str], bool]: + """ + Updates the years in a copyright header in src_file_content by + ensuring it contains a range ending in the current year. + Does nothing if the current year is already present as the end of + the range. + The change will affect only the first line containing years. + :param src_file_content: the lines in the source file + :param license_header_index: line where the license starts + :return: source file contents and a flag indicating update + """ + current_year = datetime.now().year + for i in range(license_header_index, len(src_file_content)): + line = src_file_content[i] + matches = _YEAR_RANGE_PATTERN.findall(line) + if matches: + match = matches[-1] + start_year = int(match[:4]) + end_year = match[5:] + if not end_year or int(end_year) < current_year: + updated = line.replace(match, + str(start_year) + '-' + str(current_year)) + # verify the current list of years ends in the current one + if _YEARS_PATTERN.findall(updated)[-1][-4:] != str(current_year): + print(f"Unable to update year range in line: {line.rstrip()}. Got: {updated.rstrip()}") + break + src_file_content[i] = updated + return src_file_content, True + return src_file_content, False + + +def license_found( + remove_header, + update_year_range, + license_header_index, + license_info, + src_file_content, + src_filepath, + encoding, +): # pylint: disable=too-many-arguments """ Executed when license is found. It does nothing if remove_header is False, removes the license if remove_header is True. @@ -246,6 +312,7 @@ def license_found(remove_header, license_header_index, license_info, src_file_co :param src_filepath: path of the src_file :return: True if change was made, False otherwise """ + updated = False if remove_header: last_license_line_index = license_header_index + len(license_info.prefixed_license) if last_license_line_index < len(src_file_content) and src_file_content[last_license_line_index].strip(): @@ -255,10 +322,15 @@ def license_found(remove_header, license_header_index, license_info, src_file_co src_file_content = src_file_content[:license_header_index] + \ src_file_content[license_header_index + len(license_info.prefixed_license) + 1:] + updated = True + elif update_year_range: + src_file_content, updated = try_update_year_range(src_file_content, license_header_index) + + if updated: with open(src_filepath, 'w', encoding=encoding) as src_file: src_file.write(''.join(src_file_content)) - return True - return False + + return updated def fuzzy_license_found(license_info, # pylint: disable=too-many-arguments @@ -289,9 +361,28 @@ def fuzzy_license_found(license_info, # pylint: disable=too-many-arguments return True +# More flexible than _YEAR_RANGE_PATTERN. For detecting all years in a line, not just a range. +_YEARS_PATTERN = re.compile(r"\b\d{4}([ ,-]+\d{2,4})*\b") + + +def _strip_years(line): + return _YEARS_PATTERN.sub("", line) + + +def _license_line_matches(license_line, src_file_line, match_years_strictly): + license_line = license_line.strip() + src_file_line = src_file_line.strip() + + if match_years_strictly: + return license_line == src_file_line + + return _strip_years(license_line) == _strip_years(src_file_line) + + def find_license_header_index(src_file_content, - license_info, - top_lines_count): + license_info: LicenseInfo, + top_lines_count, + match_years_strictly): """ Returns the line number, starting from 0 and lower than `top_lines_count`, where the license header comment starts in this file, or else None. @@ -299,7 +390,10 @@ def find_license_header_index(src_file_content, for i in range(top_lines_count): license_match = True for j, license_line in enumerate(license_info.prefixed_license): - if i + j >= len(src_file_content) or license_line.strip() != src_file_content[i + j].strip(): + if (i + j >= len(src_file_content) or + not _license_line_matches(license_line, + src_file_content[i + j], + match_years_strictly)): license_match = False break if license_match: diff --git a/tests/insert_license_test.py b/tests/insert_license_test.py index c5cef54..1a84517 100644 --- a/tests/insert_license_test.py +++ b/tests/insert_license_test.py @@ -1,4 +1,5 @@ from contextlib import contextmanager +from datetime import datetime from itertools import product import os import shutil @@ -48,6 +49,12 @@ ('module_without_license.py', '#', 'module_with_license_nospace.py', True, ['--no-space-in-comment-prefix']), ('module_without_license.php', '/*| *| */', 'module_with_license.php', True, ['--insert-license-after-regex', '^<\\?php$']), ('module_without_license.py', '#', 'module_with_license_noeol.py', True, ['--no-extra-eol']), + + ('module_without_license.groovy', '//', 'module_with_license.groovy', True, ['--use-current-year']), + ('module_with_stale_year_in_license.py', '#', 'module_with_year_range_in_license.py', True, ['--use-current-year']), + ('module_with_stale_year_range_in_license.py', '#', 'module_with_year_range_in_license.py', True, ['--use-current-year']), + ('module_with_badly_formatted_stale_year_range_in_license.py', '#', 'module_with_badly_formatted_stale_year_range_in_license.py', False, + ['--use-current-year']), ), )), ) @@ -69,6 +76,8 @@ def test_insert_license(license_file_path, if new_src_file_expected: with open(new_src_file_expected, encoding=encoding) as expected_content_file: expected_content = expected_content_file.read() + if '--use-current-year' in args: + expected_content = expected_content.replace("2017", str(datetime.now().year)) new_file_content = path.open(encoding=encoding).read() assert new_file_content == expected_content @@ -127,14 +136,18 @@ def test_fuzzy_match_license(license_file_path, @pytest.mark.parametrize( - ('src_file_content', 'expected_index'), + ('src_file_content', 'expected_index', 'match_years_strictly'), ( - (['foo\n', 'bar\n'], None), - (['# License line 1\n', '# License line 2\n', '\n', 'foo\n', 'bar\n'], 0), - (['\n', '# License line 1\n', '# License line 2\n', 'foo\n', 'bar\n'], 1), + (['foo\n', 'bar\n'], None, True), + (['# License line 1\n', '# Copyright 2017\n', '\n', 'foo\n', 'bar\n'], 0, True), + (['\n', '# License line 1\n', '# Copyright 2017\n', 'foo\n', 'bar\n'], 1, True), + (['\n', '# License line 1\n', '# Copyright 2017\n', 'foo\n', 'bar\n'], 1, False), + (['# License line 1\n', '# Copyright 1984\n', '\n', 'foo\n', 'bar\n'], None, True), + (['# License line 1\n', '# Copyright 1984\n', '\n', 'foo\n', 'bar\n'], 0, False), + (['\n', '# License line 1\n', '# Copyright 2013,2015-2016\n', 'foo\n', 'bar\n'], 1, False), ), ) -def test_is_license_present(src_file_content, expected_index): +def test_is_license_present(src_file_content, expected_index, match_years_strictly): license_info = LicenseInfo( plain_license="", eol="\n", @@ -142,8 +155,10 @@ def test_is_license_present(src_file_content, expected_index): comment_prefix="#", comment_end="", num_extra_lines=0, - prefixed_license=['# License line 1\n', '# License line 2\n']) - assert expected_index == find_license_header_index(src_file_content, license_info, 5) + prefixed_license=['# License line 1\n', '# Copyright 2017\n']) + assert expected_index == find_license_header_index( + src_file_content, license_info, 5, match_years_strictly=match_years_strictly + ) @pytest.mark.parametrize( @@ -152,27 +167,35 @@ def test_is_license_present(src_file_content, expected_index): 'comment_style', 'fuzzy_match', 'new_src_file_expected', - 'fail_check'), + 'fail_check', + 'use_current_year'), map(lambda a: a[:1] + a[1], product( # combine license files with other args ('LICENSE_with_trailing_newline.txt', 'LICENSE_without_trailing_newline.txt'), ( - ('module_with_license.css', '/*| *| */', False, 'module_without_license.css', True), + ('module_with_license.css', '/*| *| */', False, 'module_without_license.css', True, False), ('module_with_license_and_few_words.css', '/*| *| */', False, - 'module_without_license_and_few_words.css', True), - ('module_with_license_todo.css', '/*| *| */', False, None, True), - ('module_with_fuzzy_matched_license.css', '/*| *| */', False, None, False), - ('module_without_license.css', '/*| *| */', False, None, False), - - ('module_with_license.py', '#', False, 'module_without_license.py', True), - ('module_with_license_and_shebang.py', '#', False, 'module_without_license_and_shebang.py', True), - ('init_with_license.py', '#', False, 'init_without_license.py', True), - ('init_with_license_and_newline.py', '#', False, 'init_without_license.py', True), + 'module_without_license_and_few_words.css', True, False), + ('module_with_license_todo.css', '/*| *| */', False, None, True, False), + ('module_with_fuzzy_matched_license.css', '/*| *| */', False, None, False, False), + ('module_without_license.css', '/*| *| */', False, None, False, False), + + ('module_with_license.py', '#', False, 'module_without_license.py', True, False), + ('module_with_license_and_shebang.py', '#', False, 'module_without_license_and_shebang.py', True, False), + ('init_with_license.py', '#', False, 'init_without_license.py', True, False), + ('init_with_license_and_newline.py', '#', False, 'init_without_license.py', True, False), # Fuzzy match - ('module_with_license.css', '/*| *| */', True, 'module_without_license.css', True), - ('module_with_license_todo.css', '/*| *| */', True, None, True), - ('module_with_fuzzy_matched_license.css', '/*| *| */', True, 'module_with_license_todo.css', True), - ('module_without_license.css', '/*| *| */', True, None, False), - ('module_with_license_and_shebang.py', '#', True, 'module_without_license_and_shebang.py', True), + ('module_with_license.css', '/*| *| */', True, 'module_without_license.css', True, False), + ('module_with_license_todo.css', '/*| *| */', True, None, True, False), + ('module_with_fuzzy_matched_license.css', '/*| *| */', True, 'module_with_license_todo.css', True, False), + ('module_without_license.css', '/*| *| */', True, None, False, False), + ('module_with_license_and_shebang.py', '#', True, 'module_without_license_and_shebang.py', True, False), + # Strict and flexible years + ('module_with_stale_year_in_license.py', '#', False, None, False, False), + ('module_with_stale_year_range_in_license.py', '#', False, None, False, False), + ('module_with_license.py', '#', False, 'module_without_license.py', True, True), + ('module_with_stale_year_in_license.py', '#', False, 'module_without_license.py', True, True), + ('module_with_stale_year_range_in_license.py', '#', False, 'module_without_license.py', True, True), + ('module_with_badly_formatted_stale_year_range_in_license.py', '#', False, 'module_without_license.py', True, True), ), )), ) @@ -182,6 +205,7 @@ def test_remove_license(license_file_path, fuzzy_match, new_src_file_expected, fail_check, + use_current_year, tmpdir): with chdir_to_test_resources(): path = tmpdir.join('src_file_path') @@ -191,6 +215,8 @@ def test_remove_license(license_file_path, '--comment-style', comment_style] if fuzzy_match: argv = ['--fuzzy-match-generates-todo'] + argv + if use_current_year: + argv = ['--use-current-year'] + argv assert insert_license(argv) == (1 if fail_check else 0) if new_src_file_expected: with open(new_src_file_expected, encoding='utf-8') as expected_content_file: diff --git a/tests/resources/module_with_badly_formatted_stale_year_range_in_license.py b/tests/resources/module_with_badly_formatted_stale_year_range_in_license.py new file mode 100644 index 0000000..2a47590 --- /dev/null +++ b/tests/resources/module_with_badly_formatted_stale_year_range_in_license.py @@ -0,0 +1,6 @@ +# Copyright (C) 2015 -- 16 Teela O'Malley +# +# Licensed under the Apache License, Version 2.0 (the "License"); + +import sys +sys.stdout.write("FOO") diff --git a/tests/resources/module_with_stale_year_in_license.py b/tests/resources/module_with_stale_year_in_license.py new file mode 100644 index 0000000..ce08f70 --- /dev/null +++ b/tests/resources/module_with_stale_year_in_license.py @@ -0,0 +1,6 @@ +# Copyright (C) 2015 Teela O'Malley +# +# Licensed under the Apache License, Version 2.0 (the "License"); + +import sys +sys.stdout.write("FOO") diff --git a/tests/resources/module_with_stale_year_range_in_license.py b/tests/resources/module_with_stale_year_range_in_license.py new file mode 100644 index 0000000..edd037f --- /dev/null +++ b/tests/resources/module_with_stale_year_range_in_license.py @@ -0,0 +1,6 @@ +# Copyright (C) 2015-2016 Teela O'Malley +# +# Licensed under the Apache License, Version 2.0 (the "License"); + +import sys +sys.stdout.write("FOO") diff --git a/tests/resources/module_with_year_range_in_license.py b/tests/resources/module_with_year_range_in_license.py new file mode 100644 index 0000000..e05fed8 --- /dev/null +++ b/tests/resources/module_with_year_range_in_license.py @@ -0,0 +1,6 @@ +# Copyright (C) 2015-2017 Teela O'Malley +# +# Licensed under the Apache License, Version 2.0 (the "License"); + +import sys +sys.stdout.write("FOO")