Skip to content

Commit

Permalink
Merge branch 'mkcopyright-glob-support'
Browse files Browse the repository at this point in the history
* Branch commit log:
  misc/mkcopyright.py: filter large files and support glob sections
	* Prefilter large files for Copyright notices
	* Add support for globs as sections with Copyright= and License= fields
  misc/checkcrlist.py: fix args check
  doc/anklang.1.md: remove unused header block lines
  .github/workflows/testing.yml: test build on all branches

Signed-off-by: Tim Janik <[email protected]>
  • Loading branch information
tim-janik committed Jan 11, 2025
2 parents 5c96187 + 58883e6 commit 4f9af4f
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 12 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
# Linting: xclip -sel c <.github/workflows/testing.yml # https://rhysd.github.io/actionlint/

on:
pull_request:
push:
branches: [ 'trunk', 'next', 'wip/**' ]
branches: [ '**' ]
# tags: [ 'v[0-9]+.[0-9]+.[0-9]+*' ]
pull_request:
branches: [ 'trunk', 'next' ]

jobs:

Expand Down
2 changes: 0 additions & 2 deletions doc/anklang.1.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
% ANKLANG(1) anklang-0 | Anklang Manual
%
% @FILE_REVISION@

# NAME
anklang - Music composition and modular synthesis application
Expand Down
2 changes: 1 addition & 1 deletion misc/checkcrlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def parse_options (sysargv):
elif k == '--git':
global GIT_COPYRIGHT
GIT_COPYRIGHT = True
if len (argv) != 2:
if len (argv) < 2:
die ("at least two input files are required: <FILELIST> <COPYRIGHTFILE>")
global COPYRIGHTFILES, FILELIST
FILELIST = argv[0]
Expand Down
77 changes: 71 additions & 6 deletions misc/mkcopyright.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Dedicated to the Public Domain under the Unlicense: https://unlicense.org/UNLICENSE

import sys, os, re, subprocess, getopt, itertools
import fnmatch
from datetime import datetime

# TODO:
Expand Down Expand Up @@ -37,16 +38,19 @@

# Patterns for Copyright notices, expects year range in one of two groups
copyrights = (
r'([0-9, \t-]+)\s+Copyright\s+(.+)',
r'([0-9, \t-]+)\s+Copyright\s*ⓒ\s+(.+)',
r'([0-9, \t-]+)\s+Copyright\s*\([Cc]\)\s+(.+)',
r'Copyright\s*([0-9, \t-]+)\s+(.+)',
r'Copyright\s*ⓒ\s*([0-9, \t-]+)\s+(.+)',
r'([0-9, \t-]+)\s+Copyright\s*[©ⓒ]\s+(.+)',
r'([0-9, \t-]+)\s+Copyright\s+(.+)',
r'Copyright\s*\([cC]\)\s*([0-9, \t-]+)\s+(.+)',
r'Copyright\s*[©ⓒ]\s*([0-9, \t-]+)\s+(.+)',
r'Copyright\s*([0-9, \t-]+)\s+(.+)',
)
copyright_prefilter = re.compile ('^.*Copyright.*$', re.MULTILINE)

re_MSI = re.M | re.S | re.I

glob_patterns = []

# Match <cc:license resource=""/> from ccREL specification
rdf_xmlnscc_license = re.compile (r'<rdf:RDF\b.*\bxmlns:cc="https?://.*<cc:license\b[^>]*\bresource="([^"]+)"', re_MSI)
# SVG metadata for cc:license
Expand Down Expand Up @@ -105,18 +109,59 @@ def open_as_utf8 (filename):
except: string = None
if string != None: yield string

def glob_translate (inputstr):
  """Translate a pathname with wildcards into a regular expression string.

  Wildcards:
    '**' matches any sequence of characters, including '/'
    '*'  matches any sequence of characters within one path segment (no '/')
    '?'  matches exactly one character within a path segment (no '/')
  Everything else is matched literally. The resulting pattern is anchored at
  the end of the string only; use it with re.match() to anchor the start too.
  """
  # TODO: use glob.translate from Python 3.13
  wildcards = {
    '**': r'.*',
    '*':  r'[^/]*',
    '?':  r'[^/]',      # like '*', a single '?' must not cross a path separator
  }
  # The capturing group makes re.split() return the wildcards as list items;
  # '**' comes first in the alternation so it is not split into two '*'.
  parts = re.split (r'(\*\*|\*|\?)', inputstr)
  pat = ''.join (wildcards.get (part) or re.escape (part) for part in parts)
  return fr'(?s:{pat})\Z'

def prepare_globs (config):
  """Collect the glob sections of `config` as compiled match patterns.

  Every key of `config.sections` that contains a '?' or '*' wildcard is
  treated as a glob. Returns a list of [regex, glob, fields] entries, where
  `regex` is the compiled translation of the `glob` key and `fields` is the
  value stored for that section. Entries are ordered by descending glob
  length, so longer globs are tried before shorter ones.
  """
  globs = [(k, v) for k, v in config.sections.items() if '?' in k or '*' in k]
  # sort, longer matches come first (stable for globs of equal length)
  globs.sort (key = lambda kv: -len (kv[0]))
  return [[re.compile (glob_translate (k)), k, v] for k, v in globs]

def match_glob (config, filename, used_globs):
  """Check `filename` against the module-level `glob_patterns`.

  The patterns are tried in list order; for the first one matching
  `filename`, its section key is added to the `used_globs` set and True is
  returned. Returns False if no pattern matches. `config` is not used here.
  """
  for regex, section, _fields in glob_patterns:
    if regex.match (filename) is None:
      continue
    used_globs.add (section)
    return True
  return False

def find_copyrights (filename):
  """Extract copyright notices from the file `filename`.

  Returns a dict that maps the non-year part of each notice (the text
  captured next to the year range) to the concatenated parse_years()
  results of all notices with that text. Returns None if `filename` is a
  directory.
  """
  try:
    # read file; the with-block closes it again even if reading fails
    with open (filename, 'rb') as ofile:
      # NOTE(review): strict decoding raises UnicodeDecodeError for files
      # that are not valid UTF-8 - confirm callers only pass text files
      utxt = ofile.read().decode ('utf-8')
  except IsADirectoryError:
    return # ignore dirs
  # pre filter relevant lines, only lines containing 'Copyright' are examined
  lines = copyright_prefilter.findall (utxt)
  # extract specific copyright patterns
  digits = '0123456789'
  found = {}
  for line in lines:
    line = line.strip()
    for crpattern in copyright_patterns():
      m = crpattern.match (line)
      if not m:
        continue
      years, text = m.group (1).strip(), m.group (2).strip()
      if not years or years[0] not in digits:
        # the year range may be in either group, try the other one
        if not text or text[0] not in digits:
          continue # no group starts with a digit, try the next pattern
        years, text = text, years
      found[text] = found.get (text, []) + parse_years (years)
      break # the first usable pattern wins for this line
  return found

def license_patterns():
Expand Down Expand Up @@ -253,6 +298,8 @@ class Config (object): pass
def mkcopyright (sysargv):
# parse options and check inputs
config = parse_options (sysargv)
global glob_patterns
glob_patterns = prepare_globs (config)
fileiter = ()
if config.filelist:
fileiter = open (config.filelist, 'rt').read().splitlines()
Expand All @@ -269,10 +316,16 @@ def mkcopyright (sysargv):
# gather copyrights and licenses
count_unlicensed = 0
used_licenses = set()
used_globs = set()
for filename in itertools.chain (config.argv, fileiter):
# ignore files
if match_section (filename, config, 'ignore'):
continue
# filter globs
if match_glob (config, filename, used_globs):
continue
if os.path.isdir (filename):
continue
# detect license
license = find_license (filename, config)
count_unlicensed += not license
Expand Down Expand Up @@ -327,6 +380,18 @@ def mkcopyright (sysargv):
print ('License:', license or '?')
if license:
used_licenses.add (license)
# print copyright globs
sorted_globs = sorted (sorted (used_globs), key = lambda k: -len (k))
for glob_x in sorted_globs:
print ('\nFiles:', glob_x.rstrip ('*'))
ts = config.sections[glob_x]
print ('Copyright:', ts['Copyright'])
license = ts['License']
if not license in spdx_licenses:
print ('%s: %s: error: unknown license ID: %s' % (sysargv[0], glob_x, license), file = sys.stderr)
sys.exit (1)
print ('License:', license or '?')
used_licenses.add (license)
# Print license identifiers
for l in sorted (used_licenses):
name, links = spdx_licenses.get (l, ('',''))
Expand Down

0 comments on commit 4f9af4f

Please sign in to comment.