-
Notifications
You must be signed in to change notification settings - Fork 50
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Check for syntax errors in CODEOWNERS file
A significant class of errors with CODEOWNERS file are syntax errors, that is, malformed entries. This change adds checks for validating the codeowners file while it is being parsed. It will do a couple of simple checks, like: - if the format is: `pattern owner1 ... ownerN` - if the usernames/team names are following GitHubs restrictions - if the email looks like an email address (something @ something . something)
- Loading branch information
Showing
2 changed files
with
87 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ import ( | |
"io" | ||
"os" | ||
"path" | ||
"regexp" | ||
"strings" | ||
|
||
"github.com/spf13/afero" | ||
|
@@ -32,8 +33,7 @@ func NewFromPath(path string) ([]Entry, error) { | |
return nil, err | ||
} | ||
|
||
entries := ParseCodeowners(r) | ||
return entries, nil | ||
return parseCodeowners(r) | ||
} | ||
|
||
// openCodeownersFile finds a CODEOWNERS file and returns content. | ||
|
@@ -66,28 +66,63 @@ func openCodeownersFile(dir string) (io.Reader, error) { | |
return nil, fmt.Errorf("No CODEOWNERS found in the root, docs/, or .github/ directory of the repository %s", dir) | ||
} | ||
|
||
func ParseCodeowners(r io.Reader) []Entry { | ||
func parseCodeowners(r io.Reader) ([]Entry, error) { | ||
var e []Entry | ||
|
||
usernameOrTeamRegexp := regexp.MustCompile(`^@(?i:[a-z\d](?:[a-z\d-]){0,37}[a-z\d](/[a-z\d](?:[a-z\d_-]*)[a-z\d])?)$`) | ||
emailRegexp := regexp.MustCompile(`.+@.+\..+`) | ||
|
||
s := bufio.NewScanner(r) | ||
no := uint64(0) | ||
for s.Scan() { | ||
no++ | ||
fields := strings.Fields(s.Text()) | ||
|
||
if len(fields) == 0 { // empty | ||
line := s.Text() | ||
if strings.HasPrefix(line, "#") { // comment | ||
continue | ||
} | ||
|
||
if strings.HasPrefix(fields[0], "#") { // comment | ||
if len(line) == 0 { // empty | ||
continue | ||
} | ||
|
||
fields := strings.Fields(s.Text()) | ||
|
||
if len(fields) < 2 { | ||
return e, fmt.Errorf("line %d does not have 2 or more fields", no) | ||
} | ||
|
||
// This does syntax validation only | ||
// | ||
// Syntax check: all fields are valid team/username identifiers or emails | ||
// Allowed owner syntax: | ||
// @username | ||
// @org/team-name | ||
// [email protected] | ||
// source: https://help.github.com/articles/about-code-owners/#codeowners-syntax | ||
for _, entry := range fields[1:] { | ||
if strings.HasPrefix(entry, "@") { | ||
// A valid username/organization name has up to 39 characters (per GitHub Join page) | ||
// and is matched by the following regex: /^[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38}$/i | ||
// A valid team name consists of alphanumerics, underscores and dashes | ||
if !usernameOrTeamRegexp.MatchString(entry) { | ||
return e, fmt.Errorf("entry '%s' on line %d does not look like a GitHub username or team name", entry, no) | ||
} | ||
} else { | ||
// Per: https://davidcel.is/posts/stop-validating-email-addresses-with-regex/ | ||
// just check if there is '@' and a '.' afterwards | ||
if !emailRegexp.MatchString(entry) { | ||
return e, fmt.Errorf("entry '%s' on line %d does not look like an email", entry, no) | ||
} | ||
} | ||
} | ||
|
||
e = append(e, Entry{ | ||
Pattern: fields[0], | ||
Owners: fields[1:], | ||
LineNo: no, | ||
}) | ||
} | ||
|
||
return e | ||
return e, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters