Skip to content

Commit 036532b

Browse files
committed
script for linting .tsv files
1 parent e01f88a commit 036532b

File tree

2 files changed

+51
-0
lines changed

2 files changed

+51
-0
lines changed

.github/workflows/lint-tsv

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#!/usr/bin/env -S awk -f
# lint .tsv files
# - make sure every record is the same length
# - make sure every field has content - even a null value should at least be '-'
# <[email protected]> 2021, public domain
#
# Usage: lint-tsv FILE.tsv [FILE.tsv ...]
#
# Each input file is checked against its own header row (its first line).
# Diagnostics are printed to stdout. The exit status is 1 if any problem was
# found in any file, and 0 otherwise.
#
# TODO: package this for public consumption
# TODO: make BSD-awk compatible

# Describe the current record for use in a diagnostic.
# With a single input this is just "line N", so single-file output is
# unchanged; the file name is added only when several command-line operands
# were given, where a bare line number would be ambiguous.
function where() {
	return (MULTI ? FILENAME ", " : "") "line " FNR
}

BEGIN {
	FS = "\t"
	# ARGV[0] is the interpreter, so more than one operand means ARGC > 2.
	MULTI = (ARGC > 2)
}

# The first line of *each* file is that file's header. Using FNR rather than
# NR keeps later files from being compared against the first file's width.
FNR == 1 { NF_HEADER = NF }

# Every record must have as many columns as its file's header.
NF != NF_HEADER {
	print "incorrect number of columns, " where()
	LINE_ERROR = 1
}

# Every field must be non-empty and free of surrounding whitespace.
{
	for (i = 1; i <= NF; i++) {
		if ($i ~ /^[[:space:]]*$/) {
			# Nothing but whitespace (or nothing at all) in this field.
			print "empty field, " where() ", column " i ". Please use '-' for null values."
			LINE_ERROR = 1
		}
		else if ($i ~ /^[[:space:]]/ || $i ~ /[[:space:]]$/) {
			# Has content, but it is padded on at least one side.
			print "extraneous whitespace, " where() ", column " i ": '" $i "'"
			LINE_ERROR = 1
		}
	}
}

# After all checks for this record: echo the offending line once, remember
# that the run failed, and clear the per-record flag for the next record.
LINE_ERROR {
	print "errors in " where() ": \n\t'" $0 "'\n"
	ANY_ERROR = 1
	LINE_ERROR = 0
}

# Fail the run if any record in any file had a problem.
END { if (ANY_ERROR) { exit 1 } }

.github/workflows/validator.yml

+3
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ jobs:
4343
exit 1
4444
fi
4545
46+
- name: Lint participants.tsv
47+
run: .github/workflows/lint-tsv participants.tsv
48+
4649
#- name: Update software
4750
# run: |
4851
# # do we want to do this? it's helpful to avoid testing against surprise out-of-date software, but also so slow.

0 commit comments

Comments
 (0)