This repository has been archived by the owner on May 10, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTaskfile.yml
99 lines (89 loc) · 3.5 KB
/
Taskfile.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# https://github.com/opencultureconsulting/openrefine-task-runner
version: '3'
includes:
alephino: alephino
barcodes: barcodes
bibliotheca: bibliotheca
pica+: pica+
silent: true
output: prefixed
env:
OPENREFINE:
sh: readlink -m .openrefine/refine
CLIENT:
sh: readlink -m .openrefine/client
tasks:
default:
desc: Datenverarbeitung sequentiell
cmds:
- task: alephino:main
- task: bibliotheca:main
- task: pica+:refine
install:
desc: (re)install OpenRefine and openrefine-client into subdirectory .openrefine
cmds:
- | # delete existing install and recreate folder
rm -rf .openrefine
mkdir -p .openrefine
- > # download OpenRefine archive
wget --no-verbose -O openrefine.tar.gz
https://github.com/OpenRefine/OpenRefine/releases/download/3.4.1/openrefine-linux-3.4.1.tar.gz
- | # install OpenRefine into subdirectory .openrefine
tar -xzf openrefine.tar.gz -C .openrefine --strip 1
rm openrefine.tar.gz
- | # optimize OpenRefine for batch processing
sed -i 's/cd `dirname $0`/cd "$(dirname "$0")"/' ".openrefine/refine" # fix path issue in OpenRefine startup file
sed -i '$ a JAVA_OPTIONS=-Drefine.headless=true' ".openrefine/refine.ini" # do not try to open OpenRefine in browser
sed -i 's/#REFINE_AUTOSAVE_PERIOD=60/REFINE_AUTOSAVE_PERIOD=1440/' ".openrefine/refine.ini" # set autosave period from 5 minutes to 25 hours
- > # download openrefine-client into subdirectory .openrefine
wget --no-verbose -O .openrefine/client
https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.10/openrefine-client_0-3-10_linux
- chmod +x .openrefine/client # make client executable
start:
dir: ./{{.DIR}}
cmds:
- | # verify that OpenRefine is installed
if [ ! -f "$OPENREFINE" ]; then
echo 1>&2 "OpenRefine missing; try task install"; exit 1
fi
- | # delete temporary files and log file of previous run
rm -rf ./*.project* workspace.json
rm -rf "{{.PROJECT}}.log"
- > # launch OpenRefine with specific data directory and redirect its output to a log file
"$OPENREFINE" -v warn -p {{.PORT}} -m {{.RAM}}
-d ../{{.DIR}}
>> "{{.PROJECT}}.log" 2>&1 &
- | # wait until OpenRefine API is available
timeout 30s bash -c "until
wget -q -O - http://localhost:{{.PORT}} | cat | grep -q -o OpenRefine
do sleep 1
done"
stop:
dir: ./{{.DIR}}
cmds:
- | # shut down OpenRefine gracefully
PID=$(lsof -t -i:{{.PORT}})
kill $PID
while ps -p $PID > /dev/null; do sleep 1; done
- > # archive the OpenRefine project
tar cfz
"{{.PROJECT}}.openrefine.tar.gz"
-C $(grep -l "{{.PROJECT}}" *.project/metadata.json | cut -d '/' -f 1)
.
- rm -rf ./*.project* workspace.json # delete temporary files
kill:
dir: ./{{.DIR}}
cmds:
- | # shut down OpenRefine immediately to save time and disk space
PID=$(lsof -t -i:{{.PORT}})
kill -9 $PID
while ps -p $PID > /dev/null; do sleep 1; done
- rm -rf ./*.project* workspace.json # delete temporary files
check:
desc: check OpenRefine log for any warnings and exit on error
dir: ./{{.DIR}}
cmds:
- | # find log file(s) and check for "exception" or "error"
if grep -i 'exception\|error' $(find . -name '*.log'); then
echo 1>&2 "log contains warnings!"; exit 1
fi