-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathxgc_syslog.cori
executable file
·121 lines (109 loc) · 4.88 KB
/
xgc_syslog.cori
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/bin/csh
# XGC cori syslog script:
# xgc_syslog.cori <sampling interval (in seconds)> <number of processes> <job identifier> <run directory> <output file name> <timing directory> <output directory> <estimate remaining time: 0/1>
#
# All eight arguments are required.  A missing argument would otherwise leave
# an empty variable behind and only surface much later as an expression
# syntax error, so the argument count is checked up front.
if ($#argv < 8) then
    echo 'usage: xgc_syslog.cori <sampling interval (in seconds)> <number of processes> <job identifier> <run directory> <output file name> <timing directory> <output directory> <estimate remaining time: 0/1>'
    exit 1
endif
# Positional parameters are quoted so that a value containing blanks or glob
# characters is stored as given instead of being re-split or expanded.
set sample_interval = "$1"
set nprocess = "$2"
set jid = "$3"
set run = "$4"
set outfile = "$5"
set timing = "$6"
set dir = "$7"
set est_remaining = "$8"
# Hold off on saving the output file until the job mapping information has
# been written to it.  The target length was determined empirically, so it
# may need to be adjusted in the future.
# Disabled alternative that sizes the target from the node count:
# set nnodes = `sqs -f $jid | grep -F NumNodes | sed 's/ *NumNodes=\([0-9]*\) .*/\1/' `
# @ target_lines = 150 + $nnodes
@ target_lines = $nprocess + 150
# Poll every 30 seconds until the output file exists.
while (1)
    if (-e $run/$outfile) break
    sleep 30
end
# Poll every 20 seconds until the file holds at least target_lines lines.
while (1)
    set outlth = `wc -l < $run/$outfile`
    if ($outlth >= $target_lines) break
    sleep 20
end
# Determine the wallclock limit of the job in seconds (limit).
# The TimeLimit field is read from the "RunTime=... TimeLimit=..." line of
# the sqs -f output.  Slurm prints durations of a day or more as
# D-HH:MM:SS, so an optional leading day count is accepted in addition to
# the plain HH:MM:SS form.
set TimeLimit = `sqs -f $jid | grep -F TimeLimit | sed 's/^ *RunTime=.*TimeLimit=\([0-9-]*:[0-9]*:[0-9]*\) .*/\1/' `
# The day count is only printed when a "D-" prefix is present.
set limit_days = `echo $TimeLimit | sed -n 's/^0*\([0-9]*\)-.*/\1/p' `
# Drop any day prefix, then strip leading zeros from each field so that the
# arithmetic below never sees a value such as 08 that could be read as octal.
set limit_hours = `echo $TimeLimit | sed 's/^[0-9]*-//;s/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\1/' `
set limit_mins = `echo $TimeLimit | sed 's/^[0-9]*-//;s/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\2/' `
set limit_secs = `echo $TimeLimit | sed 's/^[0-9]*-//;s/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\3/' `
# A field that consisted only of zeros (or was absent) is now empty.
if ("X$limit_days" == "X") set limit_days = 0
if ("X$limit_hours" == "X") set limit_hours = 0
if ("X$limit_mins" == "X") set limit_mins = 0
if ("X$limit_secs" == "X") set limit_secs = 0
@ limit = 86400 * $limit_days + 3600 * $limit_hours + 60 * $limit_mins + $limit_secs
# Compute the elapsed (runt) and remaining (remaining) wallclock seconds.
# The RunTime field is read from the sqs -f output.  Slurm prints durations
# of a day or more as D-HH:MM:SS, so an optional leading day count is
# accepted in addition to the plain HH:MM:SS form.
set RunTime = `sqs -f $jid | grep -F RunTime | sed 's/^ *RunTime=\([0-9-]*:[0-9]*:[0-9]*\) .*/\1/' `
# The day count is only printed when a "D-" prefix is present.
set runt_days = `echo $RunTime | sed -n 's/^0*\([0-9]*\)-.*/\1/p' `
# Drop any day prefix, then strip leading zeros from each field so that the
# arithmetic below never sees a value such as 08 that could be read as octal.
set runt_hours = `echo $RunTime | sed 's/^[0-9]*-//;s/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\1/' `
set runt_mins = `echo $RunTime | sed 's/^[0-9]*-//;s/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\2/' `
set runt_secs = `echo $RunTime | sed 's/^[0-9]*-//;s/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\3/' `
# A field that consisted only of zeros (or was absent) is now empty.
if ("X$runt_days" == "X") set runt_days = 0
if ("X$runt_hours" == "X") set runt_hours = 0
if ("X$runt_mins" == "X") set runt_mins = 0
if ("X$runt_secs" == "X") set runt_secs = 0
@ runt = 86400 * $runt_days + 3600 * $runt_hours + 60 * $runt_mins + $runt_secs
@ remaining = $limit - $runt
# Save a copy of the output file, tagged with the job identifier and the
# remaining time, provided the output directory exists.
if (-d $dir) then
    /bin/cp --preserve=timestamps $run/$outfile $dir/$outfile.$jid.$remaining
endif
if ($est_remaining > 0) then
    # (Try to) wait until the application code has initialized MPI before
    # recording the remaining time for use in early termination logic.
    # The wait polls for the file xgc_mpi_init in the run directory every
    # 10 seconds and gives up once more than a tenth of the remaining
    # wallclock time has been spent sleeping.
    set sleep_counter = 0
    @ remaining_tenth = $remaining / 10
    while (! -e $run/xgc_mpi_init)
        sleep 10
        @ sleep_counter = $sleep_counter + 10
        if ($sleep_counter > $remaining_tenth) then
            break
        endif
    end
    # src records how the wait ended: 4 when xgc_mpi_init exists, 3 when the
    # wait was abandoned without it.
    if (-e $run/xgc_mpi_init) then
        set src = 4
    else
        set src = 3
    endif
    # Refresh the elapsed time after the wait.  Slurm prints durations of a
    # day or more as D-HH:MM:SS, so an optional leading day count is accepted
    # in addition to the plain HH:MM:SS form.
    set RunTime = `sqs -f $jid | grep -F RunTime | sed 's/^ *RunTime=\([0-9-]*:[0-9]*:[0-9]*\) .*/\1/' `
    # The day count is only printed when a "D-" prefix is present.
    set runt_days = `echo $RunTime | sed -n 's/^0*\([0-9]*\)-.*/\1/p' `
    # Drop any day prefix, then strip leading zeros from each field so that
    # the arithmetic below never sees a value that could be read as octal.
    set runt_hours = `echo $RunTime | sed 's/^[0-9]*-//;s/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\1/' `
    set runt_mins = `echo $RunTime | sed 's/^[0-9]*-//;s/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\2/' `
    set runt_secs = `echo $RunTime | sed 's/^[0-9]*-//;s/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\3/' `
    # A field that consisted only of zeros (or was absent) is now empty.
    if ("X$runt_days" == "X") set runt_days = 0
    if ("X$runt_hours" == "X") set runt_hours = 0
    if ("X$runt_mins" == "X") set runt_mins = 0
    if ("X$runt_secs" == "X") set runt_secs = 0
    @ runt = 86400 * $runt_days + 3600 * $runt_hours + 60 * $runt_mins + $runt_secs
    @ remaining = $limit - $runt
    # Write the remaining time and its source code to Walltime.Remaining in
    # the run directory.  The here-document lines must stay in column one.
    cat > $run/Walltime.Remaining << EOF1
&rem_param
rem_walltime = $remaining
rem_walltime_src = $src
/
EOF1
endif
if ($sample_interval > 0) then
    # Sample periodically while some wallclock time is left.  The loop also
    # stops when remaining equals limit, which is the result when no RunTime
    # can be parsed and every field defaults to 0 (presumably the case once
    # the job is no longer known to the scheduler).
    while (($remaining > 0) && ($remaining < $limit))
        # Append the remaining time and the latest step lines to the step log.
        echo "Wallclock time remaining: $remaining" >> $dir/$outfile.$jid.step
        grep -Fa "step," $run/$outfile | tail -n 6 >> $dir/$outfile.$jid.step
        ## grep -a -i "step,ratio" $run/$outfile | tail > $dir/$outfile.$jid.step.$remaining
        # tail -n 10 $run/$outfile > $dir/$outfile.$jid.step.$remaining
        # Copy new or updated timing files and snapshot the queue state.
        /bin/cp --preserve=timestamps -u $timing/* $dir
        squeue -t R -o "%.10i %.15P %.20j %.10u %.7a %.2t %.6D %.8C %.10M %.10l" > $dir/squeuef.$jid.$remaining
        squeue -s | grep -v -F extern > $dir/squeues.$jid.$remaining
        # Make the collected files readable by all and writable by the group.
        chmod -R a+rX $dir
        chmod -R g+w $dir
        # sleep $sample_interval
        # Sleep for the sampling interval in chunks of at most 120 seconds.
        set sleep_remaining = $sample_interval
        while ($sleep_remaining > 120)
            sleep 120
            @ sleep_remaining = $sleep_remaining - 120
        end
        sleep $sleep_remaining
        # Refresh the elapsed time.  Slurm prints durations of a day or more
        # as D-HH:MM:SS, so an optional leading day count is accepted in
        # addition to the plain HH:MM:SS form.
        set RunTime = `sqs -f $jid | grep -F RunTime | sed 's/^ *RunTime=\([0-9-]*:[0-9]*:[0-9]*\) .*/\1/' `
        # The day count is only printed when a "D-" prefix is present.
        set runt_days = `echo $RunTime | sed -n 's/^0*\([0-9]*\)-.*/\1/p' `
        # Drop any day prefix, then strip leading zeros from each field so
        # that the arithmetic never sees a value that could be read as octal.
        set runt_hours = `echo $RunTime | sed 's/^[0-9]*-//;s/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\1/' `
        set runt_mins = `echo $RunTime | sed 's/^[0-9]*-//;s/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\2/' `
        set runt_secs = `echo $RunTime | sed 's/^[0-9]*-//;s/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\3/' `
        # A field that consisted only of zeros (or was absent) is now empty.
        if ("X$runt_days" == "X") set runt_days = 0
        if ("X$runt_hours" == "X") set runt_hours = 0
        if ("X$runt_mins" == "X") set runt_mins = 0
        if ("X$runt_secs" == "X") set runt_secs = 0
        @ runt = 86400 * $runt_days + 3600 * $runt_hours + 60 * $runt_mins + $runt_secs
        @ remaining = $limit - $runt
    end
endif
# On the way out, delete the syslog_jobid.<job identifier>.X file from the
# run directory when it is present.
if (-e $run/syslog_jobid.$jid.X) /bin/rm -f $run/syslog_jobid.$jid.X