-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathxgc_syslog.theta
executable file
·109 lines (99 loc) · 4.61 KB
/
xgc_syslog.theta
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/bin/csh
# XGC theta syslog script:
# xgc_syslog.theta <sampling interval (in seconds)> <number of processes> <job identifier> <run directory> <output file name> <timing directory> <output directory> <estimate remaining time: 0/1>
#
# Positional arguments, as used by this script:
#   $1 sample_interval  seconds between samples; the periodic sampling loop runs only when > 0
#   $2 nprocess         number of processes (captured here but not referenced again in this script)
#   $3 jid              job identifier passed to qstat and embedded in the names of the files written
#   $4 run              run directory that holds the job output file
#   $5 outfile          name of the job output file inside the run directory
#   $6 timing           directory whose files are copied into the output directory on every sample
#   $7 dir              output directory that receives the snapshots and status files
#   $8 est_remaining    when > 0, the remaining wallclock time (in seconds) is written to
#                       <run directory>/Walltime.Remaining
set sample_interval = $1
set nprocess = $2
set jid = $3
set run = $4
set outfile = $5
set timing = $6
set dir = $7
set est_remaining = $8
# Wait until job mapping information is output before saving output file.
# Target length was determined empirically (more lines than number of MPICH environment variables
# plus number of lines for mapping information), so it may need to be adjusted in the future.
set nnodes = `qstat -l --header Nodes $jid | grep -F Nodes | sed 's/ *Nodes *: *\([0-9]*\)/\1/' `
# If qstat produced no node count, fall back to 0 so that the arithmetic below
# stays well-formed; an empty operand is a csh expression error.
if ("X$nnodes" == "X") set nnodes = 0
@ target_lines = 135 + $nnodes

# Poll every 30 seconds until the job output file exists.
while (! -e $run/$outfile)
    sleep 30
end

# Poll every 20 seconds until the output file has at least target_lines lines.
while (1)
    set outlth = `wc \-l $run/$outfile | sed 's/ *\([0-9]*\) *.*/\1/' `
    # Treat a failed or empty line count as 0 and keep waiting.
    if ("X$outlth" == "X") set outlth = 0
    if ($outlth >= $target_lines) break
    sleep 20
end
# Initial snapshot: ask the scheduler for this job's TimeRemaining (H:M:S),
# convert it to seconds, and archive the job output and node status in files
# whose names end in that number of seconds.
set TimeRemaining = `qstat -l --header TimeRemaining $jid | grep -F TimeRemaining | sed 's/^ *TimeRemaining *: *\([0-9]*:[0-9]*:[0-9]*\) */\1/' `

# The sed pattern drops leading zeros from each field, so a field made up only
# of zeros (or a missing TimeRemaining) comes back empty and is reset to 0.
set rem_hours = `echo $TimeRemaining | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\1/' `
if ("X$rem_hours" == "X") set rem_hours = 0

set rem_mins = `echo $TimeRemaining | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\2/' `
if ("X$rem_mins" == "X") set rem_mins = 0

set rem_secs = `echo $TimeRemaining | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\3/' `
if ("X$rem_secs" == "X") set rem_secs = 0

# Total remaining wallclock time in seconds.
@ remaining = 3600 * $rem_hours
@ remaining += 60 * $rem_mins
@ remaining += $rem_secs

# The snapshot is taken only when the output directory already exists.
if (-d $dir) then
    /bin/cp --preserve=timestamps $run/$outfile $dir/$outfile.$jid.$remaining
    xtnodestat > $dir/xtnodestat.$jid.$remaining
endif
if ($est_remaining > 0) then
    # (try to) wait until application code has initialized MPI before recording
    # remaining time for use in early termination logic.
    # The wait polls for $run/xgc_mpi_init every 10 seconds and stops once more
    # than one tenth of the previously measured remaining time has been slept.
    @ remaining_tenth = $remaining / 10
    set sleep_counter = 0
    while (! -e $run/xgc_mpi_init)
        sleep 10
        @ sleep_counter += 10
        if ($sleep_counter > $remaining_tenth) break
    end

    # src records how the wait ended: 4 when the xgc_mpi_init file is present,
    # 3 when it is still absent.
    set src = 3
    if (-e $run/xgc_mpi_init) set src = 4

    # Query the scheduler again and convert H:M:S to seconds. The sed pattern
    # drops leading zeros, so an all-zero or missing field comes back empty and
    # is reset to 0.
    set TimeRemaining = `qstat -l --header TimeRemaining $jid | grep -F TimeRemaining | sed 's/^ *TimeRemaining *: *\([0-9]*:[0-9]*:[0-9]*\) */\1/' `

    set rem_hours = `echo $TimeRemaining | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\1/' `
    if ("X$rem_hours" == "X") set rem_hours = 0

    set rem_mins = `echo $TimeRemaining | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\2/' `
    if ("X$rem_mins" == "X") set rem_mins = 0

    set rem_secs = `echo $TimeRemaining | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\3/' `
    if ("X$rem_secs" == "X") set rem_secs = 0

    @ remaining = 3600 * $rem_hours
    @ remaining += 60 * $rem_mins
    @ remaining += $rem_secs

    # Write the rem_param group to the run directory. The here-document body
    # and its terminator must stay at column 0.
    cat > $run/Walltime.Remaining << EOF1
&rem_param
rem_walltime = $remaining
rem_walltime_src = $src
/
EOF1
endif
# Periodic sampling: while the scheduler reports wallclock time remaining,
# record progress and status files in the output directory, then sleep for
# the sampling interval and query the remaining time again.
if ($sample_interval > 0) then
    while ($remaining > 0)
        # Append the remaining time and the last six "step," lines of the job
        # output to the running step log.
        echo "Wallclock time remaining: $remaining" >> $dir/$outfile.$jid.step
        grep -Fa "step," $run/$outfile | tail -n 6 >> $dir/$outfile.$jid.step
        # tail -n 10 $run/$outfile > $dir/$outfile.$jid.step.$remaining

        # Copy new or updated timing files. While the timing directory is
        # still empty, "$timing/*" would raise csh's "No match" error, which
        # terminates the script; nonomatch passes the pattern through
        # unchanged, so cp merely reports a missing file and sampling goes on.
        set nonomatch
        /bin/cp --preserve=timestamps -u $timing/* $dir
        unset nonomatch

        # Node and queue status, stamped with the remaining seconds.
        xtnodestat > $dir/xtnodestat.$jid.$remaining
        qstat --header JobID:State:Nodes:Location | grep -Fa -e "running" -e "State" -e "starting" -e "exiting" > $dir/qstatn.$jid.$remaining
        qstat --header JobID:JobName:User:Project:WallTime:RunTime:TimeRemaining:Nodes:State:StartTime:attrs | grep -Fa -e "running" -e "State" -e "starting" -e "exiting" > $dir/qstatr.$jid.$remaining

        # Make everything under the output directory readable by all and
        # writable by the group.
        chmod -R a+rX $dir
        chmod -R g+w $dir

        # Sleep for sample_interval seconds in pieces of at most 120 seconds
        # (this replaced a single "sleep $sample_interval").
        set sleep_remaining = $sample_interval
        while ($sleep_remaining > 120)
            sleep 120
            @ sleep_remaining -= 120
        end
        sleep $sleep_remaining

        # Refresh the remaining time. The sed pattern drops leading zeros, so
        # an all-zero or missing field comes back empty and is reset to 0;
        # the loop ends once the total reaches 0.
        set TimeRemaining = `qstat -l --header TimeRemaining $jid | grep -F TimeRemaining | sed 's/^ *TimeRemaining *: *\([0-9]*:[0-9]*:[0-9]*\) */\1/' `
        set rem_hours = `echo $TimeRemaining | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\1/' `
        set rem_mins = `echo $TimeRemaining | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\2/' `
        set rem_secs = `echo $TimeRemaining | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\3/' `
        if ("X$rem_hours" == "X") set rem_hours = 0
        if ("X$rem_mins" == "X") set rem_mins = 0
        if ("X$rem_secs" == "X") set rem_secs = 0
        @ remaining = 3600 * $rem_hours + 60 * $rem_mins + $rem_secs
    end
endif
# Final cleanup: delete this job's syslog_jobid file from the run directory
# when it is present.
if (-e $run/syslog_jobid.$jid) /bin/rm -f $run/syslog_jobid.$jid