#!/usr/bin/python
#
#  prologue.user script for Machine/Job Features on Torque/PBS
#
#  Andrew McNab, University of Manchester.
#  Copyright (c) 2016. All rights reserved.
#
#  Redistribution and use in source and binary forms, with or
#  without modification, are permitted provided that the following
#  conditions are met:
#
#    o Redistributions of source code must retain the above
#      copyright notice, this list of conditions and the following
#      disclaimer. 
#    o Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution. 
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
#  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
#  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
#  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
#  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
#  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
#  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
#  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
#  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
#  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
#  POSSIBILITY OF SUCH DAMAGE.
#
#  Create $JOBFEATURES files following the Machine/Job Features
#  specification in HSF-TN-2016-02
#
#  This script creates the $JOBFEATURES directory at
#  /tmp/mjf-$USER/jobfeatures-$PBS_JOBID and attempts to populate it
#  from Torque/PBS information and from
#  $MACHINEFEATURES=/etc/machinefeatures
#
#  If the following variables are present in /var/run/mjf or
#  /etc/sysconfig/mjf then they are used as defaults if not given
#  by Torque/PBS:
#
#  - allocated_cpu
#  - wall_limit_secs
#  - cpu_limit_secs
#  - max_rss_bytes
#  - max_swap_bytes
#  - scratch_limit_bytes
#  - hs06_job
#  - db12_job
#
#  Values in /var/run/mjf are preferred over /etc/sysconfig/mjf.
#
#  Additionally you can set mjf_tmp_dir in those files to use a
#  directory other than /tmp for the mjf-$USER directories.
#

import os
import re
import stat
import sys
import time

# Clear the group/other write bits (umask 022) so that the directories and
# files created below are writable only by their owner. The mask is built
# from stat constants because the octal literal 0022 is Python 2-only syntax.
os.umask(stat.S_IWGRP | stat.S_IWOTH)

# The job ID is taken from the first command line argument
try:
  job_id = sys.argv[1]
except IndexError:
  # print(...) with a single argument behaves the same on Python 2 and 3
  print('JOB ID not given on command line')
  sys.exit(11)

def readOptionalFile(path):
  """Return the whole text of the file at path, or '' if it is unreadable.

  A missing or unreadable file is not an error here: the caller simply gets
  an empty string. The file is closed explicitly before returning.
  """
  try:
    with open(path, 'r') as optionalFile:
      return optionalFile.read()
  except (IOError, OSError):
    return ''

# Values in /var/run/mjf take precedence
fromRun = readOptionalFile('/var/run/mjf')

# Also look in persistent /etc/sysconfig/mjf
fromSysconfig = readOptionalFile('/etc/sysconfig/mjf')

# Default parent directory of the per-user mjf-$USER directories
mjfTmpDir = '/tmp'

# A line starting with mjf_tmp_dir= in either file overrides the default.
# re.MULTILINE makes ^ match at the start of every line; without it the
# setting was only seen when it was the very first line of /var/run/mjf.
# /var/run/mjf comes first in the searched text, so its value wins when
# both files set it.
matchObject = re.search(r'^mjf_tmp_dir=(.*)',
                        fromRun + '\n' + fromSysconfig, re.MULTILINE)

# Ignore an empty value rather than ending up with paths relative to /
if matchObject and matchObject.group(1).strip():
  mjfTmpDir = matchObject.group(1).strip()

# First make sure the mjfTmpDir/mjf-$USER directory exists. The user name
# is taken from the second command line argument.
userDir = mjfTmpDir + '/mjf-' + sys.argv[2]

try:
  os.mkdir(userDir)
except OSError:
  # Ok if exists already; any other failure is detected by lstat() below
  pass

# lstat() rather than stat(): the parent directory defaults to /tmp, so a
# symbolic link planted under this name must not be followed.
try:
  userDirStat = os.lstat(userDir)
except OSError:
  print(userDir + ' could not be created')
  sys.exit(12)

if not stat.S_ISDIR(userDirStat.st_mode):
  print(userDir + ' is not a directory')
  sys.exit(13)

# Check it is owned by us
if userDirStat.st_uid != os.getuid() or userDirStat.st_gid != os.getgid():
  print(userDir + ' has the wrong UID/GID')
  sys.exit(13)

# Ensure the right permissions (rwxr-xr-x)
os.chmod(userDir,
         stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)

# Now the directory for this job. Any mkdir() failure, including the
# directory already existing, is fatal.
jobfeaturesDir = userDir + '/jobfeatures-' + job_id

try:
  os.mkdir(jobfeaturesDir)
except OSError:
  print('Failed to create ' + jobfeaturesDir)
  sys.exit(14)

# Publish the job ID itself
with open(jobfeaturesDir + '/job_id', 'w') as jobIdFile:
  jobIdFile.write(job_id)

# Values gathered from here on; allocated_cpu starts at one processor and
# may be replaced by a configured default or by the resource limits
jobfeatures = {'allocated_cpu': 1}

# The time at which this script runs is recorded as the job start time
jobstart_secs = int(time.time())
with open(jobfeaturesDir + '/jobstart_secs', 'w') as jobStartFile:
  jobStartFile.write('%d' % jobstart_secs)

# Get any defaults for these (integer) values. A setting must start a line
# (leading spaces or tabs allowed) as key=digits, so commented-out lines
# such as "#wall_limit_secs=100" are not picked up. At least one digit is
# required, so an empty value in /var/run/mjf does not hide a usable value
# in /etc/sysconfig/mjf. /var/run/mjf is searched first and so wins.
mjfSettings = fromRun + '\n' + fromSysconfig

for key in ['allocated_cpu', 'wall_limit_secs', 'cpu_limit_secs',
            'max_rss_bytes', 'max_swap_bytes', 'scratch_limit_bytes']:

  matchObject = re.search(r'^[ \t]*' + key + r'=([0-9]+)',
                          mjfSettings, re.MULTILINE)
  if matchObject:
    jobfeatures[key] = int(matchObject.group(1))

def allocatedCpuFromLimits(limits):
  """Return the ppn value of a nodes=N:ppn=M resource limit, or None.

  If just 1 processor-per-node then the limit is "nodes=1", but if 8, say,
  then "nodes=1:ppn=8". None is returned when no ppn value is present.
  """
  ppnMatchObject = re.search(r'nodes=[0-9]*:ppn=([0-9]+)', limits)
  if ppnMatchObject is None:
    return None
  return int(ppnMatchObject.group(1))

# Examine the 5th argument, for resource limits. If it was not given, or
# has no ppn value, allocated_cpu keeps whatever value it already has.
try:
  allocatedCpu = allocatedCpuFromLimits(sys.argv[5])
except IndexError:
  allocatedCpu = None

if allocatedCpu is not None:
  jobfeatures['allocated_cpu'] = allocatedCpu

def secondsFromLimit(name, limits):
  """Return the HH:MM:SS value of resource limit `name` in seconds, or None.

  name   -- resource name to look for, e.g. 'walltime' or 'cput'
  limits -- the resource limits string to search

  The name must not be preceded by a letter or underscore, so that looking
  for 'cput' does not pick up the value of 'pcput'. None is returned when
  the limit is absent or is not in HH:MM:SS form.
  """
  hmsMatchObject = re.search(
      r'(?<![A-Za-z_])' + name + r'=([0-9]+):([0-9]+):([0-9]+)', limits)
  if hmsMatchObject is None:
    return None
  hours, minutes, seconds = [int(field) for field in hmsMatchObject.groups()]
  return hours * 3600 + minutes * 60 + seconds

# The 5th argument holds the resource limits; treat it as empty if absent
try:
  resourceLimits = sys.argv[5]
except IndexError:
  resourceLimits = ''

# Only overwrite a value when the corresponding limit was actually given
for featureKey, resourceName in [('wall_limit_secs', 'walltime'),
                                 ('cpu_limit_secs', 'cput')]:
  limitSecs = secondsFromLimit(resourceName, resourceLimits)
  if limitSecs is not None:
    jobfeatures[featureKey] = limitSecs

# Bytes per Torque/PBS size suffix. Safer to assume powers of 1000 rather
# than 1024. A value with no suffix is taken to be in bytes.
memUnitBytes = {'': 1,
                'b': 1,
                'kb': 1000,
                'mb': 1000000,
                'gb': 1000000000,
                'tb': 1000000000000}

def maxRssBytesFromLimits(limits):
  """Return the mem= resource limit in bytes, or None.

  limits -- the resource limits string to search

  Only the mem limit itself is used: the name must not be preceded by a
  letter or underscore, so pmem=, vmem= and pvmem= are not mistaken for it.
  The unit suffix is matched case-insensitively. None is returned when mem
  is absent or has a suffix that is not a byte unit (e.g. word units).
  """
  rssMatchObject = re.search(r'(?<![A-Za-z_])mem=([0-9]+)([A-Za-z]*)', limits)
  if rssMatchObject is None:
    return None

  rssUnit = rssMatchObject.group(2).lower()
  if rssUnit not in memUnitBytes:
    return None

  return int(rssMatchObject.group(1)) * memUnitBytes[rssUnit]

# The 5th argument holds the resource limits; leave max_rss_bytes alone if
# it is absent or contains no usable mem limit
try:
  maxRssBytes = maxRssBytesFromLimits(sys.argv[5])
except IndexError:
  maxRssBytes = None

if maxRssBytes is not None:
  jobfeatures['max_rss_bytes'] = maxRssBytes

# Write out if these have been set from files or prologue.user arguments;
# each value goes into a file named after its key in the job's directory
for featureName in ('allocated_cpu', 'wall_limit_secs', 'cpu_limit_secs',
                    'max_rss_bytes', 'max_swap_bytes', 'scratch_limit_bytes'):

  if featureName not in jobfeatures:
    continue

  with open(jobfeaturesDir + '/' + featureName, 'w') as featureFile:
    featureFile.write(str(jobfeatures[featureName]))

# Try to get/calculate hs06_job
hs06_job = None

# A value configured in /var/run/mjf or /etc/sysconfig/mjf is used as is.
# The setting must start a line (leading spaces or tabs allowed), so
# commented-out lines are not picked up; /var/run/mjf is searched first.
matchObject = re.search(r'^[ \t]*hs06_job=([0-9.]+)',
                        fromRun + '\n' + fromSysconfig, re.MULTILINE)
if matchObject:
  try:
    hs06_job = float(matchObject.group(1))
  except ValueError:
    # Not a valid number (e.g. "1.2.3"): fall back to the calculation below
    pass

if not hs06_job:
  # Otherwise derive it from the whole-machine values, if both are readable
  try:
    with open('/etc/machinefeatures/hs06', 'r') as hs06File:
      hs06 = float(hs06File.readline())
  except (IOError, OSError, ValueError):
    hs06 = None

  try:
    with open('/etc/machinefeatures/total_cpu', 'r') as totalCpuFile:
      total_cpu = int(totalCpuFile.readline())
  except (IOError, OSError, ValueError):
    total_cpu = None

  if hs06 and total_cpu:
    # Simple pro-rata allocation of total hs06 depending on processors for this job
    hs06_job = (jobfeatures['allocated_cpu'] * hs06) / total_cpu

if hs06_job:
  # We got it from somewhere
  with open(jobfeaturesDir + '/hs06_job', 'w') as hs06JobFile:
    hs06JobFile.write('%.2f' % hs06_job)

# Try to get/calculate db12_job
db12_job = None

# A value configured in /var/run/mjf or /etc/sysconfig/mjf is used as is.
# The setting must start a line (leading spaces or tabs allowed), so
# commented-out lines are not picked up; /var/run/mjf is searched first.
matchObject = re.search(r'^[ \t]*db12_job=([0-9.]+)',
                        fromRun + '\n' + fromSysconfig, re.MULTILINE)
if matchObject:
  try:
    db12_job = float(matchObject.group(1))
  except ValueError:
    # Not a valid number (e.g. "1.2.3"): fall back to the calculation below
    pass

if not db12_job:
  # Otherwise derive it from the whole-machine values, if both are readable
  try:
    with open('/etc/machinefeatures/db12', 'r') as db12File:
      db12 = float(db12File.readline())
  except (IOError, OSError, ValueError):
    db12 = None

  try:
    with open('/etc/machinefeatures/total_cpu', 'r') as totalCpuFile:
      total_cpu = int(totalCpuFile.readline())
  except (IOError, OSError, ValueError):
    total_cpu = None

  if db12 and total_cpu:
    # Simple pro-rata allocation of total db12 depending on processors for this job
    db12_job = (jobfeatures['allocated_cpu'] * db12) / total_cpu

if db12_job:
  # We got it from somewhere
  with open(jobfeaturesDir + '/db12_job', 'w') as db12JobFile:
    db12JobFile.write('%.2f' % db12_job)