#!/usr/bin/python
#
#  prologue.user script for Machine/Job Features on Torque/PBS
#
#  Andrew McNab, University of Manchester.
#  Copyright (c) 2016. All rights reserved.
#
#  Redistribution and use in source and binary forms, with or
#  without modification, are permitted provided that the following
#  conditions are met:
#
#    o Redistributions of source code must retain the above
#      copyright notice, this list of conditions and the following
#      disclaimer. 
#    o Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution. 
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
#  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
#  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
#  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
#  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
#  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
#  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
#  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
#  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
#  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
#  POSSIBILITY OF SUCH DAMAGE.
#
#  Create $JOBFEATURES files following the Machine/Job Features
#  specification in HSF-TN-2016-02
#
#  This script creates the $JOBFEATURES directory at
#  $PBS_O_HOME/jobfeatures-$PBS_JOBID and attempts to populate it
#  from Torque/PBS information and from
#  $MACHINEFEATURES=/etc/machinefeatures
#
#  If any of the following variables are set in /var/run/mjf or
#  /etc/sysconfig/mjf, those values are used in preference to the
#  ones derived from Torque/PBS or $MACHINEFEATURES:
#
#  - hs06_job
#  - allocated_cpu
#  - wall_limit_secs
#  - cpu_limit_secs
#  - max_rss_bytes
#  - max_swap_bytes
#  - scratch_limit_bytes
#

import os
import re
import sys
import time

# Integer-valued job features which the mjf files may override
INTEGER_KEYS = ['allocated_cpu', 'wall_limit_secs', 'cpu_limit_secs',
                'max_rss_bytes', 'max_swap_bytes', 'scratch_limit_bytes']

# Multipliers for the size suffixes accepted in mem=...
# Safer to assume powers of 1000 rather than 1024
MEMORY_UNITS = {
  'tb': 1000000000000,
  'gb': 1000000000,
  'mb': 1000000,
  'kb': 1000,
  'b': 1,  # Is this the right name??
}


def write_feature(directory, name, value):
  """Write str(value) to the file directory/name, closing it afterwards."""
  with open(directory + '/' + name, 'w') as featureFile:
    featureFile.write(str(value))


def read_text(path):
  """Return the whole contents of path, or '' if it cannot be read."""
  try:
    with open(path, 'r') as textFile:
      return textFile.read()
  except (IOError, OSError, ValueError):
    # Missing, unreadable or undecodable files are simply treated as empty
    return ''


def read_number(path, convert):
  """Return convert(first line of path), or None if that is not possible."""
  try:
    with open(path, 'r') as numberFile:
      return convert(numberFile.readline())
  except (IOError, OSError, ValueError):
    return None


def search_resource(name, value_pattern, resource_list):
  """Search resource_list for name=value_pattern.

  An occurrence where name is not preceded by a letter or underscore is
  preferred, so that 'mem' is not satisfied by 'vmem=' or 'pmem=' (nor
  'cput' by 'pcput=') when the exact resource is also present. If there
  is no such occurrence, the plain substring search is used instead.

  Returns the match object, or None if nothing matched.
  """
  match = re.search('(?<![A-Za-z_])' + name + '=' + value_pattern,
                    resource_list)

  if match is None:
    match = re.search(name + '=' + value_pattern, resource_list)

  return match


def hms_limit_secs(name, resource_list):
  """Return the name=HH:MM:SS entry of resource_list as seconds, or None."""
  match = search_resource(name, '([0-9]*):([0-9]*):([0-9]*)', resource_list)

  if match is None:
    return None

  try:
    hours, minutes, seconds = [int(field) for field in match.groups()]
  except ValueError:
    # One of the three fields was empty
    return None

  return hours * 3600 + minutes * 60 + seconds


def parse_resource_limits(resource_list):
  """Extract job features from a resource limits string.

  Returns a dictionary containing whichever of allocated_cpu,
  wall_limit_secs, cpu_limit_secs and max_rss_bytes could be parsed;
  anything absent or malformed is left out.
  """
  limits = {}

  # if just 1 processor-per-node then "nodes=1", but if 8, say, then "nodes=1:ppn=8"
  ppnMatch = search_resource('nodes', '[0-9]*:ppn=([0-9]*)', resource_list)
  if ppnMatch is not None and ppnMatch.group(1):
    limits['allocated_cpu'] = int(ppnMatch.group(1))

  for key, name in (('wall_limit_secs', 'walltime'),
                    ('cpu_limit_secs', 'cput')):
    secs = hms_limit_secs(name, resource_list)
    if secs is not None:
      limits[key] = secs

  memMatch = search_resource('mem', '([0-9]*)([a-z]*)', resource_list)
  if memMatch is not None and memMatch.group(1):
    unit = memMatch.group(2)
    # Values with a missing or unrecognised suffix are ignored
    if unit in MEMORY_UNITS:
      limits['max_rss_bytes'] = int(memMatch.group(1)) * MEMORY_UNITS[unit]

  return limits


def lookup_override(key, value_pattern, texts):
  """Return the text of the first key=value assignment found in texts.

  texts is a sequence of strings searched in order. They are joined with
  newlines so that the end of one cannot run into the start of the next.
  value_pattern is the regular expression the value must match.

  Returns the matched value (possibly ''), or None if key= is absent.
  """
  match = re.search(key + '=(' + value_pattern + ')', '\n'.join(texts))

  if match is None:
    return None

  return match.group(1)


def main(argv=None, environ=None):
  """Create and populate the jobfeatures directory for one job.

  argv defaults to sys.argv and environ to os.environ. argv[1] is used
  as the job ID and argv[5], if present, as the resource limits string.
  The directory is created as $PBS_O_HOME/jobfeatures-<job ID>.

  Raises KeyError if PBS_O_HOME is not set, IndexError if the job ID
  argument is missing, and OSError if the directory cannot be created.
  """
  if argv is None:
    argv = sys.argv

  if environ is None:
    environ = os.environ

  homeDir = environ['PBS_O_HOME']
  job_id = argv[1]
  jobfeaturesDir = homeDir + '/jobfeatures-' + job_id

  os.mkdir(jobfeaturesDir)
  write_feature(jobfeaturesDir, 'job_id', job_id)
  write_feature(jobfeaturesDir, 'jobstart_secs', int(time.time()))

  # Default of one processor unless the resource limits say otherwise
  jobfeatures = {'allocated_cpu': 1}

  # Examine the 5th argument, for resource limits
  if len(argv) > 5:
    jobfeatures.update(parse_resource_limits(argv[5]))

  # Values from the mjf files take precedence over the Torque/PBS ones.
  # The first assignment found wins and the persistent /etc/sysconfig/mjf
  # is searched before /var/run/mjf.
  # NOTE(review): this keeps the effective order of the previous code,
  # which shadows /var/run/mjf when both files set a key - confirm that
  # this is the intended precedence.
  configTexts = [read_text('/etc/sysconfig/mjf'), read_text('/var/run/mjf')]

  # These are all integers so handle with a list
  for key in INTEGER_KEYS:
    value = lookup_override(key, '[0-9]*', configTexts)

    # An assignment with no digits after the = does not override anything
    if value:
      jobfeatures[key] = int(value)

    if key in jobfeatures:
      write_feature(jobfeaturesDir, key, jobfeatures[key])

  # Try to get/calculate hs06_job
  hs06_job = None

  value = lookup_override('hs06_job', '[0-9.]*', configTexts)
  if value:
    try:
      hs06_job = float(value)
    except ValueError:
      # Digits and dots which do not form a number, e.g. "1.2.3"
      pass

  if not hs06_job:
    hs06 = read_number('/etc/machinefeatures/hs06', float)
    total_cpu = read_number('/etc/machinefeatures/total_cpu', int)

    if hs06 and total_cpu:
      # Simple pro-rata allocation of total hs06 depending on processors for this job
      hs06_job = (jobfeatures['allocated_cpu'] * hs06) / total_cpu

  if hs06_job:
    # We got it from somewhere
    write_feature(jobfeaturesDir, 'hs06_job', '%.2f' % hs06_job)


if __name__ == '__main__':
  main()