prologue.user 5.12 KB
Newer Older
Andrew McNab's avatar
Andrew McNab committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/usr/bin/python
#
#  prologue.user script for Machine/Job Features on Torque/PBS
#
#  Andrew McNab, University of Manchester.
#  Copyright (c) 2016. All rights reserved.
#
#  Redistribution and use in source and binary forms, with or
#  without modification, are permitted provided that the following
#  conditions are met:
#
#    o Redistributions of source code must retain the above
#      copyright notice, this list of conditions and the following
#      disclaimer. 
#    o Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution. 
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
#  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
#  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
#  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
#  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
#  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
#  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
#  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
#  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
#  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
#  POSSIBILITY OF SUCH DAMAGE.
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51

#  Create $JOBFEATURES files following the Machine/Job Features 
#  specification in HSF-TN-2016-02
#
#  This script creates the $JOBFEATURES directory at 
#  $HOME/jobfeatures-$PBS_JOBID and attempts to populate it
#  from Torque/PBS information and from 
#  $MACHINEFEATURES=/etc/machinefeatures
#
#  If the following variables are present in /var/run/mjf or 
#  /etc/sysconfig/mjf then they are preferred:
#
#  - hs06_job
#  - allocated_cpu
#  - wall_limit_secs
#  - cpu_limit_secs
#  - max_rss_bytes
#  - max_swap_bytes
#  - scratch_limit_bytes
Andrew McNab's avatar
Andrew McNab committed
52
53
54
#

import os
Andrew McNab's avatar
Andrew McNab committed
55
import re
Andrew McNab's avatar
Andrew McNab committed
56
57
58
import sys
import time

59
# Where the site publishes per-host Machine Features values.
MACHINEFEATURES_DIR = '/etc/machinefeatures'

# Site override files, highest precedence first: values found in
# /var/run/mjf win over values found in the persistent /etc/sysconfig/mjf.
OVERRIDE_FILES = ('/var/run/mjf', '/etc/sysconfig/mjf')

# Integer-valued job features that may be derived from the Torque resource
# list and/or overridden by key=value lines in OVERRIDE_FILES.
INTEGER_KEYS = ('allocated_cpu', 'wall_limit_secs', 'cpu_limit_secs',
                'max_rss_bytes', 'max_swap_bytes', 'scratch_limit_bytes')

# Multipliers for Torque size suffixes. It is safer to assume powers of 1000
# rather than 1024. Torque documents bytes as the default when no suffix is
# given; word-based suffixes (kw, mw, ...) are deliberately not handled.
MEM_UNIT_BYTES = {
  '':   1,
  'b':  1,
  'kb': 1000,
  'mb': 1000000,
  'gb': 1000000000,
  'tb': 1000000000000,
}


def read_text(path):
  """Return the whole contents of path, or '' if it cannot be read."""
  try:
    with open(path, 'r') as handle:
      return handle.read()
  except (IOError, OSError):
    return ''


def read_number(path, convert):
  """Return convert(first line of path), or None if missing or malformed.

  convert is int or float; both tolerate surrounding whitespace/newline.
  """
  try:
    with open(path, 'r') as handle:
      return convert(handle.readline())
  except (IOError, OSError, ValueError):
    return None


def write_value(directory, name, value):
  """Write str(value), without a trailing newline, to directory/name."""
  with open(directory + '/' + name, 'w') as handle:
    handle.write(str(value))


def parse_duration_secs(name, resources):
  """Return the duration given as name=... in resources, in seconds.

  Accepts the Torque forms SS, MM:SS and HH:MM:SS. Returns None when
  name= followed by at least one digit is not present in resources.
  """
  match = re.search(name + r'=([0-9]+(?::[0-9]+){0,2})', resources)
  if match is None:
    return None

  secs = 0
  for field in match.group(1).split(':'):
    # Base-60 fold: HH:MM:SS -> (HH * 60 + MM) * 60 + SS
    secs = secs * 60 + int(field)
  return secs


def parse_resource_limits(resources):
  """Extract job features from a Torque resource limits string.

  resources is the 5th prologue argument, e.g.
  "nodes=1:ppn=8,walltime=48:00:00,mem=2gb". Returns a dict holding only
  the features that could be determined, out of allocated_cpu,
  wall_limit_secs, cpu_limit_secs and max_rss_bytes.
  """
  limits = {}

  # If just 1 processor-per-node then "nodes=1", but if 8, say, then
  # "nodes=1:ppn=8". Without a ppn the caller's default is left in place.
  ppn_match = re.search(r'nodes=[0-9]*:ppn=([0-9]+)', resources)
  if ppn_match is not None:
    limits['allocated_cpu'] = int(ppn_match.group(1))

  for name, key in (('walltime', 'wall_limit_secs'),
                    ('cput',     'cpu_limit_secs')):
    secs = parse_duration_secs(name, resources)
    if secs is not None:
      limits[key] = secs

  # NOTE(review): like the original pattern, this is not anchored, so it
  # also matches the tail of "pmem=" / "vmem=" if one of those comes first.
  mem_match = re.search(r'mem=([0-9]+)([a-z]*)', resources)
  if mem_match is not None:
    multiplier = MEM_UNIT_BYTES.get(mem_match.group(2))
    if multiplier is not None:
      limits['max_rss_bytes'] = int(mem_match.group(1)) * multiplier

  return limits


def find_override(key, texts, convert, number_pattern='[0-9]+'):
  """Return the first usable key=<number> value found in texts, else None.

  texts is searched in order, so list the highest-precedence source first.
  A value that does not convert cleanly is skipped rather than raising.
  """
  pattern = re.escape(key) + '=(' + number_pattern + ')'

  for text in texts:
    match = re.search(pattern, text)
    if match is None:
      continue
    try:
      return convert(match.group(1))
    except ValueError:
      continue

  return None


def build_jobfeatures(resources, hs06, total_cpu, override_texts):
  """Work out the job feature values to publish.

  resources      -- Torque resource limits string ('' if unavailable)
  hs06           -- total HS06 of the host, or None if unknown
  total_cpu      -- total processors of the host, or None if unknown
  override_texts -- contents of the override files, highest precedence first

  Returns a dict of feature name -> value. Integer features are ints;
  hs06_job, when it can be determined, is a float.
  """
  features = {'allocated_cpu': 1}
  features.update(parse_resource_limits(resources))

  # Site-supplied values are preferred over anything derived from Torque
  for key in INTEGER_KEYS:
    value = find_override(key, override_texts, int)
    if value is not None:
      features[key] = value

  # hs06_job is a float so special handling
  hs06_job = find_override('hs06_job', override_texts, float, '[0-9.]+')

  if hs06_job is None and hs06 and total_cpu:
    # Simple pro-rata allocation of total hs06 depending on processors for
    # this job. Done only now so that the final allocated_cpu is used.
    hs06_job = (features['allocated_cpu'] * hs06) / total_cpu

  if hs06_job:
    features['hs06_job'] = hs06_job

  return features


def main(argv):
  """Create and populate $HOME/jobfeatures-<job id> for this job.

  argv follows the Torque prologue convention: argv[1] is the job id and
  argv[5], if present, is the resource limits string. Returns the process
  exit status.
  """
  if len(argv) < 2:
    sys.stderr.write('usage: prologue.user JOBID [USER GROUP JOBNAME RESOURCES ...]\n')
    return 1

  job_id = argv[1]
  resources = argv[5] if len(argv) > 5 else ''

  jobfeatures_dir = os.path.expanduser('~') + '/jobfeatures-' + job_id

  # Tolerate a directory left behind by an earlier run of the same job
  if not os.path.isdir(jobfeatures_dir):
    os.mkdir(jobfeatures_dir)

  write_value(jobfeatures_dir, 'job_id', job_id)
  write_value(jobfeatures_dir, 'jobstart_secs', int(time.time()))

  features = build_jobfeatures(
    resources,
    read_number(MACHINEFEATURES_DIR + '/hs06', float),
    read_number(MACHINEFEATURES_DIR + '/total_cpu', int),
    [read_text(path) for path in OVERRIDE_FILES])

  for key in sorted(features):
    if key == 'hs06_job':
      write_value(jobfeatures_dir, key, '%.2f' % features[key])
    else:
      write_value(jobfeatures_dir, key, features[key])

  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv))
Andrew McNab's avatar
Andrew McNab committed
164