Commit 316c79e5 authored by Sergiu Weisz's avatar Sergiu Weisz
Browse files

Add error handling code for command execution in SLURM BQ

parent 37b6c37c
......@@ -30,6 +30,8 @@ import javax.naming.directory.SearchResult;
import alien.site.Functions;
import lia.util.process.ExternalProcess.ExitStatus;
/**
* @author maarten
*/
......@@ -268,7 +270,8 @@ public class ARC extends BatchQueue {
logger.info("Checking remaining proxy lifetime");
final String proxy_info_cmd = "voms-proxy-info -acsubject -actimeleft 2>&1";
final ArrayList<String> proxy_info_output = executeCommand(proxy_info_cmd);
ExitStatus exitStatus = executeCommand(proxy_info_cmd);
final ArrayList<String> proxy_info_output = getStdOut(exitStatus);
String dn_str = "";
String time_left_str = "";
......@@ -310,7 +313,8 @@ public class ARC extends BatchQueue {
ArrayList<String> proxy_renewal_output = null;
try {
proxy_renewal_output = executeCommand(proxy_renewal_cmd);
exitStatus = executeCommand(proxy_renewal_cmd);
proxy_renewal_output = getStdOut(exitStatus);
}
catch (final Exception e) {
logger.info(String.format("[LCG] Problem while executing command: %s", proxy_renewal_cmd));
......@@ -426,7 +430,8 @@ public class ARC extends BatchQueue {
submit_cmd.append(' ').append(s);
}
final ArrayList<String> output = executeCommand(submit_cmd.toString());
ExitStatus exitStatus = executeCommand(submit_cmd.toString());
final ArrayList<String> output = getStdOut(exitStatus);
if (logger.isLoggable(Level.INFO)) {
for (final String line : output) {
......@@ -657,7 +662,8 @@ public class ARC extends BatchQueue {
for (final String suffix : suffixes) {
final String f = prefix + suffix;
final String cmd = String.format("test ! -e %s || mv %s %s.%d", f, f, f, Integer.valueOf(cDay));
final ArrayList<String> output = executeCommand(cmd);
ExitStatus exitStatus = executeCommand(cmd);
final ArrayList<String> output = getStdOut(exitStatus);
for (final String line : output) {
logger.info(line);
......
package alien.site.batchqueue;
import java.io.BufferedReader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.ArrayList;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import lia.util.process.ExternalProcess.ExitStatus;
import utils.ProcessWithTimeout;
import java.util.logging.Level;
......@@ -56,8 +57,8 @@ public abstract class BatchQueue {
* @param cmd
* @return the output of the given command, one array entry per line
*/
public ArrayList<String> executeCommand(String cmd) {
ArrayList<String> proc_output = new ArrayList<>();
public ExitStatus executeCommand(String cmd) {
ExitStatus exitStatus = null;
logger.info("Executing: " + cmd);
......@@ -115,22 +116,23 @@ public abstract class BatchQueue {
pTimeout.waitFor(60, TimeUnit.SECONDS);
final ExitStatus exitStatus = pTimeout.getExitStatus();
exitStatus = pTimeout.getExitStatus();
logger.info("Process exit status: " + exitStatus.getExecutorFinishStatus());
final BufferedReader reader = new BufferedReader(new StringReader(exitStatus.getStdOut()));
String output_str;
while ((output_str = reader.readLine()) != null) {
proc_output.add(output_str.trim());
}
}
catch (final Throwable t) {
logger.log(Level.WARNING, "Exception executing command: " + cmd, t);
}
return proc_output;
return exitStatus;
}
/**
 * Splits the standard output captured in an {@link ExitStatus} into trimmed lines.
 *
 * @param exitStatus result of a command execution; may be <code>null</code> when the command could not be run
 * @return the captured stdout, one trimmed entry per line; an empty list when there is no status or no output
 */
static ArrayList<String> getStdOut(ExitStatus exitStatus) {
	final ArrayList<String> lines = new ArrayList<>();

	// executeCommand() returns null when it caught an exception before obtaining a status
	if (exitStatus == null)
		return lines;

	final String stdout = exitStatus.getStdOut();

	// "".split("\n") yields one empty element, which would make callers that
	// count lines (e.g. via size()) report 1 for a command that printed nothing
	if (stdout == null || stdout.isEmpty())
		return lines;

	for (final String line : stdout.split("\n"))
		lines.add(line.trim());

	return lines;
}
/**
 * Splits the standard error captured in an {@link ExitStatus} into trimmed lines.
 *
 * @param exitStatus result of a command execution; may be <code>null</code> when the command could not be run
 * @return the captured stderr, one trimmed entry per line; an empty list when there is no status or no error output
 */
static ArrayList<String> getStdErr(ExitStatus exitStatus) {
	final ArrayList<String> lines = new ArrayList<>();

	// executeCommand() returns null when it caught an exception before obtaining a status
	if (exitStatus == null)
		return lines;

	// Read the error stream; the previous implementation returned stdout here by mistake
	final String stderr = exitStatus.getStdErr();

	// "".split("\n") yields one empty element, so treat "no output" explicitly
	if (stderr == null || stderr.isEmpty())
		return lines;

	for (final String line : stderr.split("\n"))
		lines.add(line.trim());

	return lines;
}
/**
......
......@@ -19,6 +19,8 @@ import java.util.regex.Pattern;
import alien.site.Functions;
import lia.util.process.ExternalProcess.ExitStatus;
/**
* @author mmmartin
*/
......@@ -258,7 +260,8 @@ public class HTCONDOR extends BatchQueue {
logger.info("Checking remaining proxy lifetime");
final String proxy_info_cmd = "voms-proxy-info -acsubject -actimeleft 2>&1";
final ArrayList<String> proxy_info_output = executeCommand(proxy_info_cmd);
ExitStatus exitStatus = executeCommand(proxy_info_cmd);
final ArrayList<String> proxy_info_output = getStdOut(exitStatus);
String dn_str = "";
String time_left_str = "";
......@@ -300,7 +303,8 @@ public class HTCONDOR extends BatchQueue {
ArrayList<String> proxy_renewal_output = null;
try {
proxy_renewal_output = executeCommand(proxy_renewal_cmd);
exitStatus = executeCommand(proxy_renewal_cmd);
proxy_renewal_output = getStdOut(exitStatus);
}
catch (final Exception e) {
logger.info(String.format("[LCG] Problem while executing command: %s", proxy_renewal_cmd));
......@@ -465,7 +469,8 @@ public class HTCONDOR extends BatchQueue {
}
final String submit_cmd = submitCmd + " " + submitArgs + " " + submit_file;
final ArrayList<String> output = executeCommand(submit_cmd);
final ExitStatus exitStatus = executeCommand(submit_cmd);
final ArrayList<String> output = getStdOut(exitStatus);
for (final String line : output) {
final String trimmed_line = line.trim();
......@@ -530,7 +535,8 @@ public class HTCONDOR extends BatchQueue {
final String fmt = (local_pool != null) ? " -format " + local_pool : "";
final String cmd = "condor_q -const 'JobStatus < 3' -af JobStatus" +
fmt + " GridResource || (echo " + bad + " x; exit 1)";
final ArrayList<String> job_list = executeCommand(cmd);
final ExitStatus exitStatus = executeCommand(cmd);
final ArrayList<String> job_list = getStdOut(exitStatus);
tot_running = tot_waiting = 0;
......
......@@ -21,6 +21,9 @@ import java.util.logging.Logger;
import alien.site.Functions;
import lazyj.Utils;
import lia.util.process.ExternalProcess.ExitStatus;
import lia.util.process.ExternalProcess.ExecutorFinishStatus;
/**
*
*/
......@@ -243,7 +246,8 @@ public class SLURM extends BatchQueue {
}
final String cmd = "cat " + this.temp_file.getAbsolutePath() + " | " + this.submitCmd + " " + this.submitArgs;
final ArrayList<String> output = executeCommand(cmd);
final ExitStatus exitStatus = executeCommand(cmd);
final ArrayList<String> output = getStdOut(exitStatus);
for (final String line : output) {
final String trimmed_line = line.trim();
this.logger.info(trimmed_line);
......@@ -256,7 +260,12 @@ public class SLURM extends BatchQueue {
/**
 * Runs the status command with the state filter <code>R,S,CG</code> and counts the lines it prints.
 *
 * @return the number of stdout lines produced by the status command, or <code>-1</code> when the
 *         command could not be executed or did not finish normally
 */
@Override
public int getNumberActive() {
	final String status = "R,S,CG";
	final ExitStatus exitStatus = executeCommand(statusCmd + " -t " + status + " " + statusArgs);

	// executeCommand() yields null if launching the process threw; check the outcome
	// before touching the output so a failed query is reported as -1, not as a count
	if (exitStatus == null || exitStatus.getExecutorFinishStatus() != ExecutorFinishStatus.NORMAL)
		return -1;

	final ArrayList<String> output_list = getStdOut(exitStatus);
	return output_list.size();
}
......@@ -266,15 +275,22 @@ public class SLURM extends BatchQueue {
/**
 * Runs the status command with the state filter <code>PD,CF</code> and counts the lines it prints.
 *
 * @return the number of stdout lines produced by the status command, or <code>-1</code> when the
 *         command could not be executed or did not finish normally
 */
@Override
public int getNumberQueued() {
	final String status = "PD,CF";
	final ExitStatus exitStatus = executeCommand(statusCmd + " -t " + status + " " + statusArgs);

	// executeCommand() yields null if launching the process threw; check the outcome
	// before touching the output so a failed query is reported as -1, not as a count
	if (exitStatus == null || exitStatus.getExecutorFinishStatus() != ExecutorFinishStatus.NORMAL)
		return -1;

	final ArrayList<String> output_list = getStdOut(exitStatus);
	return output_list.size();
}
@Override
public int kill() {
final ExitStatus exitStatus;
ArrayList<String> kill_cmd_output = null;
try {
kill_cmd_output = executeCommand(this.killCmd);
exitStatus = executeCommand(this.killCmd);
kill_cmd_output = getStdOut(exitStatus);
}
catch (final Exception e) {
this.logger.info(String.format("[SLURM] Prolem while executing command: %s", this.killCmd));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment