Commit 1a489d15 authored by Georgios Bitzes
Browse files

Add node health status to output of raft-info and quarkdb-info

parent 8555f778
Pipeline #1105640 passed with stages
in 56 minutes and 25 seconds
......@@ -162,7 +162,7 @@ QuarkDBInfo QuarkDBNode::info() {
return {configuration.getMode(), configuration.getDatabase(),
configuration.getConfigurationPath(),
VERSION_FULL_STRING, SSTR(ROCKSDB_MAJOR << "." << ROCKSDB_MINOR << "." << ROCKSDB_PATCH),
SSTR(XrdVERSION),
SSTR(XrdVERSION), chooseWorstHealth(shard->getHealth().getIndicators()),
shard->monitors(), std::chrono::duration_cast<std::chrono::seconds>(bootEnd - bootStart).count(), std::chrono::duration_cast<std::chrono::seconds>(std::chrono::steady_clock::now() - bootEnd).count()
};
}
......@@ -175,6 +175,7 @@ std::vector<std::string> QuarkDBInfo::toVector() const {
ret.emplace_back(SSTR("QUARKDB-VERSION " << version));
ret.emplace_back(SSTR("ROCKSDB-VERSION " << rocksdbVersion));
ret.emplace_back(SSTR("XROOTD-HEADERS " << xrootdHeaders));
ret.emplace_back(SSTR("NODE-HEALTH " << healthStatusAsString(nodeHealthStatus)));
ret.emplace_back(SSTR("MONITORS " << monitors));
ret.emplace_back(SSTR("BOOT-TIME " << bootTime << " (" << formatTime(std::chrono::seconds(bootTime)) << ")"));
ret.emplace_back(SSTR("UPTIME " << uptime << " (" << formatTime(std::chrono::seconds(uptime)) << ")"));
......
......@@ -30,6 +30,7 @@
#include "Configuration.hh"
#include "raft/RaftTimeouts.hh"
#include "auth/AuthenticationDispatcher.hh"
#include "health/HealthIndicator.hh"
namespace quarkdb {
......@@ -40,6 +41,8 @@ struct QuarkDBInfo {
std::string version;
std::string rocksdbVersion;
std::string xrootdHeaders;
HealthStatus nodeHealthStatus;
size_t monitors;
int64_t bootTime;
int64_t uptime;
......
......@@ -30,6 +30,7 @@
#include "redis/LeaseFilter.hh"
#include "utils/ScopedAdder.hh"
#include "utils/VectorUtils.hh"
#include "Version.hh"
using namespace quarkdb;
......@@ -117,6 +118,26 @@ LinkStatus Shard::dispatch(Connection *conn, Transaction &transaction) {
return dispatcher->dispatch(conn, transaction);
}
//------------------------------------------------------------------------------
// Build the health report for this shard.
//
// If an in-flight registration cannot be obtained, no group is queried:
// the report then consists of a single red indicator,
// "BACKEND-GROUP-ATTACHED" = "No", tagged with VERSION_FULL_STRING.
//
// Otherwise the report comes from whichever group is present, checking the
// standalone group first and the raft group's dispatcher second. If neither
// is set, control reaches qdb_throw.
//------------------------------------------------------------------------------
NodeHealth Shard::getHealth() {
  // NOTE(review): the registration stays alive until this function returns,
  // presumably so the groups cannot be detached while they are being
  // queried - confirm against InFlightTracker / Shard::detach.
  InFlightRegistration registration(inFlightTracker);

  if(!registration.ok()) {
    std::vector<HealthIndicator> indicators;
    indicators.emplace_back(HealthStatus::kRed, "BACKEND-GROUP-ATTACHED", "No");
    return NodeHealth(VERSION_FULL_STRING, indicators);
  }

  if(standaloneGroup) {
    return standaloneGroup->getHealth();
  }
  else if(raftGroup) {
    return raftGroup->dispatcher()->getHealth();
  }

  // Neither group is available even though registration succeeded.
  qdb_throw("should never reach here");
}
LinkStatus Shard::dispatch(Connection *conn, RedisRequest &req) {
commandMonitor.broadcast(conn->describe(), req);
......@@ -223,22 +244,7 @@ LinkStatus Shard::dispatch(Connection *conn, RedisRequest &req) {
}
case RedisCommand::QUARKDB_HEALTH: {
if(req.size() != 1) return conn->errArgs(req[0]);
InFlightRegistration registration(inFlightTracker);
if(!registration.ok()) {
return conn->err("unavailable");
}
NodeHealth nodeHealth;
if(standaloneGroup) {
nodeHealth = standaloneGroup->getHealth();
}
else if(raftGroup) {
nodeHealth = raftGroup->dispatcher()->getHealth();
}
return conn->raw(Formatter::nodeHealth(nodeHealth));
return conn->raw(Formatter::nodeHealth(getHealth()));
}
case RedisCommand::COMMAND_STATS: {
if(req.size() != 1) return conn->errArgs(req[0]);
......
......@@ -29,6 +29,7 @@
#include "Configuration.hh"
#include "redis/CommandMonitor.hh"
#include "utils/InFlightTracker.hh"
#include "health/HealthIndicator.hh"
namespace quarkdb {
......@@ -46,6 +47,7 @@ public:
virtual LinkStatus dispatch(Connection *conn, Transaction &transaction) override final;
virtual void notifyDisconnect(Connection *conn) override final {}
size_t monitors() { return commandMonitor.size(); }
NodeHealth getHealth();
private:
void detach();
......
......@@ -28,6 +28,7 @@
#include "utils/TimeFormatting.hh"
#include "Common.hh"
#include "utils/Macros.hh"
#include "health/HealthIndicator.hh"
#include "Utils.hh"
namespace quarkdb {
......@@ -339,6 +340,7 @@ struct RaftInfo {
RaftClusterID clusterID;
RaftServer myself;
RaftServer leader;
HealthStatus nodeHealthStatus;
LogIndex membershipEpoch;
std::vector<RaftServer> nodes;
std::vector<RaftServer> observers;
......@@ -369,7 +371,7 @@ struct RaftInfo {
ret.push_back(SSTR("MYSELF " << myself.toString()));
ret.push_back(SSTR("VERSION " << myVersion));
ret.push_back(SSTR("STATUS " << statusToString(status)));
ret.push_back(SSTR("NODE-HEALTH " << healthStatusAsString(nodeHealthStatus)));
ret.push_back("----------");
ret.push_back(SSTR("MEMBERSHIP-EPOCH " << membershipEpoch));
ret.push_back(SSTR("NODES " << serializeNodes(nodes)));
......
......@@ -705,8 +705,9 @@ RaftInfo RaftDispatcher::info() {
RaftStateSnapshotPtr snapshot = state.getSnapshot();
RaftMembership membership = journal.getMembership();
ReplicationStatus replicationStatus = replicator.getStatus();
HealthStatus nodeHealthStatus = chooseWorstHealth(getHealth().getIndicators());
return {journal.getClusterID(), state.getMyself(), snapshot->leader, membership.epoch, membership.nodes, membership.observers, snapshot->term, journal.getLogStart(),
return {journal.getClusterID(), state.getMyself(), snapshot->leader, nodeHealthStatus, membership.epoch, membership.nodes, membership.observers, snapshot->term, journal.getLogStart(),
journal.getLogSize(), snapshot->status, journal.getCommitIndex(), stateMachine.getLastApplied(), writeTracker.size(),
std::chrono::duration_cast<std::chrono::seconds>(std::chrono::steady_clock::now() - snapshot->timeCreated).count(),
replicationStatus, VERSION_FULL_STRING
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment