Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
eos
QuarkDB
Commits
a65b8192
Commit
a65b8192
authored
Feb 21, 2020
by
Georgios Bitzes
Browse files
Implement ParanoidManifestChecker to try and catch potential MANIFEST corruption early
parent
7bbcef95
Pipeline
#1432887
failed with stages
in 91 minutes and 15 seconds
Changes
9
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/CMakeLists.txt
View file @
a65b8192
...
...
@@ -86,6 +86,7 @@ add_library(XrdQuarkDB SHARED
storage/KeyDescriptorBuilder.cc storage/KeyDescriptorBuilder.hh
storage/KeyLocators.hh
storage/LeaseInfo.hh
storage/ParanoidManifestChecker.cc storage/ParanoidManifestChecker.hh
storage/PatternMatching.hh
storage/Randomization.cc storage/Randomization.hh
storage/ReverseLocator.hh
...
...
src/StateMachine.cc
View file @
a65b8192
...
...
@@ -33,6 +33,8 @@
#include "storage/ExpirationEventIterator.hh"
#include "storage/ReverseLocator.hh"
#include "storage/InternalKeyParsing.hh"
#include "storage/ConsistencyScanner.hh"
#include "storage/ParanoidManifestChecker.hh"
#include "utils/IntToBinaryString.hh"
#include "utils/TimeFormatting.hh"
#include <sys/stat.h>
...
...
@@ -153,6 +155,7 @@ StateMachine::StateMachine(std::string_view f, bool write_ahead_log, bool bulk_l
ensureClockSanity
(
!
dirExists
);
retrieveLastApplied
();
manifestChecker
.
reset
(
new
ParanoidManifestChecker
(
filename
));
consistencyScanner
.
reset
(
new
ConsistencyScanner
(
*
this
));
}
...
...
@@ -184,6 +187,7 @@ void StateMachine::ensureClockSanity(bool justCreated) {
}
StateMachine
::~
StateMachine
()
{
manifestChecker
.
reset
();
consistencyScanner
.
reset
();
if
(
db
)
{
...
...
@@ -1879,10 +1883,10 @@ rocksdb::Status StateMachine::noop(LogIndex index) {
}
//------------------------------------------------------------------------------
// Return health information
about the state machin
e
// Return health information
regarding free spac
e
//------------------------------------------------------------------------------
std
::
vector
<
HealthIndicator
>
StateMachine
::
get
HealthIndicators
()
{
std
::
string
description
=
"FREE-SPACE
-SM
"
;
HealthIndicator
StateMachine
::
get
FreeSpaceHealth
()
{
std
::
string
description
=
"
SM-
FREE-SPACE"
;
struct
statfs
out
;
if
(
statfs
(
filename
.
c_str
(),
&
out
)
!=
0
)
{
...
...
@@ -1911,7 +1915,22 @@ std::vector<HealthIndicator> StateMachine::getHealthIndicators() {
status
=
chooseWorstHealth
(
status
,
HealthStatus
::
kYellow
);
}
return
{
HealthIndicator
(
status
,
description
,
SSTR
(
freeBytes
<<
" bytes ("
<<
percentFree
<<
"%)"
))
};
return
HealthIndicator
(
status
,
description
,
SSTR
(
freeBytes
<<
" bytes ("
<<
percentFree
<<
"%)"
));
}
//------------------------------------------------------------------------------
// Return health information about the state machine
//------------------------------------------------------------------------------
std
::
vector
<
HealthIndicator
>
StateMachine
::
getHealthIndicators
()
{
std
::
string
description
=
"SM-MANIFEST-TIMEDIFF"
;
HealthStatus
healthStatus
=
HealthStatus
::
kGreen
;
Status
status
=
manifestChecker
->
getLastStatus
();
if
(
!
status
.
ok
())
{
healthStatus
=
HealthStatus
::
kRed
;
}
return
{
getFreeSpaceHealth
(),
HealthIndicator
(
healthStatus
,
description
,
status
.
getMsg
())
};
}
rocksdb
::
Status
StateMachine
::
manualCompaction
()
{
...
...
src/StateMachine.hh
View file @
a65b8192
...
...
@@ -31,7 +31,6 @@
#include "utils/RequestCounter.hh"
#include "storage/KeyDescriptor.hh"
#include "storage/KeyLocators.hh"
#include "storage/ConsistencyScanner.hh"
#include "storage/KeyConstants.hh"
#include "storage/LeaseInfo.hh"
#include "health/HealthIndicator.hh"
...
...
@@ -42,6 +41,9 @@
namespace
quarkdb
{
class
ConsistencyScanner
;
class
ParanoidManifestChecker
;
enum
class
LeaseAcquisitionStatus
{
kKeyTypeMismatch
,
kAcquired
,
...
...
@@ -366,7 +368,7 @@ private:
std
::
mutex
writeMtx
;
std
::
unique_ptr
<
rocksdb
::
DB
>
db
;
std
::
unique_ptr
<
ParanoidManifestChecker
>
manifestChecker
;
std
::
unique_ptr
<
ConsistencyScanner
>
consistencyScanner
;
const
std
::
string
filename
;
...
...
@@ -375,6 +377,11 @@ private:
Timekeeper
timeKeeper
;
RequestCounter
requestCounter
;
//----------------------------------------------------------------------------
// Return health information regarding free space
//----------------------------------------------------------------------------
HealthIndicator
getFreeSpaceHealth
();
};
...
...
src/storage/ParanoidManifestChecker.cc
0 → 100644
View file @
a65b8192
// ----------------------------------------------------------------------
// File: ParanoidManifestChecker.cc
// Author: Georgios Bitzes - CERN
// ----------------------------------------------------------------------
/************************************************************************
* quarkdb - a redis-like highly available key-value store *
* Copyright (C) 2020 CERN/Switzerland *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>.*
************************************************************************/
#include "storage/ParanoidManifestChecker.hh"
#include "utils/DirectoryIterator.hh"
#include "utils/StringUtils.hh"
#include <sys/stat.h>
namespace
quarkdb
{
ParanoidManifestChecker
::
ParanoidManifestChecker
(
std
::
string_view
path
)
:
mPath
(
path
)
{
mThread
.
reset
(
&
ParanoidManifestChecker
::
main
,
this
);
}
//------------------------------------------------------------------------------
// Main thread checking the status on regular intervals: run checkDB on the
// stored path, log a failure, publish the result through mLastStatus, then
// wait five minutes before the next round. Returns once the assistant reports
// that termination has been requested.
//------------------------------------------------------------------------------
void ParanoidManifestChecker::main(ThreadAssistant &assistant) {
  for (;;) {
    if (assistant.terminationRequested()) {
      return;
    }

    Status outcome = checkDB(mPath);

    if (!outcome.ok()) {
      qdb_error("Potential MANIFEST corruption for DB at " << mPath << "(" << outcome.getMsg() << ")");
    }

    // Publish the result (successful or not) for getLastStatus().
    mLastStatus.set(outcome);

    assistant.wait_for(std::chrono::minutes(5));
  }
}
//------------------------------------------------------------------------------
// Strict "earlier than" ordering for two timespec values: compare the seconds
// first, and fall back to the nanoseconds only when the seconds are equal.
//
// The arguments are taken by const reference, since the comparison never
// modifies them; this also allows const objects and temporaries to be compared.
//------------------------------------------------------------------------------
bool operator<(const struct timespec &one, const struct timespec &two) {
  if (one.tv_sec != two.tv_sec) {
    return one.tv_sec < two.tv_sec;
  }

  return one.tv_nsec < two.tv_nsec;
}
//------------------------------------------------------------------------------
// Check the given DB path.
//
// Scans the directory entries under "path" and records the newest modification
// time among entries whose name starts with "MANIFEST", and the newest among
// entries whose name ends with ".sst". Entries which cannot be stat'ed are
// skipped.
//
// Returns Status(1, "<N> sec") if the newest SST file is at least one hour
// newer than the newest MANIFEST, and Status(0, "<N> sec") otherwise, where N
// is the difference in whole seconds (SST minus MANIFEST).
//------------------------------------------------------------------------------
Status ParanoidManifestChecker::checkDB(std::string_view path) {
  DirectoryIterator iter(path);
  struct dirent *entry = nullptr;

  // Zero-initialize both timestamps: they are compared against before the first
  // assignment, and reading an indeterminate timespec is undefined behaviour.
  // A category with no matching file therefore keeps the zero timestamp.
  struct timespec manifestMtime = {};
  struct timespec sstMtime = {};

  while ((entry = iter.next())) {
    struct stat statbuf;

    if (stat(SSTR(path << "/" << entry->d_name).c_str(), &statbuf) != 0) {
      continue;
    }

    if (StringUtils::startsWith(entry->d_name, "MANIFEST") && manifestMtime < statbuf.st_mtim) {
      manifestMtime = statbuf.st_mtim;
    }

    if (StringUtils::endsWith(entry->d_name, ".sst") && sstMtime < statbuf.st_mtim) {
      sstMtime = statbuf.st_mtim;
    }
  }

  // Keep the difference in time_t instead of narrowing it into an int.
  const time_t secDiff = sstMtime.tv_sec - manifestMtime.tv_sec;
  std::string diff = SSTR(secDiff << " sec");

  // 1 hour should be more than enough (?)
  if (secDiff >= 3600) {
    return Status(1, diff);
  }

  return Status(0, diff);
}
//------------------------------------------------------------------------------
// Get last status: hands back whatever mLastStatus currently holds.
//------------------------------------------------------------------------------
Status ParanoidManifestChecker::getLastStatus() const {
  Status latest = mLastStatus.get();
  return latest;
}
}
src/storage/ParanoidManifestChecker.hh
0 → 100644
View file @
a65b8192
// ----------------------------------------------------------------------
// File: ParanoidManifestChecker.hh
// Author: Georgios Bitzes - CERN
// ----------------------------------------------------------------------
/************************************************************************
* quarkdb - a redis-like highly available key-value store *
* Copyright (C) 2020 CERN/Switzerland *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>.*
************************************************************************/
#ifndef QUARKDB_PARANOID_MANIFEST_CHECKER_HH
#define QUARKDB_PARANOID_MANIFEST_CHECKER_HH
#include "utils/AssistedThread.hh"
#include <rocksdb/db.h>
#include <string_view>
#include "Status.hh"
#include "utils/Synchronized.hh"
namespace
quarkdb
{
//------------------------------------------------------------------------------
// We've observed in the past RocksDB corrupting its MANIFEST file, while new
// SST files were being written.
//
// This is an attempt at detecting this problem early, but we're not sure if
// it works, or how useful it might be.
//------------------------------------------------------------------------------
class ParanoidManifestChecker {
public:
  //----------------------------------------------------------------------------
  // Constructor receiving the rocksdb path
  //----------------------------------------------------------------------------
  ParanoidManifestChecker(std::string_view path);

  //----------------------------------------------------------------------------
  // Main thread checking the status on regular intervals
  //----------------------------------------------------------------------------
  void main(ThreadAssistant &assistant);

  //----------------------------------------------------------------------------
  // Check the given DB path
  //----------------------------------------------------------------------------
  static Status checkDB(std::string_view path);

  //----------------------------------------------------------------------------
  // Get last status
  //----------------------------------------------------------------------------
  Status getLastStatus() const;

private:
  // Path handed to checkDB() on every round of main().
  std::string mPath;

  // Result of the most recent check; written by main(), read by
  // getLastStatus().
  Synchronized<Status> mLastStatus;

  // Must remain the LAST data member. Members are destroyed in reverse order
  // of declaration, so the thread object goes away first, while mPath and
  // mLastStatus (both used by main()) are still alive.
  // NOTE(review): this relies on AssistedThread stopping and joining the
  // thread in its destructor - confirm against utils/AssistedThread.hh.
  AssistedThread mThread;
};
}
#endif
src/utils/DirectoryIterator.cc
View file @
a65b8192
...
...
@@ -30,7 +30,7 @@ using namespace quarkdb;
//------------------------------------------------------------------------------
// Construct iterator object on the given path - must be a directory.
//------------------------------------------------------------------------------
DirectoryIterator
::
DirectoryIterator
(
const
std
::
string
&
mypath
)
DirectoryIterator
::
DirectoryIterator
(
std
::
string
_view
mypath
)
:
path
(
mypath
),
reachedEnd
(
false
),
dir
(
nullptr
)
{
dir
=
opendir
(
path
.
c_str
());
...
...
src/utils/DirectoryIterator.hh
View file @
a65b8192
...
...
@@ -26,6 +26,7 @@
#include <dirent.h>
#include <string>
#include <string_view>
namespace
quarkdb
{
...
...
@@ -35,7 +36,7 @@ public:
//----------------------------------------------------------------------------
// Construct iterator object on the given path - must be a directory.
//----------------------------------------------------------------------------
DirectoryIterator
(
const
std
::
string
&
path
);
DirectoryIterator
(
std
::
string
_view
path
);
//----------------------------------------------------------------------------
// Destructor
...
...
test/state-machine.cc
View file @
a65b8192
...
...
@@ -26,6 +26,7 @@
#include "storage/ReverseLocator.hh"
#include "storage/PatternMatching.hh"
#include "storage/ExpirationEventIterator.hh"
#include "storage/ConsistencyScanner.hh"
#include "StateMachine.hh"
#include "test-utils.hh"
#include <gtest/gtest.h>
...
...
test/stress/replication.cc
View file @
a65b8192
...
...
@@ -29,6 +29,7 @@
#include "raft/RaftReplicator.hh"
#include "raft/RaftConfig.hh"
#include "raft/RaftTrimmer.hh"
#include "storage/ConsistencyScanner.hh"
#include "Configuration.hh"
#include "QuarkDBNode.hh"
#include "../test-utils.hh"
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment