sharding: avoid transient overlaps while auditing.

During normal sharding, parent shard and child shard ranges
may have transient overlaps which will disapear when done.
To avoid this transient overlap shows up in the warning log,
when sharder audits root container, it will not emit
'overlapping ranges in state ...' warning when overlapping
shard ranges are parent-children during the past time period
of reclaim_age.

Change-Id: I88bce3c178923457e3887790fa58aad56086b1e6
This commit is contained in:
Jianjian Huo 2022-11-08 23:13:18 -08:00
parent 5a9f5f521a
commit 230b467af0
2 changed files with 195 additions and 6 deletions

View File

@ -158,19 +158,55 @@ def find_paths_with_gaps(shard_ranges, within_range=None):
return paths_with_gaps
def find_overlapping_ranges(shard_ranges):
def _is_parent_or_child(shard_range, other, time_period):
"""
Test if shard range ``shard_range`` is the parent or a child of another
shard range ``other`` within past time period ``time_period``. This method
is limited to work only within the scope of the same user-facing account
(with and without shard prefix).
:param shard_range: an instance of ``ShardRange``.
:param other: an instance of ``ShardRange``.
:param time_period: the specified past time period in seconds. Value of
0 means all time in the past.
:return: True if ``shard_range`` is the parent or a child of ``other``
within past time period, False otherwise, assuming that they are within
the same account.
"""
exclude_age = (time.time() - float(time_period)) if time_period > 0 else 0
if shard_range.is_child_of(other) and shard_range.timestamp >= exclude_age:
return True
if other.is_child_of(shard_range) and other.timestamp >= exclude_age:
return True
return False
def find_overlapping_ranges(
shard_ranges, exclude_parent_child=False, time_period=0):
"""
Find all pairs of overlapping ranges in the given list.
:param shard_ranges: A list of :class:`~swift.utils.ShardRange`
:param exclude_parent_child: If True then overlapping pairs that have a
parent-child relationship within the past time period
``time_period`` are excluded from the returned set. Default is
False.
:param time_period: the specified past time period in seconds. Value of
0 means all time in the past.
:return: a set of tuples, each tuple containing ranges that overlap with
each other.
"""
result = set()
for i, shard_range in enumerate(shard_ranges):
overlapping = [
sr for sr in shard_ranges[i + 1:]
if shard_range.name != sr.name and shard_range.overlaps(sr)]
if exclude_parent_child:
overlapping = [
sr for sr in shard_ranges[i + 1:]
if shard_range.name != sr.name and shard_range.overlaps(sr) and
not _is_parent_or_child(shard_range, sr, time_period)]
else:
overlapping = [
sr for sr in shard_ranges[i + 1:]
if shard_range.name != sr.name and shard_range.overlaps(sr)]
if overlapping:
overlapping.append(shard_range)
overlapping.sort(key=ShardRange.sort_key)
@ -1199,7 +1235,18 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator):
# allow multiple shards in that state
continue
shard_ranges = broker.get_shard_ranges(states=state)
overlaps = find_overlapping_ranges(shard_ranges)
# Transient overlaps can occur during the period immediately after
# sharding if a root learns about new child shards before it learns
# that the parent has sharded. These overlaps are normally
# corrected as an up-to-date version of the parent shard range is
# replicated to the root. Parent-child overlaps are therefore
# ignored for a reclaim age after the child was created. After
# that, parent-child overlaps may indicate that there is
# permanently stale parent shard range data, perhaps from a node
# that has been offline, so these are reported.
overlaps = find_overlapping_ranges(
shard_ranges, exclude_parent_child=True,
time_period=self.reclaim_age)
if overlaps:
self._increment_stat('audit_root', 'has_overlap')
self._update_stat('audit_root', 'num_overlap',

View File

@ -43,7 +43,7 @@ from swift.container.sharder import ContainerSharder, sharding_enabled, \
find_shrinking_candidates, process_compactible_shard_sequences, \
find_compactible_shard_sequences, is_shrinking_candidate, \
is_sharding_candidate, find_paths, rank_paths, ContainerSharderConf, \
find_paths_with_gaps, combine_shard_ranges
find_paths_with_gaps, combine_shard_ranges, find_overlapping_ranges
from swift.common.utils import ShardRange, Timestamp, hash_path, \
encode_timestamps, parse_db_filename, quorum_size, Everything, md5, \
ShardName
@ -5562,6 +5562,63 @@ class TestSharder(BaseTestSharder):
broker.merge_shard_ranges(shrinking_shard_ranges)
check_missing()
def test_audit_root_container_with_parent_child_overlapping(self):
# Test '_audit_root_container' when overlapping shard ranges are
# parent and children, expect no warnings. The case of non parent-child
# overlapping is tested in 'test_audit_root_container'.
now_ts = next(self.ts_iter)
past_ts = Timestamp(float(now_ts) - 604801)
root_sr = ShardRange('a/c', past_ts, state=ShardRange.SHARDED)
parent_range = ShardRange(ShardRange.make_path(
'.shards_a', 'c', root_sr.container,
past_ts, 0),
past_ts, 'a', 'f', object_count=1,
state=ShardRange.CLEAVED)
child_ranges = [
ShardRange(
ShardRange.make_path(
'.shards_a', 'c', parent_range.container, past_ts, 0),
past_ts, lower='a', upper='c', object_count=1,
state=ShardRange.CLEAVED),
ShardRange(
ShardRange.make_path(
'.shards_a', 'c', parent_range.container, past_ts, 1),
past_ts, lower='c', upper='f', object_count=1,
state=ShardRange.CLEAVED)]
self.assertTrue(find_overlapping_ranges([parent_range] + child_ranges))
broker = self._make_broker()
# The case of transient overlapping within reclaim_age.
expected_stats = {'attempted': 1, 'success': 1, 'failure': 0,
'has_overlap': 0, 'num_overlap': 0}
broker.merge_shard_ranges([parent_range] + child_ranges)
with mock.patch('swift.container.sharder.time.time',
return_value=float(now_ts) - 10):
with self._mock_sharder() as sharder:
with mock.patch.object(
sharder, '_audit_shard_container') as mocked:
sharder._audit_container(broker)
self._assert_stats(expected_stats, sharder, 'audit_root')
self.assertFalse(sharder.logger.get_lines_for_level('warning'))
self.assertFalse(sharder.logger.get_lines_for_level('error'))
mocked.assert_not_called()
# The case of overlapping past reclaim_age.
expected_stats = {'attempted': 1, 'success': 0, 'failure': 1,
'has_overlap': 1, 'num_overlap': 2}
with mock.patch('swift.container.sharder.time.time',
return_value=float(now_ts)):
with self._mock_sharder() as sharder:
with mock.patch.object(
sharder, '_audit_shard_container') as mocked:
sharder._audit_container(broker)
lines = sharder.logger.get_lines_for_level('warning')
self.assertIn('Audit failed for root', lines[0])
self.assertFalse(lines[1:])
self.assertFalse(sharder.logger.get_lines_for_level('error'))
self._assert_stats(expected_stats, sharder, 'audit_root')
mocked.assert_not_called()
def test_audit_deleted_root_container(self):
broker = self._make_broker()
shard_bounds = (
@ -8708,6 +8765,91 @@ class TestSharderFunctions(BaseTestSharder):
paths_with_gaps = find_paths_with_gaps(ranges, range_of_interest)
self.assertFalse(paths_with_gaps)
def test_find_overlapping_ranges(self):
now_ts = next(self.ts_iter)
past_ts = Timestamp(float(now_ts) - 61)
root_sr = ShardRange('a/c', past_ts, state=ShardRange.SHARDED)
bounds = (
('', 'a'),
('a', 'f'), # the 'parent_range' in this test.
('f', 'm'), # shard range overlaps with the next.
('k', 'p'),
('p', 'y'),
('y', '')
)
ranges = [
ShardRange(
ShardRange.make_path(
'.shards_a', 'c', root_sr.container, past_ts,
index),
past_ts, lower, upper, object_count=1,
state=ShardRange.SHARDED)
for index, (lower, upper) in enumerate(bounds)]
parent_range = ranges[1]
child_ranges = [
ShardRange(
ShardRange.make_path(
'.shards_a', 'c', parent_range.container, past_ts, 0),
past_ts, lower='a', upper='c', object_count=1,
state=ShardRange.CLEAVED),
ShardRange(
ShardRange.make_path(
'.shards_a', 'c', parent_range.container, past_ts, 1),
past_ts, lower='c', upper='f', object_count=1,
state=ShardRange.CLEAVED)]
overlapping_ranges = find_overlapping_ranges(ranges)
self.assertEqual({(ranges[2], ranges[3])}, overlapping_ranges)
overlapping_ranges = find_overlapping_ranges(
[ranges[1]] + child_ranges)
self.assertEqual(
{(child_ranges[0], child_ranges[1], ranges[1])},
overlapping_ranges)
overlapping_ranges = find_overlapping_ranges(
[ranges[1]] + child_ranges, exclude_parent_child=True)
self.assertEqual(0, len(overlapping_ranges))
with mock.patch(
'swift.container.sharder.time.time',
return_value=float(now_ts)):
overlapping_ranges = find_overlapping_ranges(
[ranges[1]] + child_ranges, exclude_parent_child=True,
time_period=61)
self.assertEqual(0, len(overlapping_ranges))
overlapping_ranges = find_overlapping_ranges(
[ranges[1]] + child_ranges, exclude_parent_child=True,
time_period=60)
self.assertEqual(
{(child_ranges[0], child_ranges[1], ranges[1])},
overlapping_ranges)
overlapping_ranges = find_overlapping_ranges(
ranges + child_ranges)
self.assertEqual(
{(child_ranges[0],
child_ranges[1],
ranges[1]),
(ranges[2],
ranges[3])},
overlapping_ranges)
overlapping_ranges = find_overlapping_ranges(
ranges + child_ranges, exclude_parent_child=True)
self.assertEqual({(ranges[2], ranges[3])}, overlapping_ranges)
with mock.patch(
'swift.container.sharder.time.time',
return_value=float(now_ts)):
overlapping_ranges = find_overlapping_ranges(
ranges + child_ranges, exclude_parent_child=True,
time_period=61)
self.assertEqual({(ranges[2], ranges[3])}, overlapping_ranges)
overlapping_ranges = find_overlapping_ranges(
ranges + child_ranges, exclude_parent_child=True,
time_period=60)
self.assertEqual(
{(child_ranges[0],
child_ranges[1],
ranges[1]),
(ranges[2],
ranges[3])},
overlapping_ranges)
class TestContainerSharderConf(unittest.TestCase):
def test_default(self):