From 3661db0a34a27c608c27c0ae25f40fd0d856d144 Mon Sep 17 00:00:00 2001 From: Simon Van den Bossche Date: Wed, 13 Mar 2019 10:55:54 +0100 Subject: [PATCH 01/18] Objectify snapshots and buckets for better readable code --- ovs/lib/generic.py | 82 ++++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/ovs/lib/generic.py b/ovs/lib/generic.py index 3ec4c5b58..4c3c9263a 100644 --- a/ovs/lib/generic.py +++ b/ovs/lib/generic.py @@ -92,6 +92,20 @@ def delete_snapshots(timestamp=None): day = timedelta(1) week = day * 7 + class Snapshot(object): + def __init__(self, timestamp, snapshot_id, vdisk_guid, is_consistent): + self.timestamp = timestamp + self.snapshot_id = snapshot_id + self.vdisk_guid = vdisk_guid + self.consistent = is_consistent + + class Bucket(object): + def __init__(self, start, end, type): + self.start = start + self.end = end + self.type = type + self.snapshots = [] + def make_timestamp(offset): """ Create an integer based timestamp @@ -107,20 +121,17 @@ def make_timestamp(offset): buckets = [] # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[ for i in xrange(0, 7): - buckets.append({'start': make_timestamp(day * i), - 'end': make_timestamp(day * (i + 1)), - 'type': '1d', - 'snapshots': []}) + buckets.append(Bucket(start=make_timestamp(day * i), + end=make_timestamp(day * (i + 1)), + type='1d')) # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[ for i in xrange(1, 4): - buckets.append({'start': make_timestamp(week * i), - 'end': make_timestamp(week * (i + 1)), - 'type': '1w', - 'snapshots': []}) - buckets.append({'start': make_timestamp(week * 4), - 'end': 0, - 'type': 'rest', - 'snapshots': []}) + buckets.append(Bucket(start=make_timestamp(week * i), + end=make_timestamp(week * (i + 1)), + type='1w')) + buckets.append(Bucket(start=make_timestamp(week * 4), + end=0, + type='rest')) # Get a list of all snapshots that are used as parents for clones parent_snapshots = set([vd.parentsnapshot for vd in VDiskList.get_with_parent_snaphots()]) @@ -134,19 +145,16 @@ def make_timestamp(offset): if vdisk.info['object_type'] in ['BASE']: bucket_chain = copy.deepcopy(buckets) - for snapshot in vdisk.snapshots: - if snapshot.get('is_sticky') is True: + for vdisk_snapshot in vdisk.snapshots: + if vdisk_snapshot.get('is_sticky') is True: continue - if snapshot['guid'] in parent_snapshots: - GenericController._logger.info('Not deleting snapshot {0} because it has clones'.format(snapshot['guid'])) + if vdisk_snapshot['guid'] in parent_snapshots: + GenericController._logger.info('Not deleting snapshot {0} because it has clones'.format(vdisk_snapshot['guid'])) continue - timestamp = int(snapshot['timestamp']) + timestamp = int(vdisk_snapshot['timestamp']) for bucket in bucket_chain: - if bucket['start'] >= timestamp > bucket['end']: - bucket['snapshots'].append({'timestamp': timestamp, - 'snapshot_id': snapshot['guid'], - 'vdisk_guid': vdisk.guid, - 'is_consistent': snapshot['is_consistent']}) + if bucket.start >= timestamp > bucket.end: + bucket.snapshots.append(Snapshot(timestamp, vdisk_snapshot['guid'], vdisk.guid, vdisk_snapshot.['is_consistent'])) bucket_chains.append(bucket_chain) # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep @@ -156,38 +164,40 @@ def make_timestamp(offset): for bucket in bucket_chain: if first is True: best = None - for snapshot in bucket['snapshots']: + for snapshot in bucket.snapshots: if best is None: best = snapshot # Consistent is better than inconsistent - elif 
snapshot['is_consistent'] and not best['is_consistent']: + elif snapshot.consistent and not best.consistent: best = snapshot # Newer (larger timestamp) is better than older snapshots - elif snapshot['is_consistent'] == best['is_consistent'] and \ - snapshot['timestamp'] > best['timestamp']: + elif snapshot.consistent == best.consistent and \ + snapshot.timestamp > best.timestamp: best = snapshot - bucket['snapshots'] = [s for s in bucket['snapshots'] if - s['timestamp'] != best['timestamp']] + bucket.snapshots = [s for s in bucket.snapshots if + s.timestamp != best.timestamp] first = False - elif bucket['end'] > 0: + elif bucket.end > 0: oldest = None - for snapshot in bucket['snapshots']: + for snapshot in bucket.snapshots: if oldest is None: oldest = snapshot # Older (smaller timestamp) is the one we want to keep - elif snapshot['timestamp'] < oldest['timestamp']: + elif snapshot.timestamp < oldest.timestamp: oldest = snapshot - bucket['snapshots'] = [s for s in bucket['snapshots'] if - s['timestamp'] != oldest['timestamp']] + bucket.snapshots = [s for s in bucket.snapshots if + s.timestamp != oldest.timestamp] # Delete obsolete snapshots for bucket_chain in bucket_chains: for bucket in bucket_chain: - for snapshot in bucket['snapshots']: - VDiskController.delete_snapshot(vdisk_guid=snapshot['vdisk_guid'], - snapshot_id=snapshot['snapshot_id']) + for snapshot in bucket.snapshots: + VDiskController.delete_snapshot(vdisk_guid=snapshot.vdisk_guid, + snapshot_id=snapshot.snapshot_id) GenericController._logger.info('Delete snapshots finished') + + @staticmethod @ovs_task(name='ovs.generic.execute_scrub', schedule=Schedule(minute='0', hour='3'), ensure_single_info={'mode': 'DEDUPED'}) def execute_scrub(vpool_guids=None, vdisk_guids=None, storagerouter_guid=None, manual=False): From eafb8ea2c695e661212de95cd9dde8dcaa45dc47 Mon Sep 17 00:00:00 2001 From: Simon Van den Bossche Date: Fri, 15 Mar 2019 10:16:18 +0100 Subject: [PATCH 02/18] Working custom retention policy. Defaults to: One per hour first day, one per day first week, one per week first month, one older. Will read `ovs/framework/scheduling/retention_policy/` for config if different intervals are passed, and will fall back to default if none provided. Todo: -Make unittest pass -Check if config file is in correct format. 
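For illustration only, a minimal sketch of how a custom per-vDisk policy could be stored under that key
(the key layout comes from this message; the `Configuration.set` call and the guid value are assumptions
for the example, not part of this patch):

    from ovs.extensions.generic.configuration import Configuration

    # Hypothetical vDisk guid, for illustration only
    vdisk_guid = '00000000-0000-0000-0000-000000000000'
    # Each entry spreads 'nr_of_snapshots' evenly over 'nr_of_days':
    # 24 snapshots over the first day, 6 over the next 6 days, 3 over the following 21 days
    Configuration.set('ovs/framework/scheduling/retention_policy/{0}'.format(vdisk_guid),
                      [{'nr_of_snapshots': 24, 'nr_of_days': 1},
                       {'nr_of_snapshots': 6, 'nr_of_days': 6},
                       {'nr_of_snapshots': 3, 'nr_of_days': 21}])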
--- ovs/lib/generic.py | 88 +++++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 33 deletions(-) diff --git a/ovs/lib/generic.py b/ovs/lib/generic.py index 4c3c9263a..fea8cde15 100644 --- a/ovs/lib/generic.py +++ b/ovs/lib/generic.py @@ -28,6 +28,7 @@ from ovs.dal.lists.storagerouterlist import StorageRouterList from ovs.dal.lists.vdisklist import VDiskList from ovs.extensions.db.arakooninstaller import ArakoonClusterConfig +from ovs.extensions.generic.configuration import Configuration from ovs.extensions.generic.sshclient import SSHClient, UnableToConnectException from ovs.lib.helpers.decorators import ovs_task from ovs.lib.helpers.generic.scrubber import Scrubber @@ -88,9 +89,7 @@ def delete_snapshots(timestamp=None): :return: None """ GenericController._logger.info('Delete snapshots started') - - day = timedelta(1) - week = day * 7 + day_timedelta = timedelta(1) class Snapshot(object): def __init__(self, timestamp, snapshot_id, vdisk_guid, is_consistent): @@ -99,13 +98,19 @@ def __init__(self, timestamp, snapshot_id, vdisk_guid, is_consistent): self.vdisk_guid = vdisk_guid self.consistent = is_consistent + def __str__(self): + return 'Snapshot for vDisk {0}'.format(self.vdisk_guid) + class Bucket(object): - def __init__(self, start, end, type): + def __init__(self, start, end, type=None): self.start = start self.end = end - self.type = type + self.type = type or '' self.snapshots = [] + def __str__(self): + return 'Bucket (start: {0}, end: {1}, type: {2}) with {3}'.format(self.start, self.end, self.type, self.snapshots) + def make_timestamp(offset): """ Create an integer based timestamp @@ -114,24 +119,45 @@ def make_timestamp(offset): """ return int(mktime((base - offset).timetuple())) - # Calculate bucket structure + def _calculate_bucket_structure_for_vdisk(vdisk_path): + """ + Path in configuration management where the config is located for deleting snapshots of given vdisk path. + Located in ovs/framework/scheduling/retention_policy/{0}.format(vdisk_guid). + Should look like this: + [{'nr_of_snapshots': 24, 'nr_of_days': 1}, + {'nr_of_snapshots': 6, 'nr_of_days': 6}, + {'nr_of_snapshots': 3, 'nr_of_days': 21}]) + More periods in time can be given. + The passed number of snapshots is an absolute number of snapshots and is evenly distributed across the number of days passed in the interval. 
+ This way, this config will result in storing + one snapshot per hour the first day + one snapshot per day the rest of the week + one snapshot per week the rest of the month + one older snapshot snapshot will always be stored for an interval older then the longest interval passed in the config + :param vdisk_path: ovs/framework/scheduling/retention_policy/{0}.format(vdisk_guid) + :return: + """ + buckets = [] + policies = Configuration.get(vdisk_path, default=[{'nr_of_snapshots': 24, 'nr_of_days': 1}, # One per hour + {'nr_of_snapshots': 6, 'nr_of_days': 6}, # one per day for rest of the week + {'nr_of_snapshots': 3, 'nr_of_days': 21}]) # one per week for the rest of the month + + total_length = 0 + offset = total_length * day_timedelta + for policy in policies: + number_of_days = policy.get('nr_of_days', 1) + number_of_snapshots = policy.get('nr_of_snapshots', number_of_days * 24) + snapshot_timedelta = number_of_days * day_timedelta / number_of_snapshots + for i in xrange(0, number_of_snapshots): + buckets.append(Bucket(start=make_timestamp(offset + snapshot_timedelta * i), end=make_timestamp(offset + snapshot_timedelta * (i + 1)))) + total_length += number_of_days + offset = total_length * day_timedelta + buckets.append(Bucket(start=make_timestamp(total_length * day_timedelta), end=0, type='rest')) + return buckets + if timestamp is None: timestamp = time.time() - base = datetime.fromtimestamp(timestamp).date() - day - buckets = [] - # Buckets first 7 days: [0-1[, [1-2[, [2-3[, [3-4[, [4-5[, [5-6[, [6-7[ - for i in xrange(0, 7): - buckets.append(Bucket(start=make_timestamp(day * i), - end=make_timestamp(day * (i + 1)), - type='1d')) - # Week buckets next 3 weeks: [7-14[, [14-21[, [21-28[ - for i in xrange(1, 4): - buckets.append(Bucket(start=make_timestamp(week * i), - end=make_timestamp(week * (i + 1)), - type='1w')) - buckets.append(Bucket(start=make_timestamp(week * 4), - end=0, - type='rest')) + base = datetime.fromtimestamp(timestamp).date() - day_timedelta # Get a list of all snapshots that are used as parents for clones parent_snapshots = set([vd.parentsnapshot for vd in VDiskList.get_with_parent_snaphots()]) @@ -139,14 +165,13 @@ def make_timestamp(offset): # Place all snapshots in bucket_chains bucket_chains = [] for vdisk in VDiskList.get_vdisks(): - vdisk.invalidate_dynamics('being_scrubbed') - if vdisk.being_scrubbed: - continue + path = 'ovs/framework/scheduling/retention_policy/{0}'.format(vdisk.guid) + buckets = _calculate_bucket_structure_for_vdisk(path) if vdisk.info['object_type'] in ['BASE']: bucket_chain = copy.deepcopy(buckets) - for vdisk_snapshot in vdisk.snapshots: - if vdisk_snapshot.get('is_sticky') is True: + for vdisk_snapshot in vdisk.snapshots: # type: Dict + if vdisk_snapshot.get('is_sticky'): continue if vdisk_snapshot['guid'] in parent_snapshots: GenericController._logger.info('Not deleting snapshot {0} because it has clones'.format(vdisk_snapshot['guid'])) @@ -154,7 +179,7 @@ def make_timestamp(offset): timestamp = int(vdisk_snapshot['timestamp']) for bucket in bucket_chain: if bucket.start >= timestamp > bucket.end: - bucket.snapshots.append(Snapshot(timestamp, vdisk_snapshot['guid'], vdisk.guid, vdisk_snapshot.['is_consistent'])) + bucket.snapshots.append(Snapshot(timestamp, vdisk_snapshot['guid'], vdisk.guid, vdisk_snapshot['is_consistent'])) bucket_chains.append(bucket_chain) # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep @@ -171,11 +196,9 @@ def make_timestamp(offset): elif snapshot.consistent and not 
best.consistent: best = snapshot # Newer (larger timestamp) is better than older snapshots - elif snapshot.consistent == best.consistent and \ - snapshot.timestamp > best.timestamp: + elif snapshot.consistent == best.consistent and snapshot.timestamp > best.timestamp: best = snapshot - bucket.snapshots = [s for s in bucket.snapshots if - s.timestamp != best.timestamp] + bucket.snapshots = [s for s in bucket.snapshots if s.timestamp != best.timestamp] first = False elif bucket.end > 0: oldest = None @@ -185,8 +208,7 @@ def make_timestamp(offset): # Older (smaller timestamp) is the one we want to keep elif snapshot.timestamp < oldest.timestamp: oldest = snapshot - bucket.snapshots = [s for s in bucket.snapshots if - s.timestamp != oldest.timestamp] + bucket.snapshots = [s for s in bucket.snapshots if s.timestamp != oldest.timestamp] # Delete obsolete snapshots for bucket_chain in bucket_chains: From 70f9ca84e1f39c3d774633dc2d6b901449d6deaa Mon Sep 17 00:00:00 2001 From: Jeffrey Devloo Date: Mon, 18 Mar 2019 19:18:50 +0100 Subject: [PATCH 03/18] Refactor the configurable delete_snapshot --- ovs/lib/generic.py | 134 +----------- ovs/lib/helpers/generic/snapshots.py | 303 +++++++++++++++++++++++++++ 2 files changed, 306 insertions(+), 131 deletions(-) create mode 100644 ovs/lib/helpers/generic/snapshots.py diff --git a/ovs/lib/generic.py b/ovs/lib/generic.py index fea8cde15..9882322fc 100644 --- a/ovs/lib/generic.py +++ b/ovs/lib/generic.py @@ -35,6 +35,7 @@ from ovs.lib.helpers.toolbox import Toolbox, Schedule from ovs.lib.vdisk import VDiskController from ovs.log.log_handler import LogHandler +from ovs.lib.helpers.generic.snapshots import SnapshotManager class GenericController(object): @@ -85,140 +86,11 @@ def delete_snapshots(timestamp=None): :param timestamp: Timestamp to determine whether snapshots should be kept or not, if none provided, current time will be used :type timestamp: float - :return: None """ GenericController._logger.info('Delete snapshots started') - day_timedelta = timedelta(1) - - class Snapshot(object): - def __init__(self, timestamp, snapshot_id, vdisk_guid, is_consistent): - self.timestamp = timestamp - self.snapshot_id = snapshot_id - self.vdisk_guid = vdisk_guid - self.consistent = is_consistent - - def __str__(self): - return 'Snapshot for vDisk {0}'.format(self.vdisk_guid) - - class Bucket(object): - def __init__(self, start, end, type=None): - self.start = start - self.end = end - self.type = type or '' - self.snapshots = [] - - def __str__(self): - return 'Bucket (start: {0}, end: {1}, type: {2}) with {3}'.format(self.start, self.end, self.type, self.snapshots) - - def make_timestamp(offset): - """ - Create an integer based timestamp - :param offset: Offset in days - :return: Timestamp - """ - return int(mktime((base - offset).timetuple())) - - def _calculate_bucket_structure_for_vdisk(vdisk_path): - """ - Path in configuration management where the config is located for deleting snapshots of given vdisk path. - Located in ovs/framework/scheduling/retention_policy/{0}.format(vdisk_guid). - Should look like this: - [{'nr_of_snapshots': 24, 'nr_of_days': 1}, - {'nr_of_snapshots': 6, 'nr_of_days': 6}, - {'nr_of_snapshots': 3, 'nr_of_days': 21}]) - More periods in time can be given. - The passed number of snapshots is an absolute number of snapshots and is evenly distributed across the number of days passed in the interval. 
- This way, this config will result in storing - one snapshot per hour the first day - one snapshot per day the rest of the week - one snapshot per week the rest of the month - one older snapshot snapshot will always be stored for an interval older then the longest interval passed in the config - :param vdisk_path: ovs/framework/scheduling/retention_policy/{0}.format(vdisk_guid) - :return: - """ - buckets = [] - policies = Configuration.get(vdisk_path, default=[{'nr_of_snapshots': 24, 'nr_of_days': 1}, # One per hour - {'nr_of_snapshots': 6, 'nr_of_days': 6}, # one per day for rest of the week - {'nr_of_snapshots': 3, 'nr_of_days': 21}]) # one per week for the rest of the month - - total_length = 0 - offset = total_length * day_timedelta - for policy in policies: - number_of_days = policy.get('nr_of_days', 1) - number_of_snapshots = policy.get('nr_of_snapshots', number_of_days * 24) - snapshot_timedelta = number_of_days * day_timedelta / number_of_snapshots - for i in xrange(0, number_of_snapshots): - buckets.append(Bucket(start=make_timestamp(offset + snapshot_timedelta * i), end=make_timestamp(offset + snapshot_timedelta * (i + 1)))) - total_length += number_of_days - offset = total_length * day_timedelta - buckets.append(Bucket(start=make_timestamp(total_length * day_timedelta), end=0, type='rest')) - return buckets - - if timestamp is None: - timestamp = time.time() - base = datetime.fromtimestamp(timestamp).date() - day_timedelta - - # Get a list of all snapshots that are used as parents for clones - parent_snapshots = set([vd.parentsnapshot for vd in VDiskList.get_with_parent_snaphots()]) - - # Place all snapshots in bucket_chains - bucket_chains = [] - for vdisk in VDiskList.get_vdisks(): - path = 'ovs/framework/scheduling/retention_policy/{0}'.format(vdisk.guid) - buckets = _calculate_bucket_structure_for_vdisk(path) - - if vdisk.info['object_type'] in ['BASE']: - bucket_chain = copy.deepcopy(buckets) - for vdisk_snapshot in vdisk.snapshots: # type: Dict - if vdisk_snapshot.get('is_sticky'): - continue - if vdisk_snapshot['guid'] in parent_snapshots: - GenericController._logger.info('Not deleting snapshot {0} because it has clones'.format(vdisk_snapshot['guid'])) - continue - timestamp = int(vdisk_snapshot['timestamp']) - for bucket in bucket_chain: - if bucket.start >= timestamp > bucket.end: - bucket.snapshots.append(Snapshot(timestamp, vdisk_snapshot['guid'], vdisk.guid, vdisk_snapshot['is_consistent'])) - bucket_chains.append(bucket_chain) - - # Clean out the snapshot bucket_chains, we delete the snapshots we want to keep - # And we'll remove all snapshots that remain in the buckets - for bucket_chain in bucket_chains: - first = True - for bucket in bucket_chain: - if first is True: - best = None - for snapshot in bucket.snapshots: - if best is None: - best = snapshot - # Consistent is better than inconsistent - elif snapshot.consistent and not best.consistent: - best = snapshot - # Newer (larger timestamp) is better than older snapshots - elif snapshot.consistent == best.consistent and snapshot.timestamp > best.timestamp: - best = snapshot - bucket.snapshots = [s for s in bucket.snapshots if s.timestamp != best.timestamp] - first = False - elif bucket.end > 0: - oldest = None - for snapshot in bucket.snapshots: - if oldest is None: - oldest = snapshot - # Older (smaller timestamp) is the one we want to keep - elif snapshot.timestamp < oldest.timestamp: - oldest = snapshot - bucket.snapshots = [s for s in bucket.snapshots if s.timestamp != oldest.timestamp] - - # Delete 
obsolete snapshots - for bucket_chain in bucket_chains: - for bucket in bucket_chain: - for snapshot in bucket.snapshots: - VDiskController.delete_snapshot(vdisk_guid=snapshot.vdisk_guid, - snapshot_id=snapshot.snapshot_id) - GenericController._logger.info('Delete snapshots finished') - - + snapshot_manager = SnapshotManager() + snapshot_manager.delete_snapshots(timestamp) @staticmethod @ovs_task(name='ovs.generic.execute_scrub', schedule=Schedule(minute='0', hour='3'), ensure_single_info={'mode': 'DEDUPED'}) diff --git a/ovs/lib/helpers/generic/snapshots.py b/ovs/lib/helpers/generic/snapshots.py new file mode 100644 index 000000000..fdb9b1458 --- /dev/null +++ b/ovs/lib/helpers/generic/snapshots.py @@ -0,0 +1,303 @@ +# Copyright (C) 2019 iNuron NV +# +# This file is part of Open vStorage Open Source Edition (OSE), +# as available from +# +# http://www.openvstorage.org and +# http://www.openvstorage.com. +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License v3 (GNU AGPLv3) +# as published by the Free Software Foundation, in version 3 as it comes +# in the LICENSE.txt file of the Open vStorage OSE distribution. +# +# Open vStorage is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY of any kind. + +import time +from datetime import datetime, timedelta +from ovs.dal.hybrids.vdisk import VDisk +from ovs.dal.hybrids.vpool import VPool +from ovs.dal.lists.vpoollist import VPoolList +from ovs.dal.lists.vdisklist import VDiskList +from ovs.lib.vdisk import VDiskController +from ovs.log.log_handler import LogHandler + + +class RetentionPolicy(object): + def __init__(self, nr_of_snapshots, nr_of_days): + # type: (int, int) -> None + """ + Initialize a retention policy + :param nr_of_snapshots: Number of snapshots to keep over the configured number of days + :type nr_of_snapshots: int + :param nr_of_days: Number of days to account the number of snapshots for + :type nr_of_days: int + """ + self.nr_of_snapshots = nr_of_snapshots + self.nr_of_days = nr_of_days + + @classmethod + def from_configuration(cls, configuration): + # type: (List[Dict[str, int]]) -> List[RetentionPolicy] + """ + A configuration should look like this: + [{'nr_of_snapshots': 24, 'nr_of_days': 1}, + {'nr_of_snapshots': 6, 'nr_of_days': 6}, + {'nr_of_snapshots': 3, 'nr_of_days': 21}]) + The passed number of snapshots is an absolute number of snapshots and is evenly distributed across the number of days passed in the interval. 
+ This way, this config will result in storing + one snapshot per hour the first day + one snapshot per day the rest of the week + one snapshot per week the rest of the month + one older snapshot snapshot will always be stored for an interval older then the longest interval passed in the config + :param configuration: Configuration to use + :type configuration: List[Dict[str, int]] + :return: List[RetentionPolicy] + """ + return [cls(**c) for c in configuration] + + +class Snapshot(object): + def __init__(self, timestamp, snapshot_id, vdisk_guid, is_consistent, is_sticky=False, *args, **kwargs): + """ + Initialize a snapshot object + :param timestamp: + :param snapshot_id: + :param vdisk_guid: + :param is_consistent: + :param is_sticky: + :param args: + :param kwargs: + """ + self.timestamp = int(timestamp) + self.snapshot_id = snapshot_id + self.vdisk_guid = vdisk_guid + self.consistent = is_consistent + self.is_sticky = is_sticky + + def __str__(self): + return 'Snapshot for vDisk {0}'.format(self.vdisk_guid) + + +class Bucket(object): + def __init__(self, start, end): + self.start = start + self.end = end + self.snapshots = [] + + def is_snapshot_in_interval(self, snapshot): + # type: (Snapshot) -> bool + return self.start >= snapshot.timestamp > self.end + + def try_add_snapshot(self, snapshot): + if self.is_snapshot_in_interval(snapshot): + self.snapshots.append(snapshot) + + def get_obsolete_snapshots(self, consistency_first=False): + # type: (bool) -> List[Snapshot] + """ + Retrieve all snapshots which are no longer within this interval + :param consistency_first: Consistency of the snapshot is priortized above the age + :type consistency_first: bool + :return: List with Snapshots + :rtype: List[Snapshot] + """ + if not self.end: + # No end date for the interval, every snapshot is obsolete + return self.snapshots + + if consistency_first: + best = None + for snapshot in self.snapshots: + if best is None: + best = snapshot + # Consistent is better than inconsistent + elif snapshot.consistent and not best.consistent: + best = snapshot + # Newer (larger timestamp) is better than older snapshots + elif snapshot.consistent == best.consistent and snapshot.timestamp > best.timestamp: + best = snapshot + return [s for s in self.snapshots if s.timestamp != best.timestamp] + # First the oldest snapshot and remove all younger ones + oldest = None + for snapshot in self.snapshots: + if oldest is None: + oldest = snapshot + # Older (smaller timestamp) is the one we want to keep + elif snapshot.timestamp < oldest.timestamp: + oldest = snapshot + return [s for s in self.snapshots if s.timestamp != oldest.timestamp] + + def __str__(self): + return 'Bucket (start: {0}, end: {1}) with {2}'.format(self.start, self.end, self.snapshots) + + +class SnapshotManager(object): + """ + Manages snapshots of all vdisks + """ + _logger = LogHandler.get('lib', name='generic tasks') + + def __init__(self): + self.global_policy = self.get_retention_policy() + self.vpool_policies = self.get_retention_policies_for_vpools() + + def get_policy_to_enforce(self, vdisk): + # type: (VDisk) -> List[RetentionPolicy] + """ + Retrieve the policy to enforce for a VDisk + :param vdisk: VDisk to retrieve policy for + :type vdisk: VDisk + :return: Policy to enforce + :rtype: List[RetentionPolicy] + """ + return self.get_retention_policy_vdisk(vdisk) or self.vpool_policies.get(vdisk.vpool) or self.global_policy + + @staticmethod + def get_retention_policy(): + # type: () -> List[RetentionPolicy] + """ + Retrieve the globally 
configured retention policy + """ + # @todo retrieve the config path + return RetentionPolicy.from_configuration([{'nr_of_snapshots': 24, 'nr_of_days': 1}, # One per hour + {'nr_of_snapshots': 6, 'nr_of_days': 6}, # one per day for rest of the week + {'nr_of_snapshots': 3, 'nr_of_days': 21}]) # One per week for the rest of the week + + @classmethod + def get_retention_policies_for_vpools(cls): + # type: () -> Dict[VPool, List[RetentionPolicy]] + """ + Map VPool with its retention policy (if any) + :return: Dict with VPool as keys and list of RetentionPolicy as value + :rtype: Dict[VPool, List[RetentionPolicy]] + """ + vpool_policies = {} + for vpool in VPoolList.get_vpools(): + policies_config = cls.get_retention_policy_vpool(vpool) + if policies_config: + vpool_policies[vpool] = RetentionPolicy.from_configuration(policies_config) + return vpool_policies + + @staticmethod + def get_retention_policy_vpool(vpool): + # type: (VPool) -> Union[List[RetentionPolicy], None] + """ + Retrieve the retention policy for the VPool (if any) + """ + # @todo Retrieve config key + return None + + @staticmethod + def get_retention_policy_vdisk(vdisk): + # type: (VDisk) -> Union[List[RetentionPolicy], None] + """ + Retrieve the retention policy for the VDisk (if any) + """ + # @todo retrieve config key + return None + + @staticmethod + def make_timestamp(base, offset): + # type: (datetime, timedelta) -> int + """ + Create an integer based timestamp based on a datetime and a timedelta + :param base: Base timestamp + :type base: datetime + :param offset: Offset in days + :type offset: timedelta + :return: Timestamp + """ + return int(time.mktime((base - offset).timetuple())) + + @classmethod + def _get_snapshot_buckets(cls, start_time, policies): + # type: (datetime, List[RetentionPolicy]) -> List[Bucket] + """ + Retrieve the bucket distribution based on the policies + There is always an additional bucket to keep track of older snapshots + :param start_time: Datetime to start counting from + :type start_time: datetime + :param policies + :type policies: RetentionPolicies + :return: + """ + day_delta = timedelta(1) # Convert to number of seconds in calculations + buckets = [] + processed_retention_days = 0 + offset = processed_retention_days * day_delta + + for policy in policies: # type: RetentionPolicy + number_of_days = policy.nr_of_days + number_of_snapshots = policy.nr_of_snapshots + snapshot_timedelta = number_of_days * day_delta / number_of_snapshots + for i in xrange(0, number_of_snapshots): + buckets.append(Bucket(start=cls.make_timestamp(start_time, offset + snapshot_timedelta * i), + end=cls.make_timestamp(start_time, offset + snapshot_timedelta * (i + 1)))) + processed_retention_days += number_of_days + offset = processed_retention_days * day_delta + # Always add a bucket which falls out of the configured retention + buckets.append(Bucket(start=cls.make_timestamp(start_time, processed_retention_days * day_delta), end=0)) + return buckets + + @staticmethod + def is_vdisk_running(vdisk): + # type: (VDisk) -> bool + """ + Determine if the VDisk is running + :return: True if the vdisk is running + :rtype: bool + """ + return vdisk.info['object_type'] in ['BASE'] + + def delete_snapshots(self, timestamp=None): + """ + Delete snapshots & scrubbing policy + + Implemented delete snapshot policy: + < 1d | 1d bucket | 1 | best of bucket | 1d + < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w + < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m + > 1m | delete + + :param timestamp: Timestamp to determine 
whether snapshots should be kept or not, if none provided, current time will be used + :type timestamp: float + :return: None + """ + if timestamp is None: + timestamp = time.time() + + # @todo think about backwards compatibility. The previous code would not account for the first day + start_time = datetime.fromtimestamp(timestamp) + + # Get a list of all snapshots that are used as parents for clones + parent_snapshots = set([vd.parentsnapshot for vd in VDiskList.get_with_parent_snaphots()]) + + # Distribute all snapshots into buckets. These buckets specify an interval and are ordered young to old + bucket_chains = [] + for vdisk in VDiskList.get_vdisks(): + if not self.is_vdisk_running(vdisk): + continue + + bucket_chain = self._get_snapshot_buckets(start_time, self.get_policy_to_enforce(vdisk)) + for vdisk_snapshot in vdisk.snapshots: + snapshot = Snapshot(**vdisk_snapshot) + if snapshot.is_sticky: + continue + if snapshot.vdisk_guid in parent_snapshots: + self._logger.info('Not deleting snapshot {0} because it has clones'.format(snapshot.vdisk_guid)) + continue + for bucket in bucket_chain: + bucket.try_add_snapshot(snapshot) + bucket_chains.append(bucket_chain) + + # Delete obsolete snapshots + for index, bucket_chain in enumerate(bucket_chains): + # @todo this consistency first behaviour changed with the new implementation + # There are now buckets based on hourly intervals which means the consistency of the first day is not guaranteed (unless the config is specified that way) + consistency_first = index == 0 + for bucket in bucket_chain: + obsolete_snapshots = bucket.get_obsolete_snapshots(consistency_first) + for snapshot in obsolete_snapshots: + VDiskController.delete_snapshot(vdisk_guid=snapshot.vdisk_guid, snapshot_id=snapshot.snapshot_id) From 9e13c5c7f532e9e4e608564752796962fc8c1c9e Mon Sep 17 00:00:00 2001 From: Jeffrey Devloo Date: Mon, 18 Mar 2019 19:20:43 +0100 Subject: [PATCH 04/18] Rebase on the latest 2.9.x --- ovs/lib/helpers/generic/snapshots.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ovs/lib/helpers/generic/snapshots.py b/ovs/lib/helpers/generic/snapshots.py index fdb9b1458..029d43dc2 100644 --- a/ovs/lib/helpers/generic/snapshots.py +++ b/ovs/lib/helpers/generic/snapshots.py @@ -279,6 +279,9 @@ def delete_snapshots(self, timestamp=None): for vdisk in VDiskList.get_vdisks(): if not self.is_vdisk_running(vdisk): continue + vdisk.invalidate_dynamics('being_scrubbed') + if vdisk.being_scrubbed: + continue bucket_chain = self._get_snapshot_buckets(start_time, self.get_policy_to_enforce(vdisk)) for vdisk_snapshot in vdisk.snapshots: From bf2bcaecdeedeb6978943b3a278ed91697915321 Mon Sep 17 00:00:00 2001 From: Jeffrey Devloo Date: Mon, 18 Mar 2019 19:32:34 +0100 Subject: [PATCH 05/18] Fully convert a snapshot dict to an object --- ovs/lib/helpers/generic/snapshots.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/ovs/lib/helpers/generic/snapshots.py b/ovs/lib/helpers/generic/snapshots.py index 029d43dc2..e9998f3ae 100644 --- a/ovs/lib/helpers/generic/snapshots.py +++ b/ovs/lib/helpers/generic/snapshots.py @@ -59,22 +59,22 @@ def from_configuration(cls, configuration): class Snapshot(object): - def __init__(self, timestamp, snapshot_id, vdisk_guid, is_consistent, is_sticky=False, *args, **kwargs): + + def __init__(self, guid, timestamp, label, is_consistent, is_automatic, is_sticky, in_backend, stored, vdisk_guid=None, *args, **kwargs): """ Initialize a snapshot object - :param timestamp: - :param snapshot_id: - 
:param vdisk_guid: - :param is_consistent: - :param is_sticky: - :param args: - :param kwargs: """ + if not vdisk_guid: + raise ValueError('Add the guid of the vdisk to the constructor') + self.guid = guid self.timestamp = int(timestamp) - self.snapshot_id = snapshot_id - self.vdisk_guid = vdisk_guid + self.label = label + self.is_automatic = is_automatic self.consistent = is_consistent self.is_sticky = is_sticky + self.in_backend = in_backend + self.stored = stored + self.vdisk_guid = vdisk_guid def __str__(self): return 'Snapshot for vDisk {0}'.format(self.vdisk_guid) @@ -285,7 +285,7 @@ def delete_snapshots(self, timestamp=None): bucket_chain = self._get_snapshot_buckets(start_time, self.get_policy_to_enforce(vdisk)) for vdisk_snapshot in vdisk.snapshots: - snapshot = Snapshot(**vdisk_snapshot) + snapshot = Snapshot(vdisk_guid=vdisk.guid, **vdisk_snapshot) if snapshot.is_sticky: continue if snapshot.vdisk_guid in parent_snapshots: @@ -303,4 +303,4 @@ def delete_snapshots(self, timestamp=None): for bucket in bucket_chain: obsolete_snapshots = bucket.get_obsolete_snapshots(consistency_first) for snapshot in obsolete_snapshots: - VDiskController.delete_snapshot(vdisk_guid=snapshot.vdisk_guid, snapshot_id=snapshot.snapshot_id) + VDiskController.delete_snapshot(vdisk_guid=snapshot.vdisk_guid, snapshot_id=snapshot.guid) From 38e940f54e32d1ce27c5f4277cb767be71a2bedf Mon Sep 17 00:00:00 2001 From: Jeffrey Devloo Date: Tue, 19 Mar 2019 10:49:57 +0100 Subject: [PATCH 06/18] Make consistency configurable --- ovs/lib/helpers/generic/snapshots.py | 69 ++++++++++++++++++---------- 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/ovs/lib/helpers/generic/snapshots.py b/ovs/lib/helpers/generic/snapshots.py index e9998f3ae..66f4258c3 100644 --- a/ovs/lib/helpers/generic/snapshots.py +++ b/ovs/lib/helpers/generic/snapshots.py @@ -25,17 +25,26 @@ class RetentionPolicy(object): - def __init__(self, nr_of_snapshots, nr_of_days): - # type: (int, int) -> None + def __init__(self, nr_of_snapshots, nr_of_days, consistency_first=False, consistency_first_on=None): + # type: (int, int, bool, List[int]) -> None """ Initialize a retention policy :param nr_of_snapshots: Number of snapshots to keep over the configured number of days :type nr_of_snapshots: int :param nr_of_days: Number of days to account the number of snapshots for :type nr_of_days: int + :param consistency_first: Consistency of the snapshot is prioritized above the age + :type consistency_first: bool + :param consistency_first_on: Apply the consistency first on the snapsnot numbers given + :type consistency_first_on: List[int] """ + if consistency_first_on is None: + consistency_first_on = [] + self.nr_of_snapshots = nr_of_snapshots self.nr_of_days = nr_of_days + self.consistency_first = consistency_first + self.consistency_first_on = consistency_first_on @classmethod def from_configuration(cls, configuration): @@ -81,10 +90,11 @@ def __str__(self): class Bucket(object): - def __init__(self, start, end): + def __init__(self, start, end, retention_policy=None): self.start = start self.end = end self.snapshots = [] + self.retention_policy = retention_policy def is_snapshot_in_interval(self, snapshot): # type: (Snapshot) -> bool @@ -94,31 +104,37 @@ def try_add_snapshot(self, snapshot): if self.is_snapshot_in_interval(snapshot): self.snapshots.append(snapshot) - def get_obsolete_snapshots(self, consistency_first=False): - # type: (bool) -> List[Snapshot] + def get_obsolete_snapshots(self, consistency_first=False, bucket_count=0): + # type: 
(bool, int) -> List[Snapshot] """ Retrieve all snapshots which are no longer within this interval - :param consistency_first: Consistency of the snapshot is priortized above the age + :param consistency_first: Consistency of the snapshot is prioritized above the age :type consistency_first: bool + :param bucket_count: Number of the bucket in the chain. Used to determine if the current snapshot must be consistent + :type bucket_count: int :return: List with Snapshots :rtype: List[Snapshot] """ + _ = consistency_first + if not self.end: # No end date for the interval, every snapshot is obsolete return self.snapshots - if consistency_first: - best = None - for snapshot in self.snapshots: - if best is None: - best = snapshot - # Consistent is better than inconsistent - elif snapshot.consistent and not best.consistent: - best = snapshot - # Newer (larger timestamp) is better than older snapshots - elif snapshot.consistent == best.consistent and snapshot.timestamp > best.timestamp: - best = snapshot - return [s for s in self.snapshots if s.timestamp != best.timestamp] + if self.retention_policy.consistency_first: + # Using + 1 as snapshot provided in the consistency_first_on are > 0 + if self.retention_policy.consistency_first_on and bucket_count + 1 in self.retention_policy.consistency_first_on: + best = None + for snapshot in self.snapshots: + if best is None: + best = snapshot + # Consistent is better than inconsistent + elif snapshot.consistent and not best.consistent: + best = snapshot + # Newer (larger timestamp) is better than older snapshots + elif snapshot.consistent == best.consistent and snapshot.timestamp > best.timestamp: + best = snapshot + return [s for s in self.snapshots if s.timestamp != best.timestamp] # First the oldest snapshot and remove all younger ones oldest = None for snapshot in self.snapshots: @@ -161,9 +177,13 @@ def get_retention_policy(): Retrieve the globally configured retention policy """ # @todo retrieve the config path - return RetentionPolicy.from_configuration([{'nr_of_snapshots': 24, 'nr_of_days': 1}, # One per hour - {'nr_of_snapshots': 6, 'nr_of_days': 6}, # one per day for rest of the week - {'nr_of_snapshots': 3, 'nr_of_days': 21}]) # One per week for the rest of the week + return RetentionPolicy.from_configuration([ + # One per hour + {'nr_of_snapshots': 24, 'nr_of_days': 1}, + # one per day for rest of the week and opt for a consistent snapshot for the first day + {'nr_of_snapshots': 6, 'nr_of_days': 6, 'consistency_first': True, 'consistency_first_on': [1]}, + # One per week for the rest of the week + {'nr_of_snapshots': 3, 'nr_of_days': 21}]) @classmethod def get_retention_policies_for_vpools(cls): @@ -234,7 +254,8 @@ def _get_snapshot_buckets(cls, start_time, policies): snapshot_timedelta = number_of_days * day_delta / number_of_snapshots for i in xrange(0, number_of_snapshots): buckets.append(Bucket(start=cls.make_timestamp(start_time, offset + snapshot_timedelta * i), - end=cls.make_timestamp(start_time, offset + snapshot_timedelta * (i + 1)))) + end=cls.make_timestamp(start_time, offset + snapshot_timedelta * (i + 1)), + retention_policy=policy)) processed_retention_days += number_of_days offset = processed_retention_days * day_delta # Always add a bucket which falls out of the configured retention @@ -299,8 +320,8 @@ def delete_snapshots(self, timestamp=None): for index, bucket_chain in enumerate(bucket_chains): # @todo this consistency first behaviour changed with the new implementation # There are now buckets based on hourly intervals which 
means the consistency of the first day is not guaranteed (unless the config is specified that way) - consistency_first = index == 0 + # consistency_first = index == 0 for bucket in bucket_chain: - obsolete_snapshots = bucket.get_obsolete_snapshots(consistency_first) + obsolete_snapshots = bucket.get_obsolete_snapshots(index) for snapshot in obsolete_snapshots: VDiskController.delete_snapshot(vdisk_guid=snapshot.vdisk_guid, snapshot_id=snapshot.guid) From e83b1288afd1f3a1fe0e8c4d3abe7e3077b81438 Mon Sep 17 00:00:00 2001 From: Jeffrey Devloo Date: Mon, 25 Mar 2019 17:47:51 +0100 Subject: [PATCH 07/18] Add documentation Skip the first 24hours when scheduling the task to allow the user to create and use snapshots --- docs/scheduledtasks.md | 4 +- docs/snapshots.md | 58 +++ ovs/constants/vdisk.py | 8 + ovs/lib/generic.py | 21 +- ovs/lib/helpers/generic/snapshots.py | 105 +++-- ovs/lib/tests/generic_tests/test_snapshot.py | 468 +++++++------------ 6 files changed, 317 insertions(+), 347 deletions(-) create mode 100644 docs/snapshots.md diff --git a/docs/scheduledtasks.md b/docs/scheduledtasks.md index bf81259ff..cec231756 100644 --- a/docs/scheduledtasks.md +++ b/docs/scheduledtasks.md @@ -16,8 +16,8 @@ Example configuration: ``` -To disblale the automatic scrubbing job add `"ovs.generic.execute_scrub": null` to the JSON object. -In case you want to change the schedule for the ALBA backend verifictaion process which checks the state of each object in the backend, add `"alba.verify_namespaces": {"minute": "0", "hour": "0", "month_of_year": "*/X"}` where X is the amount of months between each run. +To disable the automatic scrubbing job add `"ovs.generic.execute_scrub": null` to the JSON object. +In case you want to change the schedule for the ALBA backend verification process which checks the state of each object in the backend, add `"alba.verify_namespaces": {"minute": "0", "hour": "0", "month_of_year": "*/X"}` where X is the amount of months between each run. In case the configuration cannot be parsed at all (e.g. invalid JSON), the code will fallback to the hardcoded schedule. If the crontab arguments are invalid (e.g. they contain an unsupported key) the task will be disabled. diff --git a/docs/snapshots.md b/docs/snapshots.md new file mode 100644 index 000000000..0ceedbdbf --- /dev/null +++ b/docs/snapshots.md @@ -0,0 +1,58 @@ +### Snapshot management +The Framework will, by default, create snapshots of every vDisk every hour +(can be adjusted. See docs/scheduledtasks.md). + +To keep the snapshots manageable overtime, the Framework schedules a clean-up every day to enforce a retention policy. +This automatic task will: +- Create an overview of the all the snapshots for every volume +- Skip the first 24 hours (allows the user to create as many snaphots as he wants daily) +- Enforce the retention policy + +The default retention policy is: +- an hourly snapshot is kept for yesterday +- a single snapshot is kept for the 6 days after that + - Prioritizes consistent snapshots over older ones for the first day in the policy + (which is 2 days back, starting from now) +- A single snapshot is kept for the 2nd, 3rd and 4th week to have a single snapshot of the week for the first month +- All older snapshots are discarded + +#### Configuring the retention policy +A retention policy can be configured so the scheduled task will enforce a different one from the default. 
+ +It can be customized on: +- Global level, enforces the policy to all vDisks within the cluster +- VPool level, overrides the global level, enforce to all vDisks within the vPool +- VDisk level, overrides the global and vPool level, enforce to this vDisk only + +The notation of the policy is a list containing policies. A policies consists minimally of `nr_of_snapshots`, which +is the the number of snapshots to have over the given `nr_of_days`, and `nr_of_days` which is the number of days to span +the `nr_of_snapshots` over. This notation allows for some fine grained control while also being easy to configure. + +There are two additional options available: `consistency_first` +which indicates that: +- this policy has to search for the oldest consistent snapshot instead of oldest one +- When no consistent snapshot was found, find the oldest snapshot + +If a policy interval spans multiple days, the `consistency_first_on` can be configured to narrow the days down +to apply the `consistency_first` rules +This options takes in a list of day numbers. + +If we were to write out the default retention policy, it would look like: +``` +[# One per hour + {'nr_of_snapshots': 24, 'nr_of_days': 1}, + # one per day for rest of the week and opt for a consistent snapshot for the first day + {'nr_of_snapshots': 6, 'nr_of_days': 6, 'consistency_first': True, 'consistency_first_on': [1]}, + # One per week for the rest of the month + {'nr_of_snapshots': 3, 'nr_of_days': 21}] +``` + +Configuring it on different levels can be done using the API: +- Global level: <> +- vPool level: <> +- vDisk level: <> + +Example: +``` +Insert +``` \ No newline at end of file diff --git a/ovs/constants/vdisk.py b/ovs/constants/vdisk.py index fe9169073..f8a988166 100644 --- a/ovs/constants/vdisk.py +++ b/ovs/constants/vdisk.py @@ -23,3 +23,11 @@ # Scrub related SCRUB_VDISK_LOCK = '{0}_{{0}}'.format(LOCK_NAMESPACE) # Second format is the vdisk guid + +# Snapshot related +# Note: the scheduled task will always skip the first 24 hours before enforcing the policy +SNAPSHOT_POLICY_DEFAULT = [{'nr_of_snapshots': 24, 'nr_of_days': 1}, # One per hour + # one per day for rest of the week and opt for a consistent snapshot for the first day + {'nr_of_snapshots': 6, 'nr_of_days': 6, 'consistency_first': True, 'consistency_first_on': [1]}, + # One per week for the rest of the month + {'nr_of_snapshots': 3, 'nr_of_days': 21}] diff --git a/ovs/lib/generic.py b/ovs/lib/generic.py index 9882322fc..d04bbb8dd 100644 --- a/ovs/lib/generic.py +++ b/ovs/lib/generic.py @@ -20,15 +20,13 @@ import os import copy import time -from datetime import datetime, timedelta +from datetime import timedelta from threading import Thread -from time import mktime from ovs.dal.hybrids.servicetype import ServiceType from ovs.dal.lists.servicelist import ServiceList from ovs.dal.lists.storagerouterlist import StorageRouterList from ovs.dal.lists.vdisklist import VDiskList from ovs.extensions.db.arakooninstaller import ArakoonClusterConfig -from ovs.extensions.generic.configuration import Configuration from ovs.extensions.generic.sshclient import SSHClient, UnableToConnectException from ovs.lib.helpers.decorators import ovs_task from ovs.lib.helpers.generic.scrubber import Scrubber @@ -75,22 +73,29 @@ def snapshot_all_vdisks(): @staticmethod @ovs_task(name='ovs.generic.delete_snapshots', schedule=Schedule(minute='1', hour='2'), ensure_single_info={'mode': 'DEFAULT'}) def delete_snapshots(timestamp=None): + # type: (float) -> Dict[str, List[str]] """ Delete snapshots & 
scrubbing policy - Implemented delete snapshot policy: + Implemented default delete snapshot policy: < 1d | 1d bucket | 1 | best of bucket | 1d < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m > 1m | delete - - :param timestamp: Timestamp to determine whether snapshots should be kept or not, if none provided, current time will be used + The configured policy can differ from this one. + :param timestamp: Timestamp to determine whether snapshots should be kept or not, + if none provided, the current timestamp - 1 day is used. + The scheduled task will not remove snapshots of the current day this way! :type timestamp: float - :return: None + :return: Dict with vdisk guid as key, deleted snapshot ids as value + :rtype: dict """ + if timestamp is None: + timestamp = time.time() - timedelta(1).total_seconds() + GenericController._logger.info('Delete snapshots started') snapshot_manager = SnapshotManager() - snapshot_manager.delete_snapshots(timestamp) + return snapshot_manager.delete_snapshots(timestamp) @staticmethod @ovs_task(name='ovs.generic.execute_scrub', schedule=Schedule(minute='0', hour='3'), ensure_single_info={'mode': 'DEDUPED'}) diff --git a/ovs/lib/helpers/generic/snapshots.py b/ovs/lib/helpers/generic/snapshots.py index 66f4258c3..fa889f540 100644 --- a/ovs/lib/helpers/generic/snapshots.py +++ b/ovs/lib/helpers/generic/snapshots.py @@ -16,6 +16,7 @@ import time from datetime import datetime, timedelta +from ovs.constants.vdisk import SNAPSHOT_POLICY_DEFAULT from ovs.dal.hybrids.vdisk import VDisk from ovs.dal.hybrids.vpool import VPool from ovs.dal.lists.vpoollist import VPoolList @@ -23,6 +24,8 @@ from ovs.lib.vdisk import VDiskController from ovs.log.log_handler import LogHandler +_logger = LogHandler.get('lib', name='generic tasks') + class RetentionPolicy(object): def __init__(self, nr_of_snapshots, nr_of_days, consistency_first=False, consistency_first_on=None): @@ -69,12 +72,10 @@ def from_configuration(cls, configuration): class Snapshot(object): - def __init__(self, guid, timestamp, label, is_consistent, is_automatic, is_sticky, in_backend, stored, vdisk_guid=None, *args, **kwargs): + def __init__(self, guid, timestamp, label, is_consistent, is_automatic, is_sticky, in_backend, stored, vdisk_guid, *args, **kwargs): """ Initialize a snapshot object """ - if not vdisk_guid: - raise ValueError('Add the guid of the vdisk to the constructor') self.guid = guid self.timestamp = int(timestamp) self.label = label @@ -86,7 +87,9 @@ def __init__(self, guid, timestamp, label, is_consistent, is_automatic, is_stick self.vdisk_guid = vdisk_guid def __str__(self): - return 'Snapshot for vDisk {0}'.format(self.vdisk_guid) + prop_strings = ['{}: {}'.format(prop, val) for prop, val in vars(self).iteritems()] + prop_strings.append('humanized timestamp: {}'.format(datetime.fromtimestamp(self.timestamp).strftime('%Y-%m-%d %H:%M'))) + return 'Snapshot for vDisk {0} ({1})'.format(self.vdisk_guid, ', '.join(prop_strings)) class Bucket(object): @@ -117,43 +120,51 @@ def get_obsolete_snapshots(self, consistency_first=False, bucket_count=0): """ _ = consistency_first - if not self.end: - # No end date for the interval, every snapshot is obsolete - return self.snapshots - - if self.retention_policy.consistency_first: - # Using + 1 as snapshot provided in the consistency_first_on are > 0 - if self.retention_policy.consistency_first_on and bucket_count + 1 in self.retention_policy.consistency_first_on: - best = None + obsolete_snapshots = None + if 
self.end: + snapshot_to_keep = None + if self.retention_policy.consistency_first: + # Using + 1 as snapshot provided in the consistency_first_on are > 0 + if self.retention_policy.consistency_first_on and bucket_count + 1 in self.retention_policy.consistency_first_on: + best = None + for snapshot in self.snapshots: + if best is None: + best = snapshot + # Consistent is better than inconsistent + elif snapshot.consistent and not best.consistent: + best = snapshot + # Newer (larger timestamp) is better than older snapshots + elif snapshot.consistent == best.consistent and snapshot.timestamp > best.timestamp: + best = snapshot + snapshot_to_keep = best + if not snapshot_to_keep: + # First the oldest snapshot and remove all younger ones + oldest = None for snapshot in self.snapshots: - if best is None: - best = snapshot - # Consistent is better than inconsistent - elif snapshot.consistent and not best.consistent: - best = snapshot - # Newer (larger timestamp) is better than older snapshots - elif snapshot.consistent == best.consistent and snapshot.timestamp > best.timestamp: - best = snapshot - return [s for s in self.snapshots if s.timestamp != best.timestamp] - # First the oldest snapshot and remove all younger ones - oldest = None - for snapshot in self.snapshots: - if oldest is None: - oldest = snapshot - # Older (smaller timestamp) is the one we want to keep - elif snapshot.timestamp < oldest.timestamp: - oldest = snapshot - return [s for s in self.snapshots if s.timestamp != oldest.timestamp] + if oldest is None: + oldest = snapshot + # Older (smaller timestamp) is the one we want to keep + elif snapshot.timestamp < oldest.timestamp: + oldest = snapshot + snapshot_to_keep = oldest + _logger.info('Elected {} as the snapshot to keep within {}.'.format(snapshot_to_keep, self)) + obsolete_snapshots = [s for s in self.snapshots if s.timestamp != snapshot_to_keep.timestamp] + else: + # No end date for the interval, every snapshot is obsolete + obsolete_snapshots = self.snapshots + _logger.info('Marking {} as obsolete within {} ({} in total)'.format(', '.join([str(s) for s in obsolete_snapshots]), self, len(obsolete_snapshots))) + return obsolete_snapshots def __str__(self): - return 'Bucket (start: {0}, end: {1}) with {2}'.format(self.start, self.end, self.snapshots) + humanized_start = datetime.fromtimestamp(self.start).strftime('%Y-%m-%d %H:%M') + humanized_end = datetime.fromtimestamp(self.end).strftime('%Y-%m-%d %H:%M') if self.end else self.end + return 'Bucket (start: {0}, end: {1}) with {2}'.format(humanized_start, humanized_end, self.snapshots) class SnapshotManager(object): """ Manages snapshots of all vdisks """ - _logger = LogHandler.get('lib', name='generic tasks') def __init__(self): self.global_policy = self.get_retention_policy() @@ -177,13 +188,7 @@ def get_retention_policy(): Retrieve the globally configured retention policy """ # @todo retrieve the config path - return RetentionPolicy.from_configuration([ - # One per hour - {'nr_of_snapshots': 24, 'nr_of_days': 1}, - # one per day for rest of the week and opt for a consistent snapshot for the first day - {'nr_of_snapshots': 6, 'nr_of_days': 6, 'consistency_first': True, 'consistency_first_on': [1]}, - # One per week for the rest of the week - {'nr_of_snapshots': 3, 'nr_of_days': 21}]) + return RetentionPolicy.from_configuration(SNAPSHOT_POLICY_DEFAULT) @classmethod def get_retention_policies_for_vpools(cls): @@ -272,23 +277,22 @@ def is_vdisk_running(vdisk): """ return vdisk.info['object_type'] in ['BASE'] - def 
delete_snapshots(self, timestamp=None): + def delete_snapshots(self, timestamp): + # type: (float) -> Dict[str, List[str]] """ Delete snapshots & scrubbing policy - Implemented delete snapshot policy: + Implemented default delete snapshot policy: < 1d | 1d bucket | 1 | best of bucket | 1d < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m > 1m | delete - :param timestamp: Timestamp to determine whether snapshots should be kept or not, if none provided, current time will be used + :param timestamp: Timestamp to determine whether snapshots should be kept or not :type timestamp: float - :return: None + :return: Dict with vdisk guid as key, deleted snapshot ids as value + :rtype: dict """ - if timestamp is None: - timestamp = time.time() - # @todo think about backwards compatibility. The previous code would not account for the first day start_time = datetime.fromtimestamp(timestamp) @@ -310,18 +314,23 @@ def delete_snapshots(self, timestamp=None): if snapshot.is_sticky: continue if snapshot.vdisk_guid in parent_snapshots: - self._logger.info('Not deleting snapshot {0} because it has clones'.format(snapshot.vdisk_guid)) + _logger.info('Not deleting snapshot {0} because it has clones'.format(snapshot.vdisk_guid)) continue for bucket in bucket_chain: bucket.try_add_snapshot(snapshot) bucket_chains.append(bucket_chain) # Delete obsolete snapshots + removed_snapshot_map = {} for index, bucket_chain in enumerate(bucket_chains): # @todo this consistency first behaviour changed with the new implementation # There are now buckets based on hourly intervals which means the consistency of the first day is not guaranteed (unless the config is specified that way) # consistency_first = index == 0 for bucket in bucket_chain: - obsolete_snapshots = bucket.get_obsolete_snapshots(index) + obsolete_snapshots = bucket.get_obsolete_snapshots(False, index) for snapshot in obsolete_snapshots: + deleted_snapshots = removed_snapshot_map.get(snapshot.vdisk_guid, []) VDiskController.delete_snapshot(vdisk_guid=snapshot.vdisk_guid, snapshot_id=snapshot.guid) + deleted_snapshots.append(snapshot.guid) + removed_snapshot_map[snapshot.vdisk_guid] = deleted_snapshots + return removed_snapshot_map diff --git a/ovs/lib/tests/generic_tests/test_snapshot.py b/ovs/lib/tests/generic_tests/test_snapshot.py index df71e033a..45696a411 100644 --- a/ovs/lib/tests/generic_tests/test_snapshot.py +++ b/ovs/lib/tests/generic_tests/test_snapshot.py @@ -21,20 +21,27 @@ import time import datetime import unittest +from ovs.dal.hybrids.vdisk import VDisk from ovs.dal.tests.helpers import DalHelper from ovs.lib.generic import GenericController from ovs.lib.vdisk import VDiskController +MINUTE = 60 +HOUR = MINUTE * 60 +DAY = datetime.timedelta(1) + class SnapshotTestCase(unittest.TestCase): """ - This test class will validate the various scenarios of the Generic logic + Test the scheduling of snapshot creation and the enforced retention policy Actual snapshot logic is tested in the vdisk_tests.test_snapshot """ + def setUp(self): """ (Re)Sets the stores on every test """ + # self.debug = True self.volatile, self.persistent = DalHelper.setup() def tearDown(self): @@ -95,11 +102,9 @@ def test_clone_snapshot(self): [dynamic for dynamic in vdisk_1._dynamics if dynamic.name == 'snapshots'][0].timeout = 0 base = datetime.datetime.now().date() - base_timestamp = self._make_timestamp(base, datetime.timedelta(1)) - minute = 60 - hour = minute * 60 + base_timestamp = self._make_timestamp(base, DAY) for h in 
[6, 12, 18]: - timestamp = base_timestamp + (hour * h) + timestamp = base_timestamp + (HOUR * h) VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid, metadata={'label': 'snapshot_{0}:30'.format(str(h)), 'is_consistent': True, @@ -113,23 +118,25 @@ def test_clone_snapshot(self): clone_vdisk.save() for day in range(10): - base_timestamp = self._make_timestamp(base, datetime.timedelta(1) * day) + base_timestamp = self._make_timestamp(base, DAY * day) for h in [6, 12, 18]: - timestamp = base_timestamp + (hour * h) + timestamp = base_timestamp + (HOUR * h) VDiskController.create_snapshot(vdisk_guid=clone_vdisk.guid, metadata={'label': 'snapshot_{0}:30'.format(str(h)), 'is_consistent': True, 'timestamp': str(timestamp)}) - base_timestamp = self._make_timestamp(base, datetime.timedelta(1) * 2) - GenericController.delete_snapshots(timestamp=base_timestamp + (minute * 30)) + base_timestamp = self._make_timestamp(base, DAY * 2) + GenericController.delete_snapshots(timestamp=base_timestamp + (MINUTE * 30)) self.assertIn(base_snapshot_guid, vdisk_1.snapshot_ids, 'Snapshot was deleted while there are still clones of it') - - def test_snapshot_automatic_consistent(self): + + @staticmethod + def _build_get_built_vdisk(): + # type: () -> VDisk """ - is_automatic: True, is_consistent: True --> Automatically created consistent snapshots should be deleted + Build the DAL structure and retrieve the vdisk + :return: VDisk object + :rtype: VDisk """ - minute = 60 - hour = minute * 60 structure = DalHelper.build_dal_structure( {'vpools': [1], 'vdisks': [(1, 1, 1, 1)], # (, , , ) @@ -137,271 +144,132 @@ def test_snapshot_automatic_consistent(self): 'storagerouters': [1], 'storagedrivers': [(1, 1, 1)]} # (, , ) ) - base = datetime.datetime.now().date() - vdisk_1 = structure['vdisks'][1] + return structure['vdisks'][1] - label = 'c' - # Extra time to add to the hourly timestamps - additional_time = minute * 30 - # Hours to create a snapshot on - sticky_hours = [] - consistent_hours = [2] - inconsistent_hours = [] + def _create_validate_snapshots(self, vdisk, start_time, sticky_hours, consistent_hours, inconsistent_hours, + snapshot_time_offset=0, automatic_snapshots=True, number_of_days=35): + # type: (VDisk, datetime.date, List[int], List[int], List[int], int, bool, int) -> None + """ + Create and validate snapshot creation and deletion sequence + This is suitable to enforce the default policy which is: + > 1d | day 1 bucket | 24 | oldest of bucket | 24h + < 1d | 1d bucket | 1 | best of bucket | 1d + < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w + < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m + > 1m | delete + :param vdisk: VDisk to validate + :type vdisk: VDisk + :param start_time: Time when snapshots started to be made + :type start_time: datetime.datetime + :param sticky_hours: Hours that the sticky snapshots were made on + :type sticky_hours: List[int] + :param consistent_hours: Hours that the consistent snapshots were made on + :type consistent_hours: List[int] + :param inconsistent_hours: Hours that the inconsistent snapshots were made on + :type inconsistent_hours: List[int] + :param snapshot_time_offset: Offset time to create snapshot. 
Defaults to creating snapshot on the hour mark + :type snapshot_time_offset: int + :param automatic_snapshots: Indicate that the snapshots are made automatically (because of the scheduling) + :type automatic_snapshots: bool + """ # Snapshot details is_sticky = len(sticky_hours) > 0 is_consistent = len(consistent_hours) > 0 - is_automatic = True + label = 'c' if is_consistent else 'i' - for day in xrange(35): - base_timestamp = self._make_timestamp(base, datetime.timedelta(1) * day) + for day in xrange(number_of_days): + base_timestamp = self._make_timestamp(start_time, DAY * day) self._print_message('') self._print_message('Day cycle: {0}: {1}'.format(day, datetime.datetime.fromtimestamp(base_timestamp).strftime('%Y-%m-%d'))) self._print_message('- Deleting snapshots') - GenericController.delete_snapshots(timestamp=base_timestamp + (minute * 30)) + # The absolute timestamp is used when providing one. Going back a day to skip a day similar to the scheduled task + delete_snapshot_timestamp = base_timestamp + (MINUTE * 30) - DAY.total_seconds() + GenericController.delete_snapshots(timestamp=delete_snapshot_timestamp) - self._validate(vdisk=vdisk_1, + self._validate(vdisk=vdisk, current_day=day, - base_date=base, + start_time=start_time, sticky_hours=sticky_hours, consistent_hours=consistent_hours, inconsistent_hours=inconsistent_hours) self._print_message('- Creating snapshots') for x in consistent_hours + inconsistent_hours: - timestamp = base_timestamp + (hour * x) + additional_time - VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid, + timestamp = base_timestamp + (HOUR * x) + snapshot_time_offset + VDiskController.create_snapshot(vdisk_guid=vdisk.guid, metadata={'label': 'ss_{0}_{1}:00'.format(label, x), 'is_sticky': is_sticky, 'timestamp': str(timestamp), - 'is_automatic': is_automatic, + 'is_automatic': automatic_snapshots, 'is_consistent': is_consistent}) + def test_snapshot_automatic_consistent(self): + """ + is_automatic: True, is_consistent: True --> Automatically created consistent snapshots should be deleted + """ + self._create_validate_snapshots(vdisk=self._build_get_built_vdisk(), + start_time=datetime.datetime.now().date(), + sticky_hours=[], + consistent_hours=[2], + inconsistent_hours=[], + snapshot_time_offset=MINUTE * 30, # Extra time to add to the hourly timestamps + automatic_snapshots=True) + def test_snapshot_automatic_not_consistent(self): """ is_automatic: True, is_consistent: False --> Automatically created non-consistent snapshots should be deleted """ - minute = 60 - hour = minute * 60 - structure = DalHelper.build_dal_structure( - {'vpools': [1], - 'vdisks': [(1, 1, 1, 1)], # (, , , ) - 'mds_services': [(1, 1)], - 'storagerouters': [1], - 'storagedrivers': [(1, 1, 1)]} # (, , ) - ) - base = datetime.datetime.now().date() - vdisk_1 = structure['vdisks'][1] - - label = 'i' - # Extra time to add to the hourly timestamps - additional_time = 0 - # Hours to create a snapshot on - sticky_hours = [] - consistent_hours = [] - inconsistent_hours = [2] - # Snapshot details - is_sticky = len(sticky_hours) > 0 - is_consistent = len(consistent_hours) > 0 - is_automatic = True - - for day in xrange(35): - base_timestamp = self._make_timestamp(base, datetime.timedelta(1) * day) - self._print_message('') - self._print_message('Day cycle: {0}: {1}'.format(day, datetime.datetime.fromtimestamp(base_timestamp).strftime('%Y-%m-%d'))) - - self._print_message('- Deleting snapshots') - GenericController.delete_snapshots(timestamp=base_timestamp + (minute * 30)) - - 
self._validate(vdisk=vdisk_1, - current_day=day, - base_date=base, - sticky_hours=sticky_hours, - consistent_hours=consistent_hours, - inconsistent_hours=inconsistent_hours) - - self._print_message('- Creating snapshots') - for x in consistent_hours + inconsistent_hours: - timestamp = base_timestamp + (hour * x) + additional_time - VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid, - metadata={'label': 'ss_{0}_{1}:00'.format(label, x), - 'is_sticky': is_sticky, - 'timestamp': str(timestamp), - 'is_automatic': is_automatic, - 'is_consistent': is_consistent}) + self._create_validate_snapshots(vdisk=self._build_get_built_vdisk(), + start_time=datetime.datetime.now().date(), + sticky_hours=[], + consistent_hours=[], + inconsistent_hours=[2], + snapshot_time_offset=0, # Extra time to add to the hourly timestamps + automatic_snapshots=True) def test_snapshot_non_automatic_consistent(self): """ is_automatic: False, is_consistent: True --> Manually created consistent snapshots should be deleted """ - minute = 60 - hour = minute * 60 - structure = DalHelper.build_dal_structure( - {'vpools': [1], - 'vdisks': [(1, 1, 1, 1)], # (, , , ) - 'mds_services': [(1, 1)], - 'storagerouters': [1], - 'storagedrivers': [(1, 1, 1)]} # (, , ) - ) - base = datetime.datetime.now().date() - vdisk_1 = structure['vdisks'][1] - - label = 'c' - # Extra time to add to the hourly timestamps - additional_time = minute * 30 - # Hours to create a snapshot on - sticky_hours = [] - consistent_hours = [2] - inconsistent_hours = [] - # Snapshot details - is_sticky = len(sticky_hours) > 0 - is_consistent = len(consistent_hours) > 0 - is_automatic = False - - for day in xrange(35): - base_timestamp = self._make_timestamp(base, datetime.timedelta(1) * day) - self._print_message('') - self._print_message('Day cycle: {0}: {1}'.format(day, datetime.datetime.fromtimestamp(base_timestamp).strftime('%Y-%m-%d'))) - - self._print_message('- Deleting snapshots') - GenericController.delete_snapshots(timestamp=base_timestamp + (minute * 30)) - - self._validate(vdisk=vdisk_1, - current_day=day, - base_date=base, - sticky_hours=sticky_hours, - consistent_hours=consistent_hours, - inconsistent_hours=inconsistent_hours) - - self._print_message('- Creating snapshots') - for x in consistent_hours + inconsistent_hours: - timestamp = base_timestamp + (hour * x) + additional_time - VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid, - metadata={'label': 'ss_{0}_{1}:00'.format(label, x), - 'is_sticky': is_sticky, - 'timestamp': str(timestamp), - 'is_automatic': is_automatic, - 'is_consistent': is_consistent}) + self._create_validate_snapshots(vdisk=self._build_get_built_vdisk(), + start_time=datetime.datetime.now().date(), + sticky_hours=[], + consistent_hours=[2], + inconsistent_hours=[], + snapshot_time_offset=MINUTE * 30, # Extra time to add to the hourly timestamps + automatic_snapshots=False) def test_snapshot_not_automatic_not_consistent(self): """ is_automatic: False, is_consistent: False --> Manually created non-consistent snapshots should be deleted """ - minute = 60 - hour = minute * 60 - structure = DalHelper.build_dal_structure( - {'vpools': [1], - 'vdisks': [(1, 1, 1, 1)], # (, , , ) - 'mds_services': [(1, 1)], - 'storagerouters': [1], - 'storagedrivers': [(1, 1, 1)]} # (, , ) - ) - base = datetime.datetime.now().date() - vdisk_1 = structure['vdisks'][1] - - label = 'i' - # Extra time to add to the hourly timestamps - additional_time = 0 - # Hours to create a snapshot on - sticky_hours = [] - consistent_hours = [] - 
inconsistent_hours = [2] - # Snapshot details - is_sticky = len(sticky_hours) > 0 - is_consistent = len(consistent_hours) > 0 - is_automatic = False - - for day in xrange(35): - base_timestamp = self._make_timestamp(base, datetime.timedelta(1) * day) - self._print_message('') - self._print_message('Day cycle: {0}: {1}'.format(day, datetime.datetime.fromtimestamp(base_timestamp).strftime('%Y-%m-%d'))) - - self._print_message('- Deleting snapshots') - GenericController.delete_snapshots(timestamp=base_timestamp + (minute * 30)) - - self._validate(vdisk=vdisk_1, - current_day=day, - base_date=base, - sticky_hours=sticky_hours, - consistent_hours=consistent_hours, - inconsistent_hours=inconsistent_hours) - - self._print_message('- Creating snapshots') - for x in consistent_hours + inconsistent_hours: - timestamp = base_timestamp + (hour * x) + additional_time - VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid, - metadata={'label': 'ss_{0}_{1}:00'.format(label, x), - 'is_sticky': is_sticky, - 'timestamp': str(timestamp), - 'is_automatic': is_automatic, - 'is_consistent': is_consistent}) + self._create_validate_snapshots(vdisk=self._build_get_built_vdisk(), + start_time=datetime.datetime.now().date(), + sticky_hours=[], + consistent_hours=[], + inconsistent_hours=[2], + snapshot_time_offset=0, # Extra time to add to the hourly timestamps + automatic_snapshots=False) def test_snapshot_sticky(self): """ is_sticky: True --> Sticky snapshots of any kind should never be deleted (Only possible to delete manually) """ - minute = 60 - hour = minute * 60 - structure = DalHelper.build_dal_structure( - {'vpools': [1], - 'vdisks': [(1, 1, 1, 1)], # (, , , ) - 'mds_services': [(1, 1)], - 'storagerouters': [1], - 'storagedrivers': [(1, 1, 1)]} # (, , ) - ) - base = datetime.datetime.now().date() - vdisk_1 = structure['vdisks'][1] - - label = 'c' - # Extra time to add to the hourly timestamps - additional_time = minute * 30 - # Hours to create a snapshot on - sticky_hours = [2] - consistent_hours = [2] - inconsistent_hours = [] - # Snapshot details - is_sticky = len(sticky_hours) > 0 - is_consistent = len(consistent_hours) > 0 - is_automatic = False - - for day in xrange(35): - base_timestamp = self._make_timestamp(base, datetime.timedelta(1) * day) - self._print_message('') - self._print_message('Day cycle: {0}: {1}'.format(day, datetime.datetime.fromtimestamp(base_timestamp).strftime('%Y-%m-%d'))) - - self._print_message('- Deleting snapshots') - GenericController.delete_snapshots(timestamp=base_timestamp + (minute * 30)) - - self._validate(vdisk=vdisk_1, - current_day=day, - base_date=base, - sticky_hours=sticky_hours, - consistent_hours=consistent_hours, - inconsistent_hours=inconsistent_hours) - - self._print_message('- Creating snapshots') - for x in consistent_hours + inconsistent_hours: - timestamp = base_timestamp + (hour * x) + additional_time - VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid, - metadata={'label': 'ss_{0}_{1}:00'.format(label, x), - 'is_sticky': is_sticky, - 'timestamp': str(timestamp), - 'is_automatic': is_automatic, - 'is_consistent': is_consistent}) + self._create_validate_snapshots(vdisk=self._build_get_built_vdisk(), + start_time=datetime.datetime.now().date(), + sticky_hours=[2], + consistent_hours=[2], + inconsistent_hours=[], + snapshot_time_offset=MINUTE * 30, # Extra time to add to the hourly timestamps + automatic_snapshots=False) def test_happypath(self): """ Validates the happy path; Hourly snapshots are taken with a few manual consistent every now and 
then. The delete policy is executed every day """ - structure = DalHelper.build_dal_structure( - {'vpools': [1], - 'vdisks': [(1, 1, 1, 1)], # (, , , ) - 'mds_services': [(1, 1)], - 'storagerouters': [1], - 'storagedrivers': [(1, 1, 1)]} # (, , ) - ) - vdisk_1 = structure['vdisks'][1] + vdisk_1 = self._build_get_built_vdisk() [dynamic for dynamic in vdisk_1._dynamics if dynamic.name == 'snapshots'][0].timeout = 0 # Run the testing scenario @@ -409,25 +277,23 @@ def test_happypath(self): if travis is True: self._print_message('Running in Travis, reducing output.') base = datetime.datetime.now().date() - minute = 60 - hour = minute * 60 consistent_hours = [6, 12, 18] inconsistent_hours = xrange(2, 23) for day in xrange(0, 35): - base_timestamp = self._make_timestamp(base, datetime.timedelta(1) * day) + base_timestamp = self._make_timestamp(base, DAY * day) self._print_message('') self._print_message('Day cycle: {0}: {1}'.format(day, datetime.datetime.fromtimestamp(base_timestamp).strftime('%Y-%m-%d'))) # At the start of the day, delete snapshot policy runs at 00:30 self._print_message('- Deleting snapshots') - GenericController.delete_snapshots(timestamp=base_timestamp + (minute * 30)) + GenericController.delete_snapshots(timestamp=base_timestamp + (MINUTE * 30)) # Validate snapshots self._print_message('- Validating snapshots') self._validate(vdisk=vdisk_1, current_day=day, - base_date=base, + start_time=base, sticky_hours=[], consistent_hours=consistent_hours, inconsistent_hours=inconsistent_hours) @@ -437,13 +303,13 @@ def test_happypath(self): # - Create consistent snapshot at 6:30, 12:30, 18:30 self._print_message('- Creating snapshots') for h in inconsistent_hours: - timestamp = base_timestamp + (hour * h) + timestamp = base_timestamp + (HOUR * h) VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid, metadata={'label': 'ss_i_{0}:00'.format(str(h)), 'is_consistent': False, 'timestamp': str(timestamp)}) if h in consistent_hours: - ts = (timestamp + (minute * 30)) + ts = (timestamp + (MINUTE * 30)) VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid, metadata={'label': 'ss_c_{0}:30'.format(str(h)), 'is_consistent': True, @@ -456,14 +322,53 @@ def _print_message(self, message): if self.debug is True: print message - def _validate(self, vdisk, current_day, base_date, sticky_hours, consistent_hours, inconsistent_hours): + def _visualise_snapshots(self, vdisk, current_day, start_time): + # type: (VDisk, int, datetime.datetime) -> None + """ + Visualize the snapshots of the VDisk + :param vdisk: VDisk object + :type vdisk: VDisk + :param current_day: Number of the current day + :type current_day: int + :param start_time: Time when snapshots started to be made + :type start_time: datetime.datetime + :return: + """ + snapshots = {} + for snapshot in vdisk.snapshots: + snapshots[int(snapshot['timestamp'])] = snapshot + for day in xrange(0, current_day + 1): + timestamp = self._make_timestamp(start_time, DAY * day) + visual = '\t\t- {0} '.format(datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d')) + for t in xrange(timestamp, timestamp + HOUR * 24, MINUTE * 30): + if t in snapshots: + visual += 'S' if snapshots[t]['is_sticky'] else 'C' if snapshots[t]['is_consistent'] else 'R' + else: + visual += '-' + self._print_message(visual) + + def _validate(self, vdisk, current_day, start_time, sticky_hours, consistent_hours, inconsistent_hours): + # type: (VDisk, int, datetime.date, List[int], List[int], List[int]) -> None """ This validates assumes the same policy as currently 
implemented in the policy code itself. In case the policy strategy ever changes, this unittest should be adapted as well or rewritten to load the implemented policy + :param vdisk: VDisk to validate + :type vdisk: VDisk + :param current_day: Number of the current day + :type current_day: int + :param start_time: Time when snapshots started to be made + :type start_time: datetime.date + :param sticky_hours: Hours that the sticky snapshots were made on + :type sticky_hours: List[int] + :param consistent_hours: Hours that the consistent snapshots were made on + :type consistent_hours: List[int] + :param inconsistent_hours: Hours that the inconsistent snapshots were made on + :type inconsistent_hours: List[int] """ # Implemented policy: + # > 1d | day 1 bucket | 24 | oldest of bucket | 24h # < 1d | 1d bucket | 1 | best of bucket | 1d # < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w # < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m @@ -472,62 +377,52 @@ def _validate(self, vdisk, current_day, base_date, sticky_hours, consistent_hour minute = 60 hour = minute * 60 - self._print_message(' - {0}'.format(vdisk.name)) + self._print_message('\t- VDisk {0}'.format(vdisk.name)) # Visualisation if self.debug is True: - snapshots = {} - for snapshot in vdisk.snapshots: - snapshots[int(snapshot['timestamp'])] = snapshot - for day in xrange(0, current_day + 1): - timestamp = self._make_timestamp(base_date, datetime.timedelta(1) * day) - visual = ' - {0} '.format(datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d')) - for t in xrange(timestamp, timestamp + hour * 24, minute * 30): - if t in snapshots: - visual += 'S' if snapshots[t]['is_sticky'] else 'C' if snapshots[t]['is_consistent'] else 'R' - else: - visual += '-' - self._print_message(visual) - - sticky = [int(s['timestamp']) for s in vdisk.snapshots if s['is_sticky'] is True] - consistent = [int(s['timestamp']) for s in vdisk.snapshots if s['is_consistent'] is True] - inconsistent = [int(s['timestamp']) for s in vdisk.snapshots if s['is_consistent'] is False] - self._print_message(' - {0} consistent, {1} inconsistent, {2} sticky'.format(len(consistent), len(inconsistent), len(sticky))) + self._visualise_snapshots(vdisk, current_day, start_time) + + sticky = [int(s['timestamp']) for s in vdisk.snapshots if s['is_sticky']] + consistent = [int(s['timestamp']) for s in vdisk.snapshots if s['is_consistent']] + inconsistent = [int(s['timestamp']) for s in vdisk.snapshots if not s['is_consistent']] + self._print_message('\t\t- {0} consistent, {1} inconsistent, {2} sticky'.format(len(consistent), len(inconsistent), len(sticky))) # Check for correct amount of snapshots - amount_sticky = len(sticky_hours) * current_day + amount_sticky = len(sticky_hours) * current_day # Stickies do not get removed automatically amount_consistent = 0 amount_inconsistent = 0 - pointer = 0 - if pointer < current_day: + processed_days = 0 + # First 24h period + if processed_days < current_day: amount_consistent += len(consistent_hours) amount_inconsistent += len(inconsistent_hours) - pointer += 1 - while pointer < current_day and pointer <= 7: + processed_days += 1 + # One consistent snapshot per day + while processed_days < current_day and processed_days <= 7: if len(consistent_hours) > 0: - amount_consistent += 1 # One consistent snapshot per day + amount_consistent += 1 else: amount_inconsistent += 1 - pointer += 1 - while pointer < current_day and pointer <= 28: + processed_days += 1 + # One consistent snapshot per week + while processed_days < current_day and 
processed_days <= 28: if len(consistent_hours) > 0: - amount_consistent += 1 # One consistent snapshot per week + amount_consistent += 1 else: amount_inconsistent += 1 - pointer += 7 + processed_days += 7 self.assertEqual(first=len(sticky), second=amount_sticky, msg='Wrong amount of sticky snapshots: {0} vs expected {1}'.format(len(sticky), amount_sticky)) - if len(sticky) == 0: - self.assertEqual(first=len(consistent), - second=amount_consistent, + if not sticky: + self.assertEqual(first=len(consistent), second=amount_consistent, msg='Wrong amount of consistent snapshots: {0} vs expected {1}'.format(len(consistent), amount_consistent)) - self.assertEqual(first=len(inconsistent), - second=amount_inconsistent, + self.assertEqual(first=len(inconsistent), second=amount_inconsistent, msg='Wrong amount of inconsistent snapshots: {0} vs expected {1}'.format(len(inconsistent), amount_inconsistent)) # Check of the correctness of the snapshot timestamp - if len(consistent_hours) > 0: + if consistent_hours: sn_type = 'consistent' container = consistent time_diff = (hour * consistent_hours[-1]) + (minute * 30) @@ -538,30 +433,25 @@ def _validate(self, vdisk, current_day, base_date, sticky_hours, consistent_hour for day in xrange(0, current_day): for h in sticky_hours: - timestamp = self._make_timestamp(base_date, datetime.timedelta(1) * day) + (hour * h) + (minute * 30) - self.assertIn(member=timestamp, - container=sticky, + timestamp = self._make_timestamp(start_time, DAY * day) + (hour * h) + (minute * 30) + self.assertIn(member=timestamp, container=sticky, msg='Expected sticky snapshot for {0} at {1}'.format(vdisk.name, self._from_timestamp(timestamp))) if day == (current_day - 1): for h in inconsistent_hours: - timestamp = self._make_timestamp(base_date, datetime.timedelta(1) * day) + (hour * h) - self.assertIn(member=timestamp, - container=inconsistent, + timestamp = self._make_timestamp(start_time, DAY * day) + (hour * h) + self.assertIn(member=timestamp, container=inconsistent, msg='Expected hourly inconsistent snapshot for {0} at {1}'.format(vdisk.name, self._from_timestamp(timestamp))) for h in consistent_hours: - timestamp = self._make_timestamp(base_date, datetime.timedelta(1) * day) + (hour * h) + (minute * 30) - self.assertIn(member=timestamp, - container=consistent, + timestamp = self._make_timestamp(start_time, DAY * day) + (hour * h) + (minute * 30) + self.assertIn(member=timestamp, container=consistent, msg='Expected random consistent snapshot for {0} at {1}'.format(vdisk.name, self._from_timestamp(timestamp))) elif day > (current_day - 7): - timestamp = self._make_timestamp(base_date, datetime.timedelta(1) * day) + time_diff - self.assertIn(member=timestamp, - container=container, + timestamp = self._make_timestamp(start_time, DAY * day) + time_diff + self.assertIn(member=timestamp, container=container, msg='Expected daily {0} snapshot for {1} at {2}'.format(sn_type, vdisk.name, self._from_timestamp(timestamp))) elif day % 7 == 0 and day > 28: - timestamp = self._make_timestamp(base_date, datetime.timedelta(1) * day) + time_diff - self.assertIn(member=timestamp, - container=container, + timestamp = self._make_timestamp(start_time, DAY * day) + time_diff + self.assertIn(member=timestamp, container=container, msg='Expected weekly {0} snapshot for {1} at {2}'.format(sn_type, vdisk.name, self._from_timestamp(timestamp))) @staticmethod From 6301d18974518bc5f53493931de79e52b769cf51 Mon Sep 17 00:00:00 2001 From: Jeffrey Devloo Date: Tue, 26 Mar 2019 17:05:56 +0100 Subject: [PATCH 08/18] 
Default policy did not skip the first 24 hours Added documentation Extra checks added to the scheduled task to prevent the policy being enacted on a wrong timestamp --- docs/snapshots.md | 20 ++++++----- ovs/constants/vdisk.py | 5 ++- ovs/lib/generic.py | 5 +-- ovs/lib/helpers/generic/snapshots.py | 6 ++-- ovs/lib/tests/generic_tests/test_snapshot.py | 36 +++++++++----------- 5 files changed, 35 insertions(+), 37 deletions(-) diff --git a/docs/snapshots.md b/docs/snapshots.md index 0ceedbdbf..6581daf3c 100644 --- a/docs/snapshots.md +++ b/docs/snapshots.md @@ -9,8 +9,7 @@ This automatic task will: - Enforce the retention policy The default retention policy is: -- an hourly snapshot is kept for yesterday -- a single snapshot is kept for the 6 days after that +- a single snapshot is kept for the first 7 days after that - Prioritizes consistent snapshots over older ones for the first day in the policy (which is 2 days back, starting from now) - A single snapshot is kept for the 2nd, 3rd and 4th week to have a single snapshot of the week for the first month @@ -39,10 +38,8 @@ This options takes in a list of day numbers. If we were to write out the default retention policy, it would look like: ``` -[# One per hour - {'nr_of_snapshots': 24, 'nr_of_days': 1}, - # one per day for rest of the week and opt for a consistent snapshot for the first day - {'nr_of_snapshots': 6, 'nr_of_days': 6, 'consistency_first': True, 'consistency_first_on': [1]}, +[# one per day for the week and opt for a consistent snapshot for the first day + {'nr_of_snapshots': 7, 'nr_of_days': 7, 'consistency_first': True, 'consistency_first_on': [1]}, # One per week for the rest of the month {'nr_of_snapshots': 3, 'nr_of_days': 21}] ``` @@ -52,7 +49,12 @@ Configuring it on different levels can be done using the API: - vPool level: <> - vDisk level: <> -Example: +##### Examples: +# @todo more examples +The examples assume that a snapshot was taken each hour. + +I wish to keep hourly snapshots from the first week +``` +[{'nr_of_days': 7, # A week spans 7 days + 'nr_of_snapshots': 168}] # Keep 24 snapshot for every day for 7 days: 7 * 24 ``` -Insert -``` \ No newline at end of file diff --git a/ovs/constants/vdisk.py b/ovs/constants/vdisk.py index f8a988166..50495a8b4 100644 --- a/ovs/constants/vdisk.py +++ b/ovs/constants/vdisk.py @@ -26,8 +26,7 @@ # Snapshot related # Note: the scheduled task will always skip the first 24 hours before enforcing the policy -SNAPSHOT_POLICY_DEFAULT = [{'nr_of_snapshots': 24, 'nr_of_days': 1}, # One per hour - # one per day for rest of the week and opt for a consistent snapshot for the first day - {'nr_of_snapshots': 6, 'nr_of_days': 6, 'consistency_first': True, 'consistency_first_on': [1]}, +SNAPSHOT_POLICY_DEFAULT = [# one per day for rest of the week and opt for a consistent snapshot for the first day + {'nr_of_snapshots': 7, 'nr_of_days': 7, 'consistency_first': True, 'consistency_first_on': [1]}, # One per week for the rest of the month {'nr_of_snapshots': 3, 'nr_of_days': 21}] diff --git a/ovs/lib/generic.py b/ovs/lib/generic.py index d04bbb8dd..b2827b6d0 100644 --- a/ovs/lib/generic.py +++ b/ovs/lib/generic.py @@ -84,12 +84,13 @@ def delete_snapshots(timestamp=None): > 1m | delete The configured policy can differ from this one. :param timestamp: Timestamp to determine whether snapshots should be kept or not, - if none provided, the current timestamp - 1 day is used. + if none provided, the current timestamp - 1 day is used. Used in unittesting only! 
The scheduled task will not remove snapshots of the current day this way! :type timestamp: float - :return: Dict with vdisk guid as key, deleted snapshot ids as value + :return: Dict with vdisk guid as key, deleted snapshot ids as value :rtype: dict """ + assert os.environ.get('RUNNING_UNITTESTS') == 'True' and timestamp is not None, 'Providing a timestamp is only possible during unittests' if timestamp is None: timestamp = time.time() - timedelta(1).total_seconds() diff --git a/ovs/lib/helpers/generic/snapshots.py b/ovs/lib/helpers/generic/snapshots.py index fa889f540..8c886ce02 100644 --- a/ovs/lib/helpers/generic/snapshots.py +++ b/ovs/lib/helpers/generic/snapshots.py @@ -147,18 +147,18 @@ def get_obsolete_snapshots(self, consistency_first=False, bucket_count=0): elif snapshot.timestamp < oldest.timestamp: oldest = snapshot snapshot_to_keep = oldest - _logger.info('Elected {} as the snapshot to keep within {}.'.format(snapshot_to_keep, self)) + _logger.debug('Elected {} as the snapshot to keep within {}.'.format(snapshot_to_keep, self)) obsolete_snapshots = [s for s in self.snapshots if s.timestamp != snapshot_to_keep.timestamp] else: # No end date for the interval, every snapshot is obsolete obsolete_snapshots = self.snapshots - _logger.info('Marking {} as obsolete within {} ({} in total)'.format(', '.join([str(s) for s in obsolete_snapshots]), self, len(obsolete_snapshots))) + _logger.debug('Marking {} as obsolete within {} ({} in total)'.format(', '.join([str(s) for s in obsolete_snapshots]), self, len(obsolete_snapshots))) return obsolete_snapshots def __str__(self): humanized_start = datetime.fromtimestamp(self.start).strftime('%Y-%m-%d %H:%M') humanized_end = datetime.fromtimestamp(self.end).strftime('%Y-%m-%d %H:%M') if self.end else self.end - return 'Bucket (start: {0}, end: {1}) with {2}'.format(humanized_start, humanized_end, self.snapshots) + return 'Bucket (start: {0}, end: {1}) with [{2}]'.format(humanized_start, humanized_end, ','.join(str(s) for s in self.snapshots)) class SnapshotManager(object): diff --git a/ovs/lib/tests/generic_tests/test_snapshot.py b/ovs/lib/tests/generic_tests/test_snapshot.py index 45696a411..e490b09f1 100644 --- a/ovs/lib/tests/generic_tests/test_snapshot.py +++ b/ovs/lib/tests/generic_tests/test_snapshot.py @@ -17,7 +17,6 @@ """ Generic test module """ -import os import time import datetime import unittest @@ -41,7 +40,6 @@ def setUp(self): """ (Re)Sets the stores on every test """ - # self.debug = True self.volatile, self.persistent = DalHelper.setup() def tearDown(self): @@ -264,7 +262,7 @@ def test_snapshot_sticky(self): snapshot_time_offset=MINUTE * 30, # Extra time to add to the hourly timestamps automatic_snapshots=False) - def test_happypath(self): + def test_happy_path(self): """ Validates the happy path; Hourly snapshots are taken with a few manual consistent every now and then. 
The delete policy is executed every day @@ -273,9 +271,6 @@ def test_happypath(self): [dynamic for dynamic in vdisk_1._dynamics if dynamic.name == 'snapshots'][0].timeout = 0 # Run the testing scenario - travis = 'TRAVIS' in os.environ and os.environ['TRAVIS'] == 'true' - if travis is True: - self._print_message('Running in Travis, reducing output.') base = datetime.datetime.now().date() consistent_hours = [6, 12, 18] inconsistent_hours = xrange(2, 23) @@ -287,7 +282,7 @@ def test_happypath(self): # At the start of the day, delete snapshot policy runs at 00:30 self._print_message('- Deleting snapshots') - GenericController.delete_snapshots(timestamp=base_timestamp + (MINUTE * 30)) + GenericController.delete_snapshots(timestamp=base_timestamp + (MINUTE * 30) - DAY.total_seconds()) # Validate snapshots self._print_message('- Validating snapshots') @@ -304,16 +299,17 @@ def test_happypath(self): self._print_message('- Creating snapshots') for h in inconsistent_hours: timestamp = base_timestamp + (HOUR * h) - VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid, - metadata={'label': 'ss_i_{0}:00'.format(str(h)), - 'is_consistent': False, - 'timestamp': str(timestamp)}) + snapshot_id = VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid, + metadata={'label': 'ss_i_{0}:00'.format(str(h)), + 'is_consistent': False, + 'timestamp': str(timestamp)}) + self._print_message('- Created inconsistent snapshot {} for vDisk {} on hour {}'.format(snapshot_id, vdisk_1.guid, h)) if h in consistent_hours: ts = (timestamp + (MINUTE * 30)) - VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid, - metadata={'label': 'ss_c_{0}:30'.format(str(h)), - 'is_consistent': True, - 'timestamp': str(ts)}) + snapshot_id = VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid, metadata={'label': 'ss_c_{0}:30'.format(str(h)), + 'is_consistent': True, + 'timestamp': str(ts)}) + self._print_message('- Created consistent snapshot {} for vDisk {} on hour {}'.format(snapshot_id, vdisk_1.guid, h)) ################## # HELPER METHODS # @@ -323,7 +319,7 @@ def _print_message(self, message): print message def _visualise_snapshots(self, vdisk, current_day, start_time): - # type: (VDisk, int, datetime.datetime) -> None + # type: (VDisk, int, datetime.date) -> None """ Visualize the snapshots of the VDisk :param vdisk: VDisk object @@ -416,10 +412,10 @@ def _validate(self, vdisk, current_day, start_time, sticky_hours, consistent_hou second=amount_sticky, msg='Wrong amount of sticky snapshots: {0} vs expected {1}'.format(len(sticky), amount_sticky)) if not sticky: - self.assertEqual(first=len(consistent), second=amount_consistent, - msg='Wrong amount of consistent snapshots: {0} vs expected {1}'.format(len(consistent), amount_consistent)) - self.assertEqual(first=len(inconsistent), second=amount_inconsistent, - msg='Wrong amount of inconsistent snapshots: {0} vs expected {1}'.format(len(inconsistent), amount_inconsistent)) + self.assertEqual(first=amount_consistent, second=len(consistent), + msg='Wrong amount of consistent snapshots') + self.assertEqual(first=amount_inconsistent, second=len(inconsistent), + msg='Wrong amount of inconsistent snapshots') # Check of the correctness of the snapshot timestamp if consistent_hours: From 6bcaa2cecf6b972024c36f6a18647f1b11455fe4 Mon Sep 17 00:00:00 2001 From: Jeffrey Devloo Date: Tue, 26 Mar 2019 17:32:41 +0100 Subject: [PATCH 09/18] Snapshots with a clone won't be deleted anymore --- ovs/lib/helpers/generic/snapshots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/ovs/lib/helpers/generic/snapshots.py b/ovs/lib/helpers/generic/snapshots.py index 8c886ce02..1e2ac9ca0 100644 --- a/ovs/lib/helpers/generic/snapshots.py +++ b/ovs/lib/helpers/generic/snapshots.py @@ -313,7 +313,7 @@ def delete_snapshots(self, timestamp): snapshot = Snapshot(vdisk_guid=vdisk.guid, **vdisk_snapshot) if snapshot.is_sticky: continue - if snapshot.vdisk_guid in parent_snapshots: + if snapshot.guid in parent_snapshots: _logger.info('Not deleting snapshot {0} because it has clones'.format(snapshot.vdisk_guid)) continue for bucket in bucket_chain: From ef50791f7c270e8583321721a333f9f8c7b44cfe Mon Sep 17 00:00:00 2001 From: Jeffrey Devloo Date: Wed, 27 Mar 2019 17:33:10 +0100 Subject: [PATCH 10/18] Add retention policy configurability --- ovs/constants/vdisk.py | 2 + ovs/dal/hybrids/vdisk.py | 3 +- ovs/dal/hybrids/vpool.py | 3 +- ovs/lib/helpers/generic/snapshots.py | 29 +++++++++--- ovs/lib/tests/generic_tests/test_snapshot.py | 46 +++++++++++++++++++- 5 files changed, 73 insertions(+), 10 deletions(-) diff --git a/ovs/constants/vdisk.py b/ovs/constants/vdisk.py index 50495a8b4..5b26f04a7 100644 --- a/ovs/constants/vdisk.py +++ b/ovs/constants/vdisk.py @@ -17,6 +17,7 @@ """ VDisk Constants module. Contains constants related to vdisks """ +import os # General LOCK_NAMESPACE = 'ovs_locks' @@ -30,3 +31,4 @@ {'nr_of_snapshots': 7, 'nr_of_days': 7, 'consistency_first': True, 'consistency_first_on': [1]}, # One per week for the rest of the month {'nr_of_snapshots': 3, 'nr_of_days': 21}] +SNAPSHOT_POLICY_LOCATION = os.path.join(os.path.sep, 'ovs', 'cluster', 'snapshot_retention_policy') diff --git a/ovs/dal/hybrids/vdisk.py b/ovs/dal/hybrids/vdisk.py index 7c2a4b205..66a63603f 100644 --- a/ovs/dal/hybrids/vdisk.py +++ b/ovs/dal/hybrids/vdisk.py @@ -51,7 +51,8 @@ class VDisk(DataObject): Property('pagecache_ratio', float, default=1.0, doc='Ratio of the volume\'s metadata pages that needs to be cached'), Property('metadata', dict, default=dict(), doc='Contains fixed metadata about the volume (e.g. lba_size, ...)'), Property('cache_quota', dict, mandatory=False, doc='Maximum caching space(s) this volume can consume (in Bytes) per cache type. 
If not None, the caching(s) for this volume has been set manually'), - Property('scrubbing_information', dict, mandatory=False, doc='Scrubbing metadata set by scrubber with an expiration date')] + Property('scrubbing_information', dict, mandatory=False, doc='Scrubbing metadata set by scrubber with an expiration date'), + Property('snapshot_retention_policy', list, mandatory=False, doc='Snapshot retention policy configuration')] __relations = [Relation('vpool', VPool, 'vdisks'), Relation('parent_vdisk', None, 'child_vdisks', mandatory=False)] __dynamics = [Dynamic('dtl_status', str, 60), diff --git a/ovs/dal/hybrids/vpool.py b/ovs/dal/hybrids/vpool.py index d147f531f..85cb507b3 100644 --- a/ovs/dal/hybrids/vpool.py +++ b/ovs/dal/hybrids/vpool.py @@ -46,7 +46,8 @@ class VPool(DataObject): Property('metadata', dict, mandatory=False, doc='Metadata for the backends, as used by the Storage Drivers.'), Property('rdma_enabled', bool, default=False, doc='Has the vpool been configured to use RDMA for DTL transport, which is only possible if all storagerouters are RDMA capable'), Property('status', STATUSES.keys(), doc='Status of the vPool'), - Property('metadata_store_bits', int, mandatory=False, doc='StorageDrivers deployed for this vPool will make use of this amount of metadata store bits')] + Property('metadata_store_bits', int, mandatory=False, doc='StorageDrivers deployed for this vPool will make use of this amount of metadata store bits'), + Property('snapshot_retention_policy', list, mandatory=False, doc='Snapshot retention policy configuration')] __relations = [] __dynamics = [Dynamic('configuration', dict, 3600), Dynamic('statistics', dict, 4), diff --git a/ovs/lib/helpers/generic/snapshots.py b/ovs/lib/helpers/generic/snapshots.py index 1e2ac9ca0..2a58a1d33 100644 --- a/ovs/lib/helpers/generic/snapshots.py +++ b/ovs/lib/helpers/generic/snapshots.py @@ -16,11 +16,12 @@ import time from datetime import datetime, timedelta -from ovs.constants.vdisk import SNAPSHOT_POLICY_DEFAULT +from ovs.constants.vdisk import SNAPSHOT_POLICY_DEFAULT, SNAPSHOT_POLICY_LOCATION from ovs.dal.hybrids.vdisk import VDisk from ovs.dal.hybrids.vpool import VPool from ovs.dal.lists.vpoollist import VPoolList from ovs.dal.lists.vdisklist import VDiskList +from ovs.extensions.generic.configuration import Configuration from ovs.lib.vdisk import VDiskController from ovs.log.log_handler import LogHandler @@ -69,6 +70,19 @@ def from_configuration(cls, configuration): """ return [cls(**c) for c in configuration] + def __eq__(self, other): + # type: (RetentionPolicy) -> bool + """ + Equality operator + :param other: Other instance + :type other: RetentionPolicy + :return: True if equal else False + :rtype: bool + """ + if not isinstance(other, RetentionPolicy): + return NotImplemented('Comparing to other types is not implemented') + return vars(self) == vars(other) + class Snapshot(object): @@ -187,8 +201,7 @@ def get_retention_policy(): """ Retrieve the globally configured retention policy """ - # @todo retrieve the config path - return RetentionPolicy.from_configuration(SNAPSHOT_POLICY_DEFAULT) + return RetentionPolicy.from_configuration(Configuration.get(SNAPSHOT_POLICY_LOCATION, default=SNAPSHOT_POLICY_DEFAULT)) @classmethod def get_retention_policies_for_vpools(cls): @@ -202,7 +215,7 @@ def get_retention_policies_for_vpools(cls): for vpool in VPoolList.get_vpools(): policies_config = cls.get_retention_policy_vpool(vpool) if policies_config: - vpool_policies[vpool] = RetentionPolicy.from_configuration(policies_config) + 
vpool_policies[vpool] = policies_config return vpool_policies @staticmethod @@ -211,7 +224,9 @@ def get_retention_policy_vpool(vpool): """ Retrieve the retention policy for the VPool (if any) """ - # @todo Retrieve config key + snapshot_retention_policy = vpool.snapshot_retention_policy + if snapshot_retention_policy: + return RetentionPolicy.from_configuration(vpool.snapshot_retention_policy) return None @staticmethod @@ -220,7 +235,9 @@ def get_retention_policy_vdisk(vdisk): """ Retrieve the retention policy for the VDisk (if any) """ - # @todo retrieve config key + snapshot_retention_policy = vdisk.snapshot_retention_policy + if snapshot_retention_policy: + return RetentionPolicy.from_configuration(vdisk.snapshot_retention_policy) return None @staticmethod diff --git a/ovs/lib/tests/generic_tests/test_snapshot.py b/ovs/lib/tests/generic_tests/test_snapshot.py index e490b09f1..814110df1 100644 --- a/ovs/lib/tests/generic_tests/test_snapshot.py +++ b/ovs/lib/tests/generic_tests/test_snapshot.py @@ -20,10 +20,14 @@ import time import datetime import unittest +from ovs.constants.vdisk import SNAPSHOT_POLICY_LOCATION from ovs.dal.hybrids.vdisk import VDisk +from ovs.dal.lists.vpoollist import VPoolList from ovs.dal.tests.helpers import DalHelper +from ovs.extensions.generic.configuration import Configuration from ovs.lib.generic import GenericController from ovs.lib.vdisk import VDiskController +from ovs.lib.helpers.generic.snapshots import SnapshotManager, RetentionPolicy MINUTE = 60 HOUR = MINUTE * 60 @@ -150,7 +154,6 @@ def _create_validate_snapshots(self, vdisk, start_time, sticky_hours, consistent """ Create and validate snapshot creation and deletion sequence This is suitable to enforce the default policy which is: - > 1d | day 1 bucket | 24 | oldest of bucket | 24h < 1d | 1d bucket | 1 | best of bucket | 1d < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m @@ -364,7 +367,6 @@ def _validate(self, vdisk, current_day, start_time, sticky_hours, consistent_hou """ # Implemented policy: - # > 1d | day 1 bucket | 24 | oldest of bucket | 24h # < 1d | 1d bucket | 1 | best of bucket | 1d # < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w # < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m @@ -450,6 +452,46 @@ def _validate(self, vdisk, current_day, start_time, sticky_hours, consistent_hou self.assertIn(member=timestamp, container=container, msg='Expected weekly {0} snapshot for {1} at {2}'.format(sn_type, vdisk.name, self._from_timestamp(timestamp))) + def test_retention_policy_configurability(self): + """ + Test the different retention policy settings + :return: + """ + global_config = [{'nr_of_days': 30, 'nr_of_snapshots': 30}] + vpool_config = [{'nr_of_days': 7, 'nr_of_snapshots': 7}] + vdisk_config = [{'nr_of_days': 1, 'nr_of_snapshots': 1}] + + Configuration.set(SNAPSHOT_POLICY_LOCATION, global_config) + vdisk_1 = self._build_get_built_vdisk() + vpool_1 = VPoolList.get_vpools()[0] + + # Global configuration + snapshot_manager = SnapshotManager() + + policy_check = RetentionPolicy.from_configuration(global_config)[0] + policy = snapshot_manager.get_policy_to_enforce(vdisk_1)[0] + self.assertEqual(policy_check, policy) + + # VPool configuration + vpool_1.snapshot_retention_policy = vpool_config + vpool_1.save() + + snapshot_manager = SnapshotManager() + + policy_check = RetentionPolicy.from_configuration(vpool_config)[0] + policy = snapshot_manager.get_policy_to_enforce(vdisk_1)[0] + self.assertEqual(policy_check, policy) + + # VDisk 
Configuration + snapshot_manager = SnapshotManager() + + vdisk_1.snapshot_retention_policy = vdisk_config + vdisk_1.save() + + policy_check = RetentionPolicy.from_configuration(vdisk_config)[0] + policy = snapshot_manager.get_policy_to_enforce(vdisk_1)[0] + self.assertEqual(policy_check, policy) + @staticmethod def _make_timestamp(base, offset): return int(time.mktime((base + offset).timetuple())) From cb89e5d5bc9b4f5ca89e63e57e5649675dcb5f8f Mon Sep 17 00:00:00 2001 From: Jeffrey Devloo Date: Wed, 3 Apr 2019 09:56:49 +0200 Subject: [PATCH 11/18] Think about more test scenarios --- ovs/lib/helpers/generic/snapshots.py | 11 ++-- ovs/lib/tests/generic_tests/test_snapshot.py | 57 +++++++++++++------- 2 files changed, 41 insertions(+), 27 deletions(-) diff --git a/ovs/lib/helpers/generic/snapshots.py b/ovs/lib/helpers/generic/snapshots.py index 2a58a1d33..f70b98808 100644 --- a/ovs/lib/helpers/generic/snapshots.py +++ b/ovs/lib/helpers/generic/snapshots.py @@ -26,6 +26,7 @@ from ovs.log.log_handler import LogHandler _logger = LogHandler.get('lib', name='generic tasks') +DAY = timedelta(1) class RetentionPolicy(object): @@ -134,7 +135,6 @@ def get_obsolete_snapshots(self, consistency_first=False, bucket_count=0): """ _ = consistency_first - obsolete_snapshots = None if self.end: snapshot_to_keep = None if self.retention_policy.consistency_first: @@ -265,23 +265,21 @@ def _get_snapshot_buckets(cls, start_time, policies): :type policies: RetentionPolicies :return: """ - day_delta = timedelta(1) # Convert to number of seconds in calculations buckets = [] processed_retention_days = 0 - offset = processed_retention_days * day_delta for policy in policies: # type: RetentionPolicy + offset = processed_retention_days * DAY number_of_days = policy.nr_of_days number_of_snapshots = policy.nr_of_snapshots - snapshot_timedelta = number_of_days * day_delta / number_of_snapshots + snapshot_timedelta = number_of_days * DAY / number_of_snapshots for i in xrange(0, number_of_snapshots): buckets.append(Bucket(start=cls.make_timestamp(start_time, offset + snapshot_timedelta * i), end=cls.make_timestamp(start_time, offset + snapshot_timedelta * (i + 1)), retention_policy=policy)) processed_retention_days += number_of_days - offset = processed_retention_days * day_delta # Always add a bucket which falls out of the configured retention - buckets.append(Bucket(start=cls.make_timestamp(start_time, processed_retention_days * day_delta), end=0)) + buckets.append(Bucket(start=cls.make_timestamp(start_time, processed_retention_days * DAY), end=0)) return buckets @staticmethod @@ -310,7 +308,6 @@ def delete_snapshots(self, timestamp): :return: Dict with vdisk guid as key, deleted snapshot ids as value :rtype: dict """ - # @todo think about backwards compatibility. 
The previous code would not account for the first day start_time = datetime.fromtimestamp(timestamp) # Get a list of all snapshots that are used as parents for clones diff --git a/ovs/lib/tests/generic_tests/test_snapshot.py b/ovs/lib/tests/generic_tests/test_snapshot.py index 814110df1..3e4285331 100644 --- a/ovs/lib/tests/generic_tests/test_snapshot.py +++ b/ovs/lib/tests/generic_tests/test_snapshot.py @@ -132,7 +132,7 @@ def test_clone_snapshot(self): self.assertIn(base_snapshot_guid, vdisk_1.snapshot_ids, 'Snapshot was deleted while there are still clones of it') @staticmethod - def _build_get_built_vdisk(): + def _build_vdisk(): # type: () -> VDisk """ Build the DAL structure and retrieve the vdisk @@ -209,7 +209,7 @@ def test_snapshot_automatic_consistent(self): """ is_automatic: True, is_consistent: True --> Automatically created consistent snapshots should be deleted """ - self._create_validate_snapshots(vdisk=self._build_get_built_vdisk(), + self._create_validate_snapshots(vdisk=self._build_vdisk(), start_time=datetime.datetime.now().date(), sticky_hours=[], consistent_hours=[2], @@ -221,7 +221,7 @@ def test_snapshot_automatic_not_consistent(self): """ is_automatic: True, is_consistent: False --> Automatically created non-consistent snapshots should be deleted """ - self._create_validate_snapshots(vdisk=self._build_get_built_vdisk(), + self._create_validate_snapshots(vdisk=self._build_vdisk(), start_time=datetime.datetime.now().date(), sticky_hours=[], consistent_hours=[], @@ -233,7 +233,7 @@ def test_snapshot_non_automatic_consistent(self): """ is_automatic: False, is_consistent: True --> Manually created consistent snapshots should be deleted """ - self._create_validate_snapshots(vdisk=self._build_get_built_vdisk(), + self._create_validate_snapshots(vdisk=self._build_vdisk(), start_time=datetime.datetime.now().date(), sticky_hours=[], consistent_hours=[2], @@ -245,7 +245,7 @@ def test_snapshot_not_automatic_not_consistent(self): """ is_automatic: False, is_consistent: False --> Manually created non-consistent snapshots should be deleted """ - self._create_validate_snapshots(vdisk=self._build_get_built_vdisk(), + self._create_validate_snapshots(vdisk=self._build_vdisk(), start_time=datetime.datetime.now().date(), sticky_hours=[], consistent_hours=[], @@ -257,7 +257,7 @@ def test_snapshot_sticky(self): """ is_sticky: True --> Sticky snapshots of any kind should never be deleted (Only possible to delete manually) """ - self._create_validate_snapshots(vdisk=self._build_get_built_vdisk(), + self._create_validate_snapshots(vdisk=self._build_vdisk(), start_time=datetime.datetime.now().date(), sticky_hours=[2], consistent_hours=[2], @@ -270,7 +270,7 @@ def test_happy_path(self): Validates the happy path; Hourly snapshots are taken with a few manual consistent every now and then. 
The delete policy is executed every day """ - vdisk_1 = self._build_get_built_vdisk() + vdisk_1 = self._build_vdisk() [dynamic for dynamic in vdisk_1._dynamics if dynamic.name == 'snapshots'][0].timeout = 0 # Run the testing scenario @@ -365,20 +365,16 @@ def _validate(self, vdisk, current_day, start_time, sticky_hours, consistent_hou :param inconsistent_hours: Hours that the inconsistent snapshots were made on :type inconsistent_hours: List[int] """ - # Implemented policy: # < 1d | 1d bucket | 1 | best of bucket | 1d # < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w # < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m # > 1m | delete - minute = 60 - hour = minute * 60 - self._print_message('\t- VDisk {0}'.format(vdisk.name)) # Visualisation - if self.debug is True: + if self.debug: self._visualise_snapshots(vdisk, current_day, start_time) sticky = [int(s['timestamp']) for s in vdisk.snapshots if s['is_sticky']] @@ -391,11 +387,12 @@ def _validate(self, vdisk, current_day, start_time, sticky_hours, consistent_hou amount_consistent = 0 amount_inconsistent = 0 processed_days = 0 - # First 24h period + # First 24h period which are skipped so all taken snapshots are kept if processed_days < current_day: amount_consistent += len(consistent_hours) amount_inconsistent += len(inconsistent_hours) processed_days += 1 + # One consistent snapshot per day while processed_days < current_day and processed_days <= 7: if len(consistent_hours) > 0: @@ -423,24 +420,24 @@ def _validate(self, vdisk, current_day, start_time, sticky_hours, consistent_hou if consistent_hours: sn_type = 'consistent' container = consistent - time_diff = (hour * consistent_hours[-1]) + (minute * 30) + time_diff = (HOUR * consistent_hours[-1]) + (MINUTE * 30) else: sn_type = 'inconsistent' container = inconsistent - time_diff = (hour * inconsistent_hours[-1]) + time_diff = (HOUR * inconsistent_hours[-1]) for day in xrange(0, current_day): for h in sticky_hours: - timestamp = self._make_timestamp(start_time, DAY * day) + (hour * h) + (minute * 30) + timestamp = self._make_timestamp(start_time, DAY * day) + (HOUR * h) + (MINUTE * 30) self.assertIn(member=timestamp, container=sticky, msg='Expected sticky snapshot for {0} at {1}'.format(vdisk.name, self._from_timestamp(timestamp))) if day == (current_day - 1): for h in inconsistent_hours: - timestamp = self._make_timestamp(start_time, DAY * day) + (hour * h) + timestamp = self._make_timestamp(start_time, DAY * day) + (HOUR * h) self.assertIn(member=timestamp, container=inconsistent, msg='Expected hourly inconsistent snapshot for {0} at {1}'.format(vdisk.name, self._from_timestamp(timestamp))) for h in consistent_hours: - timestamp = self._make_timestamp(start_time, DAY * day) + (hour * h) + (minute * 30) + timestamp = self._make_timestamp(start_time, DAY * day) + (HOUR * h) + (MINUTE * 30) self.assertIn(member=timestamp, container=consistent, msg='Expected random consistent snapshot for {0} at {1}'.format(vdisk.name, self._from_timestamp(timestamp))) elif day > (current_day - 7): @@ -452,7 +449,7 @@ def _validate(self, vdisk, current_day, start_time, sticky_hours, consistent_hou self.assertIn(member=timestamp, container=container, msg='Expected weekly {0} snapshot for {1} at {2}'.format(sn_type, vdisk.name, self._from_timestamp(timestamp))) - def test_retention_policy_configurability(self): + def test_retention_policy_configuration_levels(self): """ Test the different retention policy settings :return: @@ -462,7 +459,7 @@ def test_retention_policy_configurability(self): vdisk_config = 
[{'nr_of_days': 1, 'nr_of_snapshots': 1}] Configuration.set(SNAPSHOT_POLICY_LOCATION, global_config) - vdisk_1 = self._build_get_built_vdisk() + vdisk_1 = self._build_vdisk() vpool_1 = VPoolList.get_vpools()[0] # Global configuration @@ -492,6 +489,26 @@ def test_retention_policy_configurability(self): policy = snapshot_manager.get_policy_to_enforce(vdisk_1)[0] self.assertEqual(policy_check, policy) + def test_retention_policy_overlap(self): + """ + Test the application of the retention policy settings with overlapping timespans + """ + global_config = [{'nr_of_days': 1, 'nr_of_snapshots': 1}, + {'nr_of_days': 2, 'nr_of_snapshots': 1, 'consistent_first': True}] + # Day 1 will have 1 consistent and 1 inconsistent snapshot. Inconsistent is older + # Day 2 will have 2 inconsistent snapshots + # The goal is to have both buckets still retain their snapshots + # The bucket logic will distribute all snapshots to buckets that can fit them. Both buckets have the same start day + # Bucket 1 (1 day) will have the consistent and inconsistent one + # Bucket 2 (2 days) will have all the snapshots + # Bucket 1 will choose the oldest, discarding the consistent one when removing + # Bucket 2 will choose the consistent one above all else + # In the end, bucket 2 won't have a snapshot + Configuration.set(SNAPSHOT_POLICY_LOCATION, global_config) + vdisk_1 = self._build_vdisk() + + raise NotImplementedError() + @staticmethod def _make_timestamp(base, offset): return int(time.mktime((base + offset).timetuple())) From a6e1d179397cba68f5ff43fb14d803cbafbd23bd Mon Sep 17 00:00:00 2001 From: Jeffrey Devloo Date: Wed, 3 Apr 2019 18:32:37 +0200 Subject: [PATCH 12/18] Test overlapping Assertion only triggers on non-unittest environment --- ovs/lib/generic.py | 4 +++- ovs/lib/helpers/generic/snapshots.py | 2 ++ ovs/lib/tests/generic_tests/test_snapshot.py | 21 +++++++++++++++++--- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/ovs/lib/generic.py b/ovs/lib/generic.py index b2827b6d0..859bf621c 100644 --- a/ovs/lib/generic.py +++ b/ovs/lib/generic.py @@ -90,7 +90,9 @@ def delete_snapshots(timestamp=None): :return: Dict with vdisk guid as key, deleted snapshot ids as value :rtype: dict """ - assert os.environ.get('RUNNING_UNITTESTS') == 'True' and timestamp is not None, 'Providing a timestamp is only possible during unittests' + if os.environ.get('RUNNING_UNITTESTS') == 'False': + assert timestamp is None, 'Providing a timestamp is only possible during unittests' + if timestamp is None: timestamp = time.time() - timedelta(1).total_seconds() diff --git a/ovs/lib/helpers/generic/snapshots.py b/ovs/lib/helpers/generic/snapshots.py index f70b98808..b9f3cc2f1 100644 --- a/ovs/lib/helpers/generic/snapshots.py +++ b/ovs/lib/helpers/generic/snapshots.py @@ -258,6 +258,8 @@ def _get_snapshot_buckets(cls, start_time, policies): # type: (datetime, List[RetentionPolicy]) -> List[Bucket] """ Retrieve the bucket distribution based on the policies + There is no overlapping period possible. 
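+        Each policy's window is stacked after the days already covered by the policies before it, so the spans are chained rather than layered on top of each other.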
+ Eg [{'nr_of_days': 1, 'nr_of_snapshots': 1}, {'nr_of_days': 2, 'nr_of_snapshots': 1}] spans three days, not two There is always an additional bucket to keep track of older snapshots :param start_time: Datetime to start counting from :type start_time: datetime diff --git a/ovs/lib/tests/generic_tests/test_snapshot.py b/ovs/lib/tests/generic_tests/test_snapshot.py index 3e4285331..f58ef2e1c 100644 --- a/ovs/lib/tests/generic_tests/test_snapshot.py +++ b/ovs/lib/tests/generic_tests/test_snapshot.py @@ -494,7 +494,8 @@ def test_retention_policy_overlap(self): Test the application of the retention policy settings with overlapping timespans """ global_config = [{'nr_of_days': 1, 'nr_of_snapshots': 1}, - {'nr_of_days': 2, 'nr_of_snapshots': 1, 'consistent_first': True}] + {'nr_of_days': 2, 'nr_of_snapshots': 1, 'consistency_first': True}] + # The first theory (invalid one, but good to write down nonetheless: # Day 1 will have 1 consistent and 1 inconsistent snapshot. Inconsistent is older # Day 2 will have 2 inconsistent snapshots # The goal is to have both buckets still retain their snapshots @@ -504,10 +505,24 @@ def test_retention_policy_overlap(self): # Bucket 1 will choose the oldest, discarding the consistent one when removing # Bucket 2 will choose the consistent one above all else # In the end, bucket 2 won't have a snapshot + + # After reviewing the code: no overlap is possible as it increments the days that are processed. + # It doesn't review every period by itself, which would be a nightmare on it's own Configuration.set(SNAPSHOT_POLICY_LOCATION, global_config) vdisk_1 = self._build_vdisk() - - raise NotImplementedError() + start_time = datetime.datetime.now().date() + snapshots = [] + for day, snapshot_consistencies in {1: [True, False], + 2: [False, False]}.iteritems(): + day_timestamp = self._make_timestamp(start_time, day * DAY * -1) + for index, consistency in enumerate(snapshot_consistencies): + snapshot_timestamp = day_timestamp + (HOUR * index) + snapshots.append(VDiskController.create_snapshot(vdisk_guid=vdisk_1.guid, + metadata={'label': 'snapshot_{0}:30'.format(str(index)), + 'is_consistent': consistency, + 'timestamp': str(snapshot_timestamp)})) + GenericController.delete_snapshots() + self.assertEqual(2, len(vdisk_1._snapshot_ids())) @staticmethod def _make_timestamp(base, offset): From abb23be8ce6dfbcd461e67718145df8e91cbb695 Mon Sep 17 00:00:00 2001 From: Jeffrey Devloo Date: Wed, 17 Apr 2019 18:11:12 +0200 Subject: [PATCH 13/18] Add more examples Add API to configure the retention policy --- docs/snapshots.md | 41 ++++++++++++++-- ovs/lib/helpers/generic/snapshots.py | 53 +++++++++++++++++++-- webapps/api/backend/views/storagerouters.py | 40 ++++++++++++++++ webapps/api/backend/views/vdisks.py | 24 +++++++++- webapps/api/backend/views/vpools.py | 22 +++++++++ 5 files changed, 171 insertions(+), 9 deletions(-) diff --git a/docs/snapshots.md b/docs/snapshots.md index 6581daf3c..7e93ba12f 100644 --- a/docs/snapshots.md +++ b/docs/snapshots.md @@ -26,6 +26,7 @@ It can be customized on: The notation of the policy is a list containing policies. A policies consists minimally of `nr_of_snapshots`, which is the the number of snapshots to have over the given `nr_of_days`, and `nr_of_days` which is the number of days to span the `nr_of_snapshots` over. This notation allows for some fine grained control while also being easy to configure. 
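To make the notation concrete, here is a minimal illustrative entry (a demonstration assumption, not one of the shipped defaults) that keeps four snapshots per day for the first two days; each entry is simply spread evenly over its `nr_of_days`:
```
[{'nr_of_days': 2,           # this entry covers the first two days
  'nr_of_snapshots': 2 * 4}] # 8 retained snapshots over 2 days, i.e. one kept per 12 hours
```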
Since we are working with days, *monthly and weekly policies will not follow the calendar days!*
There are two additional options available:
`consistency_first` which indicates that:
@@ -36,6 +37,7 @@ If a policy interval spans multiple days, the `consistency_first_on` can be conf
to apply the `consistency_first` rules
This option takes in a list of day numbers.
+
If we were to write out the default retention policy, it would look like:
```
[# one per day for the week and opt for a consistent snapshot for the first day
@@ -45,16 +47,45 @@ If we were to write out the default retention policy, it would look like:
```
Configuring it on different levels can be done using the API:
-- Global level: <>
-- vPool level: <>
-- vDisk level: <>
+- Global level: POST to: `/storagerouters//global_snapshot_retention_policy`
+- vPool level: POST to: `/vpools//snapshot_retention_policy`
+- vDisk level: POST to: `/vdisks//snapshot_retention_policy`
##### Examples:
-# @todo more examples
-The examples assume that a snapshot was taken each hour.
+The examples simplify a week as 7 days and a month as 4 * 7 days.
I wish to keep hourly snapshots from the first week
```
[{'nr_of_days': 7, # A week spans 7 days
'nr_of_snapshots': 168}] # Keep 24 snapshots for every day for 7 days: 7 * 24
```
+I wish to keep hourly snapshots from the first week and one for every week for the whole year
+```
+[ # First policy
+ {'nr_of_days': 7, # A week spans 7 days
+ 'nr_of_snapshots': 7 * 24}, # Keep 24 snapshots for every day for 7 days: 7 * 24
+ # Second policy
+ {'nr_of_days': 7 * (52 - 1), # The first week is already covered by the previous policy, so 52 - 1 weeks remaining
+ 'nr_of_snapshots': 1 * (52 - 1)}
+]
+```
+
+A production use case could be:
+```
+[ # First policy - keep the first 24 snapshots
+ {'nr_of_days': 1,
+ 'nr_of_snapshots': 24},
+ # Second policy - Keep 4 snapshots a day for the remaining week (6 leftover days)
+ {'nr_of_days': 6,
+ 'nr_of_snapshots': 4 * 6},
+ # Third policy - keep 1 snapshot per day for the 3 weeks to come
+ {'nr_of_days': 3 * 7,
+ 'nr_of_snapshots': 3 * 7},
+ # Fourth policy - keep 1 snapshot per week for the next 5 months
+ {'nr_of_days': 4 * 7 * 5, # Use the week notation to avoid issues (4 * 7 days = month)
+ 'nr_of_snapshots': 4 * 5}, # 1 snapshot per week: 4 weeks * 5 months
+ # Fifth policy - first 6 months are configured by now - Keep a snapshot every 6 months until 2 years have passed
+ {'nr_of_days': (4 * 7) * (6 * 3),
+ 'nr_of_snapshots': 3}
+ ]
+ ```
\ No newline at end of file
diff --git a/ovs/lib/helpers/generic/snapshots.py b/ovs/lib/helpers/generic/snapshots.py
index b9f3cc2f1..60e6a48dd 100644
--- a/ovs/lib/helpers/generic/snapshots.py
+++ b/ovs/lib/helpers/generic/snapshots.py
@@ -64,7 +64,6 @@ def from_configuration(cls, configuration):
one snapshot per hour the first day
one snapshot per day the rest of the week
one snapshot per week the rest of the month
- one older snapshot snapshot will always be stored for an interval older then the longest interval passed in the config
:param configuration: Configuration to use
:type configuration: List[Dict[str, int]]
:return: List[RetentionPolicy]
@@ -88,8 +87,21 @@ def __eq__(self, other):
class Snapshot(object):
def __init__(self, guid, timestamp, label, is_consistent, is_automatic, is_sticky, in_backend, stored, vdisk_guid, *args, **kwargs):
+ # type: (str, int, str, bool, bool, bool, bool, int, str, *any, **any) -> None
"""
Initialize a snapshot object
+ :param guid: ID of the snapshot
+ :type guid: str
+ :param timestamp: Timestamp of the snapshot
+ :type timestamp:
int + :param label: Snapshot label + :type label: str + :param is_consistent: Indicator that the snapshot is consistent + :type is_consistent: bool + :param is_automatic: Indicator that the snapshot is created automatically + :type is_automatic: bool + :param is_sticky: Indicator that the snapshot is a sticky one + :type is_sticky: bool """ self.guid = guid self.timestamp = int(timestamp) @@ -102,13 +114,29 @@ def __init__(self, guid, timestamp, label, is_consistent, is_automatic, is_stick self.vdisk_guid = vdisk_guid def __str__(self): + """ + String representation + """ prop_strings = ['{}: {}'.format(prop, val) for prop, val in vars(self).iteritems()] prop_strings.append('humanized timestamp: {}'.format(datetime.fromtimestamp(self.timestamp).strftime('%Y-%m-%d %H:%M'))) return 'Snapshot for vDisk {0} ({1})'.format(self.vdisk_guid, ', '.join(prop_strings)) class Bucket(object): + """ + Represents a bucket that holds items within a time frame + """ def __init__(self, start, end, retention_policy=None): + # type: (int, int, RetentionPolicy) -> None + """ + Initialize a bucket + :param start: Start timestamp + :type start: int + :param end: End timestamp. 0 indicates that it has no end + :type end: int + :param retention_policy: Optional: associated retention policy. Used to determine the obsolete snapshots within the bucket + :type retention_policy: RetentionPolicy + """ self.start = start self.end = end self.snapshots = [] @@ -116,11 +144,27 @@ def __init__(self, start, end, retention_policy=None): def is_snapshot_in_interval(self, snapshot): # type: (Snapshot) -> bool + """ + Determine if a snapshot fits within the current time interval + :param snapshot: Snapshot to check + :type snapshot: Snapshot + :return: True if the snapshot fits else False + :rtype: bool + """ return self.start >= snapshot.timestamp > self.end def try_add_snapshot(self, snapshot): + # type: (Snapshot) -> bool + """ + Try to add the snapshot to the bucket + :param snapshot: Snapshot to try + :return: True if the snapshot could be added else False + :rtype: bool + """ if self.is_snapshot_in_interval(snapshot): self.snapshots.append(snapshot) + return True + return False def get_obsolete_snapshots(self, consistency_first=False, bucket_count=0): # type: (bool, int) -> List[Snapshot] @@ -162,7 +206,7 @@ def get_obsolete_snapshots(self, consistency_first=False, bucket_count=0): oldest = snapshot snapshot_to_keep = oldest _logger.debug('Elected {} as the snapshot to keep within {}.'.format(snapshot_to_keep, self)) - obsolete_snapshots = [s for s in self.snapshots if s.timestamp != snapshot_to_keep.timestamp] + obsolete_snapshots = [s for s in self.snapshots if s != snapshot_to_keep] else: # No end date for the interval, every snapshot is obsolete obsolete_snapshots = self.snapshots @@ -170,6 +214,9 @@ return obsolete_snapshots def __str__(self): + """ + Stringified representation + """ humanized_start = datetime.fromtimestamp(self.start).strftime('%Y-%m-%d %H:%M') humanized_end = datetime.fromtimestamp(self.end).strftime('%Y-%m-%d %H:%M') if self.end else self.end return 'Bucket (start: {0}, end: {1}) with [{2}]'.format(humanized_start, humanized_end, ','.join(str(s) for s in self.snapshots)) @@ -263,7 +310,7 @@ def _get_snapshot_buckets(cls, start_time, policies): There is always an additional bucket to keep track of older snapshots :param start_time: Datetime to start counting from :type start_time: datetime - :param policies + :param policies: Retention
policies to enforce :type policies: RetentionPolicies :return: """ diff --git a/webapps/api/backend/views/storagerouters.py b/webapps/api/backend/views/storagerouters.py index 00be16e4d..049e0a5c6 100644 --- a/webapps/api/backend/views/storagerouters.py +++ b/webapps/api/backend/views/storagerouters.py @@ -25,11 +25,13 @@ from api.backend.decorators import required_roles, return_list, return_object, return_task, return_simple, load, log from api.backend.exceptions import HttpNotAcceptableException from api.backend.serializers.serializers import FullSerializer +from ovs.constants.vdisk import SNAPSHOT_POLICY_LOCATION, SNAPSHOT_POLICY_DEFAULT from ovs.dal.datalist import DataList from ovs.dal.hybrids.domain import Domain from ovs.dal.hybrids.storagerouter import StorageRouter from ovs.dal.hybrids.j_storagerouterdomain import StorageRouterDomain from ovs.dal.lists.storagerouterlist import StorageRouterList +from ovs.extensions.generic.configuration import Configuration from ovs.extensions.storage.volatilefactory import VolatileFactory from ovs.lib.disk import DiskController from ovs.lib.mdsservice import MDSServiceController @@ -38,6 +40,7 @@ from ovs.lib.storagerouter import StorageRouterController from ovs.lib.update import UpdateController from ovs.lib.vdisk import VDiskController +from ovs.lib.helpers.generic.snapshots import RetentionPolicy class StorageRouterViewSet(viewsets.ViewSet): @@ -651,3 +654,40 @@ def get_update_information(self): - prerequisites that have not been met """ return UpdateController.get_update_information_all.delay() + + @action() + @log() + @required_roles(['read', 'write', 'manage']) + @return_simple() + @load(StorageRouter) + def snapshot_retention_policy(self, storagerouter, policy): + """ + Set the global snapshot retention policy + :param storagerouter: The given Storagerouter. Not used but necessary because of the lack of generic routes + :type storagerouter: StorageRouter + :param policy: Retention policy to set + :type policy: List[Dict[str, int]] + :return: None + """ + _ = storagerouter + try: + RetentionPolicy.from_configuration(policy) + except Exception: + raise ValueError('Policy is not properly formatted') + Configuration.set(SNAPSHOT_POLICY_LOCATION, policy) + + @link() + @log() + @required_roles(['read', 'manage']) + @return_simple() + @load(StorageRouter) + def global_snapshot_retention_policy(self, storagerouter): + """ + Get the global snapshot retention policy + :param storagerouter: The given Storagerouter.
Not used but necessary because of the lack of generic routes + :type storagerouter: StorageRouter + :return: The snapshot policy + :rtype: List[Dict[str, int]] + """ + _ = storagerouter + return Configuration.get(SNAPSHOT_POLICY_LOCATION, default=SNAPSHOT_POLICY_DEFAULT) diff --git a/webapps/api/backend/views/vdisks.py b/webapps/api/backend/views/vdisks.py index 68ac512a7..4c25060f5 100644 --- a/webapps/api/backend/views/vdisks.py +++ b/webapps/api/backend/views/vdisks.py @@ -21,7 +21,7 @@ from rest_framework import viewsets from rest_framework.decorators import action, link from rest_framework.permissions import IsAuthenticated -from api.backend.decorators import load, log, required_roles, return_list, return_object, return_task +from api.backend.decorators import load, log, required_roles, return_list, return_object, return_task, return_simple from api.backend.exceptions import HttpNotAcceptableException from ovs.dal.datalist import DataList from ovs.dal.hybrids.diskpartition import DiskPartition @@ -32,6 +32,7 @@ from ovs.dal.lists.vdisklist import VDiskList from ovs.lib.generic import GenericController from ovs.lib.vdisk import VDiskController +from ovs.lib.helpers.generic.snapshots import RetentionPolicy class VDiskViewSet(viewsets.ViewSet): @@ -519,3 +520,24 @@ def scrub(self, vdisk, storagerouter_guid=None): :rtype: celery.result.AsyncResult """ return GenericController.execute_scrub.delay(vdisk_guids=[vdisk.guid], storagerouter_guid=storagerouter_guid, manual=True) + + @action() + @log() + @required_roles(['read', 'write', 'manage']) + @return_simple() + @load(VDisk) + def snapshot_retention_policy(self, vdisk, policy): + """ + Set the snapshot retention policy on the vDisk level + :param vdisk: The vDisk to set the retention policy on + :type vdisk: VDisk + :param policy: Retention policy to set + :type policy: List[Dict[str, int]] + :return: None + """ + try: + RetentionPolicy.from_configuration(policy) + except Exception: + raise ValueError('Policy is not properly formatted') + vdisk.snapshot_retention_policy = policy + vdisk.save() diff --git a/webapps/api/backend/views/vpools.py b/webapps/api/backend/views/vpools.py index 84ec527aa..922cd43b9 100644 --- a/webapps/api/backend/views/vpools.py +++ b/webapps/api/backend/views/vpools.py @@ -31,6 +31,7 @@ from ovs.lib.generic import GenericController from ovs.lib.storagerouter import StorageRouterController from ovs.lib.vdisk import VDiskController +from ovs.lib.helpers.generic.snapshots import RetentionPolicy class VPoolViewSet(viewsets.ViewSet): @@ -228,3 +229,24 @@ def scrub_all_vdisks(self, vpool): :rtype: celery.result.AsyncResult """ return GenericController.execute_scrub.delay(vpool_guids=[vpool.guid], manual=True) + + @action() + @log() + @required_roles(['read', 'write', 'manage']) + @return_simple() + @load(VPool) + def snapshot_retention_policy(self, vpool, policy): + """ + Set the snapshot retention policy on the vPool level + :param vpool: The vPool to set the retention policy on + :type vpool: ovs.dal.hybrids.vpool.VPool + :param policy: Retention policy to set + :type policy: List[Dict[str, int]] + :return: None + """ + try: + RetentionPolicy.from_configuration(policy) + except Exception: + raise ValueError('Policy is not properly formatted') + vpool.snapshot_retention_policy = policy + vpool.save() From 9a73aaa8842aed2b16bd600a900bcd64ef570ea6 Mon Sep 17 00:00:00 2001 From: Jeffrey Devloo Date: Wed, 17 Apr 2019 18:26:26 +0200 Subject: [PATCH 14/18] Add rtype for unittesting --- webapps/api/backend/views/storagerouters.py | 1 +
webapps/api/backend/views/vdisks.py | 1 + webapps/api/backend/views/vpools.py | 1 + 3 files changed, 3 insertions(+) diff --git a/webapps/api/backend/views/storagerouters.py b/webapps/api/backend/views/storagerouters.py index 049e0a5c6..bc5eb292e 100644 --- a/webapps/api/backend/views/storagerouters.py +++ b/webapps/api/backend/views/storagerouters.py @@ -668,6 +668,7 @@ def snapshot_retention_policy(self, storagerouter, policy): :param policy: Retention policy to set :type policy: List[Dict[str, int]] :return: None + :rtype: None """ _ = storagerouter try: diff --git a/webapps/api/backend/views/vdisks.py b/webapps/api/backend/views/vdisks.py index 4c25060f5..387d72914 100644 --- a/webapps/api/backend/views/vdisks.py +++ b/webapps/api/backend/views/vdisks.py @@ -534,6 +534,7 @@ def snapshot_retention_policy(self, vdisk, policy): :param policy: Retention policy to set :type policy: List[Dict[str, int]] :return: None + :rtype: None """ try: RetentionPolicy.from_configuration(policy) diff --git a/webapps/api/backend/views/vpools.py b/webapps/api/backend/views/vpools.py index 922cd43b9..e94db124f 100644 --- a/webapps/api/backend/views/vpools.py +++ b/webapps/api/backend/views/vpools.py @@ -243,6 +243,7 @@ def snapshot_retention_policy(self, vpool, policy): :param policy: Retention policy to set :type policy: List[Dict[str, int]] :return: None + :rtype: None """ try: RetentionPolicy.from_configuration(policy) From b36c6c22575fb6c5f81685ec904b4f3b14b3164c Mon Sep 17 00:00:00 2001 From: Jeffrey Devloo Date: Tue, 23 Apr 2019 09:24:14 +0200 Subject: [PATCH 15/18] Setup validation --- ovs/lib/helpers/generic/snapshots.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ovs/lib/helpers/generic/snapshots.py b/ovs/lib/helpers/generic/snapshots.py index 60e6a48dd..d9f947b88 100644 --- a/ovs/lib/helpers/generic/snapshots.py +++ b/ovs/lib/helpers/generic/snapshots.py @@ -21,6 +21,7 @@ from ovs.dal.hybrids.vpool import VPool from ovs.dal.lists.vpoollist import VPoolList from ovs.dal.lists.vdisklist import VDiskList +from ovs_extensions.generic.toolbox import ExtensionsToolbox from ovs.extensions.generic.configuration import Configuration from ovs.lib.vdisk import VDiskController from ovs.log.log_handler import LogHandler @@ -30,6 +31,12 @@ class RetentionPolicy(object): + + PARAMS = {'nr_of_snapshots': (int, {'min': 1}, True), + 'nr_of_days': (int, {'min': 1}, True), + 'consistency_first': (bool, None, True), + 'consistency_first_on': (list, int, True)} + def __init__(self, nr_of_snapshots, nr_of_days, consistency_first=False, consistency_first_on=None): # type: (int, int, bool, List[int]) -> None """ @@ -46,6 +53,7 @@ def __init__(self, nr_of_snapshots, nr_of_days, consistency_first=False, consist if consistency_first_on is None: consistency_first_on = [] + ExtensionsToolbox.verify_required_params(actual_params=locals(), required_params=self.PARAMS) self.nr_of_snapshots = nr_of_snapshots self.nr_of_days = nr_of_days self.consistency_first = consistency_first From 82f5c3f63c60eb7c283c9a43328c5baeb7181ef6 Mon Sep 17 00:00:00 2001 From: Jeffrey Devloo Date: Fri, 3 May 2019 16:38:26 +0200 Subject: [PATCH 16/18] Merge to latest 2.9 with snapshot delete exception handling --- ovs/lib/helpers/generic/snapshots.py | 35 ++++++++++++++------ ovs/lib/tests/generic_tests/test_snapshot.py | 1 + webapps/api/backend/decorators.py | 4 +-- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/ovs/lib/helpers/generic/snapshots.py b/ovs/lib/helpers/generic/snapshots.py index d9f947b88..754f15b37 100644 
--- a/ovs/lib/helpers/generic/snapshots.py +++ b/ovs/lib/helpers/generic/snapshots.py @@ -16,7 +16,7 @@ import time from datetime import datetime, timedelta -from ovs.constants.vdisk import SNAPSHOT_POLICY_DEFAULT, SNAPSHOT_POLICY_LOCATION +from ovs.constants.vdisk import SNAPSHOT_POLICY_DEFAULT, SNAPSHOT_POLICY_LOCATION, SCRUB_VDISK_EXCEPTION_MESSAGE from ovs.dal.hybrids.vdisk import VDisk from ovs.dal.hybrids.vpool import VPool from ovs.dal.lists.vpoollist import VPoolList @@ -364,6 +364,7 @@ def delete_snapshots(self, timestamp): :type timestamp: float :return: Dict with vdisk guid as key, deleted snapshot ids as value :rtype: dict + :raises: RuntimeError if any exception occurred during the snapshot deletion """ start_time = datetime.fromtimestamp(timestamp) @@ -389,19 +390,33 @@ def delete_snapshots(self, timestamp): continue for bucket in bucket_chain: bucket.try_add_snapshot(snapshot) - bucket_chains.append(bucket_chain) + bucket_chains.append((vdisk, bucket_chain)) # Delete obsolete snapshots + exceptions = [] removed_snapshot_map = {} - for index, bucket_chain in enumerate(bucket_chains): + for index, vdisk_bucket_chain in enumerate(bucket_chains): + vdisk, bucket_chain = vdisk_bucket_chain # @todo this consistency first behaviour changed with the new implementation # There are now buckets based on hourly intervals which means the consistency of the first day is not guaranteed (unless the config is specified that way) # consistency_first = index == 0 - for bucket in bucket_chain: - obsolete_snapshots = bucket.get_obsolete_snapshots(False, index) - for snapshot in obsolete_snapshots: - deleted_snapshots = removed_snapshot_map.get(snapshot.vdisk_guid, []) - VDiskController.delete_snapshot(vdisk_guid=snapshot.vdisk_guid, snapshot_id=snapshot.guid) - deleted_snapshots.append(snapshot.guid) - removed_snapshot_map[snapshot.vdisk_guid] = deleted_snapshots + try: + for bucket in bucket_chain: + obsolete_snapshots = bucket.get_obsolete_snapshots(False, index) + for snapshot in obsolete_snapshots: + deleted_snapshots = removed_snapshot_map.get(snapshot.vdisk_guid, []) + VDiskController.delete_snapshot(vdisk_guid=snapshot.vdisk_guid, snapshot_id=snapshot.guid) + deleted_snapshots.append(snapshot.guid) + removed_snapshot_map[snapshot.vdisk_guid] = deleted_snapshots + except RuntimeError as ex: + vdisk_log = ' for VDisk with guid {}'.format(vdisk.guid) + if SCRUB_VDISK_EXCEPTION_MESSAGE in ex.message: + _logger.warning('Being scrubbed exception occurred while deleting snapshots{}'.format(vdisk_log)) + else: + _logger.exception('Exception occurred while deleting snapshots{}'.format(vdisk_log)) + exceptions.append(ex) + + if exceptions: + raise RuntimeError('Exceptions occurred while deleting snapshots: \n- {}'.format('\n- '.join((str(ex) for ex in exceptions)))) + return removed_snapshot_map diff --git a/ovs/lib/tests/generic_tests/test_snapshot.py b/ovs/lib/tests/generic_tests/test_snapshot.py index 7fc101917..78f2ddcc4 100644 --- a/ovs/lib/tests/generic_tests/test_snapshot.py +++ b/ovs/lib/tests/generic_tests/test_snapshot.py @@ -375,6 +375,7 @@ def raise_an_exception(*args, **kwargs): StorageRouterClient.delete_snapshot_callbacks[vdisk_1.volume_id] = {snapshot_id: raise_an_exception} GenericController.delete_snapshots() + self.assertEqual(2, len(vdisk_1.snapshot_ids), "No snapshots should be removed for vdisk 1") ################## # HELPER METHODS # diff --git a/webapps/api/backend/decorators.py b/webapps/api/backend/decorators.py index bfc44807d..a7fb2dfd1 100644 --- 
a/webapps/api/backend/decorators.py +++ b/webapps/api/backend/decorators.py @@ -136,7 +136,7 @@ def validate_get_version(request): :type Request: Union[WSGIRequest, Request] :return: The parsed and non parsed request :rtype: Tuple[int, str] - :exception: HttpNotAcceptableException when the version is not within the supported versions of the api + :raises: HttpNotAcceptableException when the version is not within the supported versions of the api """ version_match = regex.match(request.META['HTTP_ACCEPT']) if version_match is not None: @@ -257,7 +257,7 @@ def load_dataobject_instance(passed_kwargs): :type passed_kwargs: Dict[str, any] :return: The loaded instance (if any) :rtype: Union[DataObject, None] - :exception HttpNotFoundException if the requested object could not be found + :raises HttpNotFoundException if the requested object could not be found """ instance = None if 'pk' in passed_kwargs and object_type is not None: From 65c4c9b2642c4bd4009b022ac49ac3d2d805f582 Mon Sep 17 00:00:00 2001 From: Jeffrey Devloo Date: Fri, 3 May 2019 16:49:13 +0200 Subject: [PATCH 17/18] Correct small format issue --- ovs/lib/helpers/mds/safety.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ovs/lib/helpers/mds/safety.py b/ovs/lib/helpers/mds/safety.py index 31263cd95..5190aa648 100644 --- a/ovs/lib/helpers/mds/safety.py +++ b/ovs/lib/helpers/mds/safety.py @@ -96,7 +96,7 @@ def validate_vdisk(self): self.metadata_backend_config_start = self.vdisk.info['metadata_backend_config'] if self.vdisk.info['metadata_backend_config'] == {}: - raise RuntimeError('Configured MDS layout for vDisk {0} could not be retrieved}, cannot update MDS configuration'.format(self.vdisk.guid)) + raise RuntimeError('Configured MDS layout for vDisk {0} could not be retrieved, cannot update MDS configuration'.format(self.vdisk.guid)) def map_mds_services_by_socket(self): """ From 6624925449cf8b01d6df1bad4f6aae30881f5f89 Mon Sep 17 00:00:00 2001 From: Jeffrey Devloo Date: Thu, 23 May 2019 14:51:09 +0200 Subject: [PATCH 18/18] Merge 2.9 and add the concurrency change back into the refactored snapshot manager --- ovs/lib/generic.py | 36 +++++++++- ovs/lib/helpers/generic/snapshots.py | 73 ++++++++++++++------ ovs/lib/tests/generic_tests/test_snapshot.py | 62 ++++++++++------- 3 files changed, 120 insertions(+), 51 deletions(-) diff --git a/ovs/lib/generic.py b/ovs/lib/generic.py index 1b21cfa75..1070e2b98 100644 --- a/ovs/lib/generic.py +++ b/ovs/lib/generic.py @@ -20,11 +20,15 @@ import os import copy import time +from celery import group +from celery.utils import uuid +from celery.result import GroupResult from datetime import timedelta from threading import Thread -from ovs.constants.vdisk import SCRUB_VDISK_EXCEPTION_MESSAGE from ovs.dal.hybrids.servicetype import ServiceType +from ovs.dal.hybrids.storagedriver import StorageDriver from ovs.dal.lists.servicelist import ServiceList +from ovs.dal.lists.storagedriverlist import StorageDriverList from ovs.dal.lists.storagerouterlist import StorageRouterList from ovs.dal.lists.vdisklist import VDiskList from ovs.extensions.db.arakooninstaller import ArakoonClusterConfig @@ -74,7 +78,28 @@ def snapshot_all_vdisks(): @staticmethod @ovs_task(name='ovs.generic.delete_snapshots', schedule=Schedule(minute='1', hour='2'), ensure_single_info={'mode': 'DEFAULT'}) def delete_snapshots(timestamp=None): - # type: (float) -> Dict[str, List[str]] + # type: (float) -> GroupResult + """ + Delete snapshots based on the retention policy + Offloads concurrency to celery + Returns a 
GroupResult. Waiting for the result can be done using result.get() + :param timestamp: Timestamp to determine whether snapshots should be kept or not, if none provided, current time will be used + :type timestamp: float + :return: The GroupResult + :rtype: GroupResult + """ + if os.environ.get('RUNNING_UNITTESTS') == 'False': + assert timestamp is None, 'Providing a timestamp is only possible during unittests' + + # The result cannot be fetched in this task + group_id = uuid() + return group(GenericController.delete_snapshots_storagedriver.s(storagedriver.guid, timestamp, group_id) + for storagedriver in StorageDriverList.get_storagedrivers()).apply_async(task_id=group_id) + + @staticmethod + @ovs_task(name='ovs.generic.delete_snapshots_storagedriver', ensure_single_info={'mode': 'DEDUPED', 'ignore_arguments': ['timestamp', 'group_id']}) + def delete_snapshots_storagedriver(storagedriver_guid, timestamp=None, group_id=None): + # type: (str, float, str) -> Dict[str, List[str]] """ Delete snapshots & scrubbing policy @@ -84,10 +109,14 @@ def delete_snapshots(timestamp=None): < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m > 1m | delete The configured policy can differ from this one. + :param storagedriver_guid: Guid of the StorageDriver to remove snapshots on + :type storagedriver_guid: str :param timestamp: Timestamp to determine whether snapshots should be kept or not, if none provided, the current timestamp - 1 day is used. Used in unittesting only! The scheduled task will not remove snapshots of the current day this way! :type timestamp: float + :param group_id: ID of the group task. Used to identify which snapshot deletes were called during the scheduled task + :type group_id: str :return: Dict with vdisk guid as key, deleted snapshot ids as value :rtype: dict """ @@ -98,7 +127,8 @@ def delete_snapshots(timestamp=None): timestamp = time.time() - timedelta(1).total_seconds() GenericController._logger.info('Delete snapshots started') - snapshot_manager = SnapshotManager() + storagedriver = StorageDriver(storagedriver_guid) + snapshot_manager = SnapshotManager(storagedriver, group_id) return snapshot_manager.delete_snapshots(timestamp) @staticmethod diff --git a/ovs/lib/helpers/generic/snapshots.py b/ovs/lib/helpers/generic/snapshots.py index 754f15b37..75adca5a7 100644 --- a/ovs/lib/helpers/generic/snapshots.py +++ b/ovs/lib/helpers/generic/snapshots.py @@ -17,6 +17,7 @@ import time from datetime import datetime, timedelta from ovs.constants.vdisk import SNAPSHOT_POLICY_DEFAULT, SNAPSHOT_POLICY_LOCATION, SCRUB_VDISK_EXCEPTION_MESSAGE +from ovs.dal.hybrids.storagedriver import StorageDriver from ovs.dal.hybrids.vdisk import VDisk from ovs.dal.hybrids.vpool import VPool from ovs.dal.lists.vpoollist import VPoolList @@ -232,12 +233,23 @@ def __str__(self): class SnapshotManager(object): """ - Manages snapshots of all vdisks + Manages snapshots of all vdisks on the storagedriver """ - def __init__(self): + def __init__(self, storagedriver, group_id=None): + # type: (StorageDriver, str) -> None + """ + :param storagedriver: StorageDriver to remove snapshots on + :type storagedriver: StorageDriver + :param group_id: ID of the group task. 
Used to identify which snapshot deletes were called during the scheduled task + :type group_id: str + """ + + self.storagedriver = storagedriver + self.group_id = group_id + self.log_id = 'Group job {} - '.format(group_id) if group_id else '' self.global_policy = self.get_retention_policy() - self.vpool_policies = self.get_retention_policies_for_vpools() + self.vpool_policy = self.get_retention_policy_vpool(storagedriver.vpool) def get_policy_to_enforce(self, vdisk): # type: (VDisk) -> List[RetentionPolicy] @@ -248,7 +260,7 @@ def get_policy_to_enforce(self, vdisk): :return: Policy to enforce :rtype: List[RetentionPolicy] """ - return self.get_retention_policy_vdisk(vdisk) or self.vpool_policies.get(vdisk.vpool) or self.global_policy + return self.get_retention_policy_vdisk(vdisk) or self.vpool_policy or self.global_policy @staticmethod def get_retention_policy(): @@ -349,6 +361,17 @@ def is_vdisk_running(vdisk): """ return vdisk.info['object_type'] in ['BASE'] + def group_format_log(self, message): + # type: (str) -> str + """ + Adds group information to the log message + :param message: Message to log + :type message: str + :return: The formatted message + :rtype: str + """ + return '{}{}'.format(self.log_id, message) + def delete_snapshots(self, timestamp): # type: (float) -> Dict[str, List[str]] """ @@ -373,27 +396,31 @@ def delete_snapshots(self, timestamp): # Distribute all snapshots into buckets. These buckets specify an interval and are ordered young to old bucket_chains = [] - for vdisk in VDiskList.get_vdisks(): - if not self.is_vdisk_running(vdisk): - continue - vdisk.invalidate_dynamics('being_scrubbed') - if vdisk.being_scrubbed: - continue - - bucket_chain = self._get_snapshot_buckets(start_time, self.get_policy_to_enforce(vdisk)) - for vdisk_snapshot in vdisk.snapshots: - snapshot = Snapshot(vdisk_guid=vdisk.guid, **vdisk_snapshot) - if snapshot.is_sticky: + exceptions = [] + for vdisk_guid in self.storagedriver.vdisks_guids: + try: + vdisk = VDisk(vdisk_guid) + if not self.is_vdisk_running(vdisk): continue - if snapshot.guid in parent_snapshots: - _logger.info('Not deleting snapshot {0} because it has clones'.format(snapshot.vdisk_guid)) + vdisk.invalidate_dynamics('being_scrubbed') + if vdisk.being_scrubbed: continue - for bucket in bucket_chain: - bucket.try_add_snapshot(snapshot) - bucket_chains.append((vdisk, bucket_chain)) + + bucket_chain = self._get_snapshot_buckets(start_time, self.get_policy_to_enforce(vdisk)) + for vdisk_snapshot in vdisk.snapshots: + snapshot = Snapshot(vdisk_guid=vdisk.guid, **vdisk_snapshot) + if snapshot.is_sticky: + continue + if snapshot.guid in parent_snapshots: + _logger.info(self.group_format_log('Not deleting snapshot {0} because it has clones'.format(snapshot.vdisk_guid))) + continue + for bucket in bucket_chain: + bucket.try_add_snapshot(snapshot) + bucket_chains.append((vdisk, bucket_chain)) + except Exception as ex: + exceptions.append(ex) # Delete obsolete snapshots - exceptions = [] removed_snapshot_map = {} for index, vdisk_bucket_chain in enumerate(bucket_chains): vdisk, bucket_chain = vdisk_bucket_chain @@ -411,9 +438,9 @@ def delete_snapshots(self, timestamp): except RuntimeError as ex: vdisk_log = ' for VDisk with guid {}'.format(vdisk.guid) if SCRUB_VDISK_EXCEPTION_MESSAGE in ex.message: - _logger.warning('Being scrubbed exception occurred while deleting snapshots{}'.format(vdisk_log)) + _logger.warning(self.group_format_log('Being scrubbed exception occurred while deleting snapshots{}'.format(vdisk_log))) else: - 
_logger.exception('Exception occurred while deleting snapshots{}'.format(vdisk_log)) + _logger.exception(self.group_format_log('Exception occurred while deleting snapshots{}'.format(vdisk_log))) exceptions.append(ex) if exceptions: diff --git a/ovs/lib/tests/generic_tests/test_snapshot.py b/ovs/lib/tests/generic_tests/test_snapshot.py index 78f2ddcc4..a0b80eff2 100644 --- a/ovs/lib/tests/generic_tests/test_snapshot.py +++ b/ovs/lib/tests/generic_tests/test_snapshot.py @@ -21,6 +21,7 @@ import datetime import unittest from ovs.constants.vdisk import SNAPSHOT_POLICY_LOCATION, SCRUB_VDISK_EXCEPTION_MESSAGE +from ovs.dal.hybrids.storagedriver import StorageDriver from ovs.dal.hybrids.vdisk import VDisk from ovs.dal.lists.vpoollist import VPoolList from ovs.dal.tests.helpers import DalHelper @@ -102,6 +103,7 @@ def test_clone_snapshot(self): 'storagerouters': [1], 'storagedrivers': [(1, 1, 1)]} # (, , ) ) + storagedriver_1 = structure['storagedrivers'][1] vdisk_1 = structure['vdisks'][1] [dynamic for dynamic in vdisk_1._dynamics if dynamic.name == 'snapshots'][0].timeout = 0 @@ -130,16 +132,17 @@ def test_clone_snapshot(self): 'is_consistent': True, 'timestamp': str(timestamp)}) base_timestamp = self._make_timestamp(base, DAY * 2) - GenericController.delete_snapshots(timestamp=base_timestamp + (MINUTE * 30)) + GenericController.delete_snapshots_storagedriver(storagedriver_guid=storagedriver_1.guid, + timestamp=base_timestamp + (MINUTE * 30)) self.assertIn(base_snapshot_guid, vdisk_1.snapshot_ids, 'Snapshot was deleted while there are still clones of it') @staticmethod - def _build_vdisk(): - # type: () -> VDisk + def _build_vdisk_structure(): + # type: () -> Tuple[Dict[any, any], VDisk, StorageDriver] """ Build the DAL structure and retrieve the vdisk :return: VDisk object - :rtype: VDisk + :rtype: Tuple[Dict[any, any], VDisk, StorageDriver] """ structure = DalHelper.build_dal_structure( {'vpools': [1], @@ -148,9 +151,11 @@ def _build_vdisk(): 'storagerouters': [1], 'storagedrivers': [(1, 1, 1)]} # (, , ) ) - return structure['vdisks'][1] + storagedriver = structure['storagedrivers'][1] + storagedriver.invalidate_dynamics(['vdisks_guids']) + return structure, structure['vdisks'][1], storagedriver - def _create_validate_snapshots(self, vdisk, start_time, sticky_hours, consistent_hours, inconsistent_hours, + def _create_validate_snapshots(self, vdisk_structure, start_time, sticky_hours, consistent_hours, inconsistent_hours, snapshot_time_offset=0, automatic_snapshots=True, number_of_days=35): # type: (VDisk, datetime.date, List[int], List[int], List[int], int, bool, int) -> None """ @@ -160,8 +165,8 @@ def _create_validate_snapshots(self, vdisk, start_time, sticky_hours, consistent < 1w | 1d bucket | 6 | oldest of bucket | 7d = 1w < 1m | 1w bucket | 3 | oldest of bucket | 4w = 1m > 1m | delete - :param vdisk: VDisk to validate - :type vdisk: VDisk + :param vdisk_structure: Structure to use + :type vdisk_structure: Dict[any, any] :param start_time: Time when snapshots started to be made :type start_time: datetime.datetime :param sticky_hours: Hours that the sticky snapshots were made on @@ -175,6 +180,7 @@ def _create_validate_snapshots(self, vdisk, start_time, sticky_hours, consistent :param automatic_snapshots: Indicate that the snapshots are made automatically (because of the scheduling) :type automatic_snapshots: bool """ + structure, vdisk, storagedriver = vdisk_structure # Snapshot details is_sticky = len(sticky_hours) > 0 is_consistent = len(consistent_hours) > 0 @@ -188,7 +194,8 @@ def 
_create_validate_snapshots(self, vdisk, start_time, sticky_hours, consistent self._print_message('- Deleting snapshots') # The absolute timestamp is used when providing one. Going back a day to skip a day similar to the scheduled task delete_snapshot_timestamp = base_timestamp + (MINUTE * 30) - DAY.total_seconds() - GenericController.delete_snapshots(timestamp=delete_snapshot_timestamp) + GenericController.delete_snapshots_storagedriver(storagedriver_guid=storagedriver.guid, + timestamp=delete_snapshot_timestamp) self._validate(vdisk=vdisk, current_day=day, @@ -211,7 +218,7 @@ def test_snapshot_automatic_consistent(self): """ is_automatic: True, is_consistent: True --> Automatically created consistent snapshots should be deleted """ - self._create_validate_snapshots(vdisk=self._build_vdisk(), + self._create_validate_snapshots(vdisk_structure=self._build_vdisk_structure(), start_time=datetime.datetime.now().date(), sticky_hours=[], consistent_hours=[2], @@ -223,7 +230,7 @@ def test_snapshot_automatic_not_consistent(self): """ is_automatic: True, is_consistent: False --> Automatically created non-consistent snapshots should be deleted """ - self._create_validate_snapshots(vdisk=self._build_vdisk(), + self._create_validate_snapshots(vdisk_structure=self._build_vdisk_structure(), start_time=datetime.datetime.now().date(), sticky_hours=[], consistent_hours=[], @@ -235,7 +242,7 @@ def test_snapshot_non_automatic_consistent(self): """ is_automatic: False, is_consistent: True --> Manually created consistent snapshots should be deleted """ - self._create_validate_snapshots(vdisk=self._build_vdisk(), + self._create_validate_snapshots(vdisk_structure=self._build_vdisk_structure(), start_time=datetime.datetime.now().date(), sticky_hours=[], consistent_hours=[2], @@ -247,7 +254,7 @@ def test_snapshot_not_automatic_not_consistent(self): """ is_automatic: False, is_consistent: False --> Manually created non-consistent snapshots should be deleted """ - self._create_validate_snapshots(vdisk=self._build_vdisk(), + self._create_validate_snapshots(vdisk_structure=self._build_vdisk_structure(), start_time=datetime.datetime.now().date(), sticky_hours=[], consistent_hours=[], @@ -259,7 +266,7 @@ def test_snapshot_sticky(self): """ is_sticky: True --> Sticky snapshots of any kind should never be deleted (Only possible to delete manually) """ - self._create_validate_snapshots(vdisk=self._build_vdisk(), + self._create_validate_snapshots(vdisk_structure=self._build_vdisk_structure(), start_time=datetime.datetime.now().date(), sticky_hours=[2], consistent_hours=[2], @@ -272,7 +279,7 @@ def test_happy_path(self): Validates the happy path; Hourly snapshots are taken with a few manual consistent every now and then. 
The delete policy is executed every day """ - vdisk_1 = self._build_vdisk() + structure, vdisk_1, storagedriver_1 = self._build_vdisk_structure() [dynamic for dynamic in vdisk_1._dynamics if dynamic.name == 'snapshots'][0].timeout = 0 # Run the testing scenario @@ -287,7 +294,8 @@ def test_happy_path(self): # At the start of the day, delete snapshot policy runs at 00:30 self._print_message('- Deleting snapshots') - GenericController.delete_snapshots(timestamp=base_timestamp + (MINUTE * 30) - DAY.total_seconds()) + GenericController.delete_snapshots_storagedriver(storagedriver_guid=storagedriver_1.guid, + timestamp=base_timestamp + (MINUTE * 30) - DAY.total_seconds()) # Validate snapshots self._print_message('- Validating snapshots') @@ -332,6 +340,8 @@ def raise_an_exception(*args, **kwargs): ) vdisk_1, vdisk_2 = structure['vdisks'].values() + storagedriver_1 = structure['storagedrivers'][1] + storagedriver_1.invalidate_dynamics(['vdisks_guids']) vdisks = [vdisk_1, vdisk_2] for vdisk in vdisks: @@ -345,7 +355,7 @@ def raise_an_exception(*args, **kwargs): if vdisk == vdisk_1: StorageRouterClient.delete_snapshot_callbacks[vdisk.volume_id] = {snapshot_id: raise_an_exception} with self.assertRaises(RuntimeError): - GenericController.delete_snapshots() + GenericController.delete_snapshots_storagedriver(storagedriver_1.guid) self.assertEqual(1, len(vdisk_2.snapshot_ids), 'One snapshot should be removed for vdisk 2') self.assertEqual(2, len(vdisk_1.snapshot_ids), 'No snapshots should be removed for vdisk 1') @@ -364,6 +374,8 @@ def raise_an_exception(*args, **kwargs): 'storagedrivers': [(1, 1, 1)]} # (, , ) ) vdisk_1 = structure['vdisks'][1] + storagedriver_1 = structure['storagedrivers'][1] + storagedriver_1.invalidate_dynamics(['vdisks_guids']) [dynamic for dynamic in vdisk_1._dynamics if dynamic.name == 'snapshots'][0].timeout = 0 for i in xrange(0, 2): @@ -374,7 +386,7 @@ def raise_an_exception(*args, **kwargs): snapshot_id = VDiskController.create_snapshot(vdisk_1.guid, metadata) StorageRouterClient.delete_snapshot_callbacks[vdisk_1.volume_id] = {snapshot_id: raise_an_exception} - GenericController.delete_snapshots() + GenericController.delete_snapshots_storagedriver(storagedriver_1.guid) self.assertEqual(2, len(vdisk_1.snapshot_ids), "No snapshots should be removed for vdisk 1") ################## @@ -522,11 +534,11 @@ def test_retention_policy_configuration_levels(self): vdisk_config = [{'nr_of_days': 1, 'nr_of_snapshots': 1}] Configuration.set(SNAPSHOT_POLICY_LOCATION, global_config) - vdisk_1 = self._build_vdisk() - vpool_1 = VPoolList.get_vpools()[0] + structure, vdisk_1, storagedriver_1 = self._build_vdisk_structure() + vpool_1 = storagedriver_1.vpool # Global configuration - snapshot_manager = SnapshotManager() + snapshot_manager = SnapshotManager(storagedriver_1) policy_check = RetentionPolicy.from_configuration(global_config)[0] policy = snapshot_manager.get_policy_to_enforce(vdisk_1)[0] @@ -536,14 +548,14 @@ def test_retention_policy_configuration_levels(self): vpool_1.snapshot_retention_policy = vpool_config vpool_1.save() - snapshot_manager = SnapshotManager() + snapshot_manager = SnapshotManager(storagedriver_1) policy_check = RetentionPolicy.from_configuration(vpool_config)[0] policy = snapshot_manager.get_policy_to_enforce(vdisk_1)[0] self.assertEqual(policy_check, policy) # VDisk Configuration - snapshot_manager = SnapshotManager() + snapshot_manager = SnapshotManager(storagedriver_1) vdisk_1.snapshot_retention_policy = vdisk_config vdisk_1.save() @@ -572,7 +584,7 @@ def 
test_retention_policy_overlap(self): # After reviewing the code: no overlap is possible as it increments the days that are processed. # It doesn't review every period by itself, which would be a nightmare on it's own Configuration.set(SNAPSHOT_POLICY_LOCATION, global_config) - vdisk_1 = self._build_vdisk() + structure, vdisk_1, storagedriver_1 = self._build_vdisk_structure() start_time = datetime.datetime.now().date() snapshots = [] for day, snapshot_consistencies in {1: [True, False], @@ -584,7 +596,7 @@ def test_retention_policy_overlap(self): metadata={'label': 'snapshot_{0}:30'.format(str(index)), 'is_consistent': consistency, 'timestamp': str(snapshot_timestamp)})) - GenericController.delete_snapshots() + GenericController.delete_snapshots_storagedriver(storagedriver_1.guid) self.assertEqual(2, len(vdisk_1._snapshot_ids())) @staticmethod