diff --git a/drivers/LinstorSR.py b/drivers/LinstorSR.py index 18ee32117..5cad13409 100755 --- a/drivers/LinstorSR.py +++ b/drivers/LinstorSR.py @@ -26,6 +26,7 @@ from linstorvolumemanager import LinstorVolumeManager from linstorvolumemanager import LinstorVolumeManagerError from linstorvolumemanager import PERSISTENT_PREFIX + from linstorvolumemanager import write_controller_uri_cache LINSTOR_AVAILABLE = True except ImportError: @@ -420,6 +421,7 @@ def connect(): try: util.SMlog('Connecting from config to LINSTOR controller using: {}'.format(ip)) create_linstor(controller_uri, attempt_count=0) + write_controller_uri_cache(controller_uri) return controller_uri except: pass diff --git a/drivers/linstor-manager b/drivers/linstor-manager index 9cef5f825..17399e503 100755 --- a/drivers/linstor-manager +++ b/drivers/linstor-manager @@ -350,8 +350,7 @@ def detach(session, args): vdi_uuid = args['vdiUuid'] group_name = args['groupName'] - linstor = LinstorVolumeManager( - get_controller_uri(), + linstor = LinstorVolumeManager.create_from_cache( group_name, logger=util.SMlog ) @@ -405,8 +404,7 @@ def get_vhd_info(session, args): group_name = args['groupName'] include_parent = util.strtobool(args['includeParent']) - linstor = LinstorVolumeManager( - get_controller_uri(), + linstor = LinstorVolumeManager.create_from_cache( group_name, logger=util.SMlog ) @@ -440,8 +438,7 @@ def get_parent(session, args): device_path = args['devicePath'] group_name = args['groupName'] - linstor = LinstorVolumeManager( - get_controller_uri(), + linstor = LinstorVolumeManager.create_from_cache( group_name, logger=util.SMlog ) @@ -594,8 +591,7 @@ def deflate(session, args): zeroize = util.strtobool(args['zeroize']) group_name = args['groupName'] - linstor = LinstorVolumeManager( - get_controller_uri(), + linstor = LinstorVolumeManager.create_from_cache( group_name, logger=util.SMlog ) @@ -619,8 +615,7 @@ def lock_vdi(session, args): if locked: lock.acquire() - linstor = LinstorVolumeManager( - get_controller_uri(), + linstor = LinstorVolumeManager.create_from_cache( group_name, logger=util.SMlog ) @@ -678,8 +673,7 @@ def add_host(session, args): ) # 3. Ensure node doesn't exist. - linstor = LinstorVolumeManager( - get_controller_uri(), + linstor = LinstorVolumeManager.create_from_cache( group_name, logger=util.SMlog ) @@ -781,8 +775,7 @@ def remove_host(session, args): pbds[pbd_ref] = pbd # 2. Remove node. - linstor = LinstorVolumeManager( - get_controller_uri(), + linstor = LinstorVolumeManager.create_from_cache( group_name, logger=util.SMlog ) @@ -1127,8 +1120,7 @@ def create_node_interface(session, args): ip_addr = get_ip_addr_of_pif(session, pif_uuid) - linstor = LinstorVolumeManager( - get_controller_uri(), + linstor = LinstorVolumeManager.create_from_cache( group_name, logger=util.SMlog ) @@ -1144,8 +1136,7 @@ def destroy_node_interface(session, args): hostname = args['hostname'] name = args['name'] - linstor = LinstorVolumeManager( - get_controller_uri(), + linstor = LinstorVolumeManager.create_from_cache( group_name, logger=util.SMlog ) @@ -1164,8 +1155,7 @@ def modify_node_interface(session, args): ip_addr = get_ip_addr_of_pif(session, pif_uuid) - linstor = LinstorVolumeManager( - get_controller_uri(), + linstor = LinstorVolumeManager.create_from_cache( group_name, logger=util.SMlog ) @@ -1180,8 +1170,7 @@ def list_node_interfaces(session, args): group_name = args['groupName'] hostname = args['hostname'] - linstor = LinstorVolumeManager( - get_controller_uri(), + linstor = LinstorVolumeManager.create_from_cache( group_name, logger=util.SMlog ) @@ -1195,8 +1184,7 @@ def get_node_preferred_interface(session, args): group_name = args['groupName'] hostname = args['hostname'] - linstor = LinstorVolumeManager( - get_controller_uri(), + linstor = LinstorVolumeManager.create_from_cache( group_name, logger=util.SMlog ) @@ -1211,8 +1199,7 @@ def set_node_preferred_interface(session, args): hostname = args['hostname'] name = args['name'] - linstor = LinstorVolumeManager( - get_controller_uri(), + linstor = LinstorVolumeManager.create_from_cache( group_name, logger=util.SMlog ) diff --git a/drivers/linstorjournaler.py b/drivers/linstorjournaler.py index 2475ae1e9..a29722139 100755 --- a/drivers/linstorjournaler.py +++ b/drivers/linstorjournaler.py @@ -16,8 +16,13 @@ # -from linstorvolumemanager import \ - get_controller_uri, LinstorVolumeManager, LinstorVolumeManagerError +from linstorvolumemanager import ( + delete_controller_uri_cache, + get_controller_uri, + LinstorVolumeManager, + LinstorVolumeManagerError, +) + import linstor import re import util @@ -160,8 +165,10 @@ def connect(uri): try: return connect(uri) - except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError): + except LinstorVolumeManagerError: pass + except linstor.errors.LinstorNetworkError: + delete_controller_uri_cache(uri) return util.retry( lambda: connect(None), diff --git a/drivers/linstorvolumemanager.py b/drivers/linstorvolumemanager.py index 0db16e962..d0a1405da 100755 --- a/drivers/linstorvolumemanager.py +++ b/drivers/linstorvolumemanager.py @@ -17,7 +17,9 @@ from sm_typing import override +import contextlib import errno +import flock import json import linstor import os.path @@ -43,6 +45,10 @@ DRBD_BY_RES_PATH = '/dev/drbd/by-res/' +CONTROLLER_CACHE_DIRECTORY = os.environ.get('TMPDIR', '/tmp') + '/linstor' +CONTROLLER_CACHE_FILE = 'controller_uri' +CONTROLLER_CACHE_PATH = "{}/{}".format(CONTROLLER_CACHE_DIRECTORY, CONTROLLER_CACHE_FILE) + PLUGIN = 'linstor-manager' @@ -196,17 +202,123 @@ def _get_controller_uri(): # Not found, maybe we are trying to create the SR... pass -def get_controller_uri(): - retries = 0 - while True: - uri = _get_controller_uri() - if uri: - return uri - retries += 1 - if retries >= 10: - break - time.sleep(1) +@contextlib.contextmanager +def shared_reader(path): + with open(path, 'r') as f: + lock = flock.ReadLock(f.fileno()) + lock.lock() + try: + yield f + finally: + lock.unlock() + + +@contextlib.contextmanager +def excl_writer(path): + with open(path, 'r+') as f: + lock = flock.WriteLock(f.fileno()) + lock.lock() + try: + yield f + finally: + lock.unlock() + + +def _read_controller_uri_from_file(f): + try: + return f.read().strip() + except Exception as e: + util.SMlog('Unable to read controller URI cache file at `{}`: {}'.format(CONTROLLER_CACHE_PATH, e)) + + +def _write_controller_uri_to_file(uri, f): + try: + f.seek(0) + f.write(uri) + f.truncate() + except Exception as e: + util.SMlog('Unable to write URI cache file at `{}` : {}'.format(CONTROLLER_CACHE_PATH, e)) + + +def _delete_controller_uri_from_file(f): + try: + f.seek(0) + f.truncate() + except Exception as e: + util.SMlog('Unable to delete URI cache file at `{}` : {}'.format(CONTROLLER_CACHE_PATH, e)) + + +def read_controller_uri_cache(): + try: + with shared_reader(CONTROLLER_CACHE_PATH) as f: + return _read_controller_uri_from_file(f) + except FileNotFoundError: + pass + except Exception as e: + util.SMlog('Unable to read controller URI cache file at `{}`: {}'.format(CONTROLLER_CACHE_PATH, e)) + + +def write_controller_uri_cache(uri): + try: + with excl_writer(CONTROLLER_CACHE_PATH) as f: + _write_controller_uri_to_file(uri, f) + except FileNotFoundError: + if os.path.exists(CONTROLLER_CACHE_DIRECTORY): + raise + os.makedirs(CONTROLLER_CACHE_DIRECTORY) + os.chmod(CONTROLLER_CACHE_DIRECTORY, 0o700) + return write_controller_uri_cache(uri) + except Exception as e: + util.SMlog('Unable to write URI cache file at `{}` : {}'.format(CONTROLLER_CACHE_PATH, e)) + + +def delete_controller_uri_cache(uri=None): + try: + with excl_writer(CONTROLLER_CACHE_PATH) as f: + if uri and uri != _read_controller_uri_from_file(f): + return + f.seek(0) + f.truncate() + except FileNotFoundError: + pass + except Exception as e: + util.SMlog('Unable to delete URI cache file at `{}` : {}'.format(CONTROLLER_CACHE_PATH, e)) + + +def build_controller_uri_cache(): + uri = '' + try: + with excl_writer(CONTROLLER_CACHE_PATH) as f: + uri = _read_controller_uri_from_file(f) + if uri: + return uri + uri = _get_controller_uri() + if not uri: + for retries in range(9): + time.sleep(1) + uri = _get_controller_uri() + if uri: + break + if uri: + _write_controller_uri_to_file(uri, f) + except FileNotFoundError: + if os.path.exists(CONTROLLER_CACHE_DIRECTORY): + raise + os.makedirs(CONTROLLER_CACHE_DIRECTORY) + os.chmod(CONTROLLER_CACHE_DIRECTORY, 0o700) + return build_controller_uri_cache() + except Exception as e: + util.SMlog('Unable to write URI cache file at `{}` : {}'.format(CONTROLLER_CACHE_PATH, e)) + + return uri + + +def get_controller_uri(): + uri = read_controller_uri_cache() + if not uri: + uri = build_controller_uri_cache() + return uri def get_controller_node_name(): @@ -429,6 +541,34 @@ def __init__( self._volume_info_cache_dirty = True self._build_volumes(repair=repair) + @staticmethod + def create_from_cache( + group_name, repair=False, logger=default_logger.__func__, + attempt_count=30 + ): + """ + Attempt to create a LinstorVolumeManager from cached data. + If it fails, refresh the cache and retry once. + + :param str group_name: The SR goup name to use. + :param bool repair: If true we try to remove bad volumes due to a crash + or unexpected behavior. + :param function logger: Function to log messages. + :param int attempt_count: Number of attempts to join the controller. + """ + uri = read_controller_uri_cache() + if not uri: + uri = build_controller_uri_cache() + if not uri: + raise LinstorVolumeManagerError( + "Unable to retrieve a valid controller URI from cache or after rebuild." + ) + + return LinstorVolumeManager( + uri, group_name, repair=repair, + logger=logger, attempt_count=attempt_count + ) + @property def group_name(self): """ @@ -1772,6 +1912,7 @@ def create_sr( DATABASE_PATH, mount=False ) + delete_controller_uri_cache() return sr @classmethod @@ -2615,8 +2756,10 @@ def connect(uri): try: return connect(uri) - except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError): + except LinstorVolumeManagerError: pass + except linstor.errors.LinstorNetworkError: + delete_controller_uri_cache(uri) if not keep_uri_unmodified: uri = None diff --git a/drivers/tapdisk-pause b/drivers/tapdisk-pause index f98257a23..437d74b1c 100755 --- a/drivers/tapdisk-pause +++ b/drivers/tapdisk-pause @@ -31,7 +31,7 @@ import lvmcache try: from linstorvhdutil import LinstorVhdUtil - from linstorvolumemanager import get_controller_uri, LinstorVolumeManager + from linstorvolumemanager import LinstorVolumeManager LINSTOR_AVAILABLE = True except ImportError: LINSTOR_AVAILABLE = False @@ -163,8 +163,7 @@ class Tapdisk: dconf = session.xenapi.PBD.get_device_config(pbd) group_name = dconf['group-name'] - linstor = LinstorVolumeManager( - get_controller_uri(), + linstor = LinstorVolumeManager.create_from_cache( group_name, logger=util.SMlog )