Coverage for drivers/linstorvolumemanager.py : 10%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python3
2#
3# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 3 of the License, or
8# (at your option) any later version.
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16#
18from sm_typing import override
20import errno
21import json
22import linstor
23import os.path
24import re
25import shutil
26import socket
27import stat
28import time
29import util
30import uuid
# Persistent prefix to add to RAW persistent volumes.
PERSISTENT_PREFIX = 'xcp-persistent-'

# Contains the data of the "/var/lib/linstor" directory.
DATABASE_VOLUME_NAME = PERSISTENT_PREFIX + 'database'
DATABASE_SIZE = 1 << 30  # 1GB.
DATABASE_PATH = '/var/lib/linstor'
DATABASE_MKFS = 'mkfs.ext4'

# Matches the node name of a resource reported as "role:Primary"
# in `drbdadm status` output.
REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary")
# Extracts the IP part of a `drbdsetup` "_remote_host" value
# (format: "<name> <ip>:<port>").
REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$')

# Base directory of the DRBD device symlinks
# ("/dev/drbd/by-res/<resource>/<volume>").
DRBD_BY_RES_PATH = '/dev/drbd/by-res/'

# Name of the XAPI plugin used to run LINSTOR helpers on pool hosts.
PLUGIN = 'linstor-manager'
def get_local_volume_openers(resource_name, volume):
    """
    Read the DRBD debugfs "openers" file of a local resource/volume.
    :param str resource_name: The DRBD resource name.
    :param volume: The volume number inside the resource.
    :return: JSON object mapping each opener PID to its process name
    and open duration (in ms).
    :rtype: str
    """
    if not resource_name or volume is None:
        raise Exception('Cannot get DRBD openers without resource name and/or volume.')

    openers_path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format(
        resource_name, volume
    )

    # Small file, so fetch the whole content in one shot.
    with open(openers_path, 'r') as openers_file:
        entries = openers_file.readlines()

    # Each line: "<process name> <pid> <open duration ms>".
    entry_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)')

    openers = {}
    for entry in entries:
        parsed = entry_re.match(entry)
        assert parsed

        process_name, pid, open_duration_ms = parsed.groups()
        openers[pid] = {
            'process-name': process_name,
            'open-duration': open_duration_ms
        }

    return json.dumps(openers)
def get_all_volume_openers(resource_name, volume):
    """
    Collect the DRBD openers of a resource/volume on every live host of
    the pool, using the linstor-manager XAPI plugin.
    :param str resource_name: The DRBD resource name.
    :param volume: The volume number inside the resource.
    :return: JSON-decoded openers indexed by node name.
    :rtype: dict
    """
    PLUGIN_CMD = 'getDrbdOpeners'

    volume = str(volume)
    openers = {}

    # Make sure this call never stucks because this function can be called
    # during HA init and in this case we can wait forever.
    session = util.timeout_call(10, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            # Ensure we call plugin on online hosts only.
            is_live = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )['live']
            if not is_live:
                continue

            raw_openers = session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {
                    'resourceName': resource_name,
                    'volume': volume
                }
            )
            openers[node_name] = json.loads(raw_openers)
        except Exception as e:
            util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))

    return openers
115# ==============================================================================
def round_up(value, divisor):
    """Return the smallest multiple of *divisor* that is >= *value*."""
    assert divisor
    divisor = int(divisor)
    # Ceiling division via negation trick.
    quotient = -(-int(value) // divisor)
    return quotient * divisor
def round_down(value, divisor):
    """Return the largest multiple of *divisor* that is <= *value*."""
    assert divisor
    divisor = int(divisor)
    # Floor division keeps the behavior of `value - value % divisor`.
    return (int(value) // divisor) * divisor
129# ==============================================================================
def get_remote_host_ip(node_name):
    """
    Find the IP of a remote node using the DRBD connections of the
    database volume.
    :param str node_name: The node to look for.
    :return: The IP address if found, None otherwise.
    :rtype: str or None
    """
    (ret, stdout, stderr) = util.doexec([
        'drbdsetup', 'show', DATABASE_VOLUME_NAME, '--json'
    ])
    if ret != 0:
        return

    # Best effort: any parse error below simply means "not found".
    try:
        config = json.loads(stdout)
        if not config:
            return

        for connection in config[0]['connections']:
            if connection['net']['_name'] != node_name:
                continue
            remote_host = connection['path']['_remote_host']
            match = REG_DRBDSETUP_IP.match(remote_host)
            if match:
                return match.groups()[0]
            break
    except Exception:
        pass
def _get_controller_uri():
    """
    Find the URI of the running LINSTOR controller, if any.
    Tries the local DRBD status first, then asks every pool host through
    the linstor-manager plugin.
    :return: A "linstor://<host>" URI, or None if no controller is found.
    :rtype: str or None
    """
    PLUGIN_CMD = 'hasControllerRunning'

    # Try to find controller using drbdadm.
    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])
    if ret == 0:
        # If we are here, the database device exists locally.

        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            # Nice case, we have the controller running on this local host.
            return 'linstor://localhost'

        # Try to find the host using DRBD connections.
        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            node_name = res.groups()[0]
            ip = get_remote_host_ip(node_name)
            if ip:
                return 'linstor://' + ip

    # Worst case: we use many hosts in the pool (>= 4), so we can't find the
    # primary using drbdadm because we don't have all connections to the
    # replicated volume. `drbdadm status xcp-persistent-database` returns
    # 3 connections by default.
    try:
        session = util.timeout_call(10, util.get_localAPI_session)

        for host_ref, host_record in session.xenapi.host.get_all_records().items():
            node_name = host_record['hostname']
            try:
                if util.strtobool(
                    session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {})
                ):
                    return 'linstor://' + host_record['address']
            except Exception as e:
                # Can throw an exception if a host is offline. So catch it.
                util.SMlog('Unable to search controller on `{}`: {}'.format(
                    node_name, e
                ))
    except Exception:
        # Was a bare `except:`, which would also swallow SystemExit and
        # KeyboardInterrupt. Narrowed to Exception.
        # Not found, maybe we are trying to create the SR...
        pass
def get_controller_uri():
    """
    Find the LINSTOR controller URI, retrying for up to 10 attempts
    (one second apart).
    :return: A "linstor://<host>" URI, or None if not found in time.
    :rtype: str or None
    """
    for attempt in range(10):
        uri = _get_controller_uri()
        if uri:
            return uri
        # No sleep after the last attempt.
        if attempt < 9:
            time.sleep(1)
def get_controller_node_name():
    """
    Give the node name of the host running the LINSTOR controller.
    :return: 'localhost', a pool node name, or None if not found.
    :rtype: str or None
    """
    PLUGIN_CMD = 'hasControllerRunning'

    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])

    if ret == 0:
        # The database volume exists locally: check the DRBD Primary.
        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            return 'localhost'

        primary = REG_DRBDADM_PRIMARY.search(stdout)
        if primary:
            return primary.groups()[0]

    # Fall back on the plugin: ask every live host of the pool.
    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            if not metrics['live']:
                continue

            has_controller = session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {}
            )
            if util.strtobool(has_controller):
                return node_name
        except Exception as e:
            util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format(
                node_name, e
            ))
def demote_drbd_resource(node_name, resource_name):
    """
    Force the demotion of a DRBD resource on a specific node using the
    linstor-manager plugin.
    :param str node_name: The node on which the resource must be demoted.
    :param str resource_name: The DRBD resource to demote.
    :raise Exception: If the plugin call fails on the target node.
    """
    PLUGIN_CMD = 'demoteDrbdResource'

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        # Only act on the requested node.
        if host_record['hostname'] != node_name:
            continue

        try:
            session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name}
            )
        except Exception as e:
            util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))
            # NOTE(review): this path is reached when the plugin call fails,
            # but the message says "unable to find node" — confirm intent.
            raise Exception(
                'Can\'t demote resource `{}`, unable to find node `{}`'
                .format(resource_name, node_name)
            )
269# ==============================================================================
class LinstorVolumeManagerError(Exception):
    """
    Error raised by LinstorVolumeManager. The `code` property allows
    callers to react to a specific failure class.
    """

    # Error codes. Fixed: they were previously written with trailing
    # commas, which silently turned each constant into a 1-element tuple
    # (e.g. `(0,)` instead of `0`). Comparisons against these class
    # constants keep working since both sides change consistently.
    ERR_GENERIC = 0
    ERR_VOLUME_EXISTS = 1
    ERR_VOLUME_NOT_EXISTS = 2
    ERR_VOLUME_DESTROY = 3
    ERR_GROUP_NOT_EXISTS = 4

    def __init__(self, message, code=ERR_GENERIC):
        """
        :param str message: Human readable error description.
        :param int code: One of the ERR_* constants.
        """
        super(LinstorVolumeManagerError, self).__init__(message)
        self._code = code

    @property
    def code(self):
        # The error code given at construction time.
        return self._code
287# ==============================================================================
289# Note:
290# If a storage pool is not accessible after a network change:
291# linstor node interface modify <NODE> default --ip <IP>
294class LinstorVolumeManager(object):
295 """
296 API to manager LINSTOR volumes in XCP-ng.
297 A volume in this context is a physical part of the storage layer.
298 """
    # Fixed attribute set: avoids a per-instance __dict__ and catches
    # typo'd attribute assignments.
    __slots__ = (
        '_linstor', '_logger', '_redundancy',
        '_base_group_name', '_group_name', '_ha_group_name',
        '_volumes', '_storage_pools', '_storage_pools_time',
        '_kv_cache', '_resource_cache', '_volume_info_cache',
        '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty'
    )

    # Root directory of the DRBD device symlinks.
    DEV_ROOT_PATH = DRBD_BY_RES_PATH

    # Default sector size.
    BLOCK_SIZE = 512

    # List of volume properties.
    PROP_METADATA = 'metadata'
    PROP_NOT_EXISTS = 'not-exists'
    PROP_VOLUME_NAME = 'volume-name'
    PROP_IS_READONLY_TIMESTAMP = 'readonly-timestamp'

    # A volume can only be locked for a limited duration.
    # The goal is to give enough time to slaves to execute some actions on
    # a device before an UUID update or a coalesce for example.
    # Expiration is expressed in seconds.
    LOCKED_EXPIRATION_DELAY = 1 * 60

    # Used when volume uuid is being updated.
    PROP_UPDATING_UUID_SRC = 'updating-uuid-src'

    # States of property PROP_NOT_EXISTS.
    STATE_EXISTS = '0'
    STATE_NOT_EXISTS = '1'
    STATE_CREATING = '2'

    # Property namespaces.
    NAMESPACE_SR = 'xcp/sr'
    NAMESPACE_VOLUME = 'xcp/volume'

    # Regex to match properties.
    REG_PROP = '^([^/]+)/{}$'

    REG_METADATA = re.compile(REG_PROP.format(PROP_METADATA))
    REG_NOT_EXISTS = re.compile(REG_PROP.format(PROP_NOT_EXISTS))
    REG_VOLUME_NAME = re.compile(REG_PROP.format(PROP_VOLUME_NAME))
    REG_UPDATING_UUID_SRC = re.compile(REG_PROP.format(PROP_UPDATING_UUID_SRC))

    # Prefixes of SR/VOLUME in the LINSTOR DB.
    # A LINSTOR (resource, group, ...) name cannot start with a number.
    # So we add a prefix behind our SR/VOLUME uuids.
    PREFIX_SR = 'xcp-sr-'
    PREFIX_HA = 'xcp-ha-'
    PREFIX_VOLUME = 'xcp-volume-'

    # Limit request number when storage pool info is asked, we fetch
    # the current pool status after N elapsed seconds.
    STORAGE_POOLS_FETCH_INTERVAL = 15
    @staticmethod
    def default_logger(*args):
        # Fallback logger used when the caller supplies none: dumps the
        # raw argument tuple on stdout.
        print(args)
360 # --------------------------------------------------------------------------
361 # API.
362 # --------------------------------------------------------------------------
    class VolumeInfo(object):
        """
        Snapshot of the size/placement state of a single volume.
        """

        __slots__ = (
            'name',
            'allocated_size',  # Allocated size, place count is not used.
            'virtual_size',    # Total virtual available size of this volume
                               # (i.e. the user size at creation).
            'diskful'          # Array of nodes that have a diskful volume.
        )

        def __init__(self, name):
            self.name = name
            self.allocated_size = 0
            self.virtual_size = 0
            self.diskful = []

        @override
        def __repr__(self) -> str:
            return 'VolumeInfo("{}", {}, {}, {})'.format(
                self.name, self.allocated_size, self.virtual_size,
                self.diskful
            )
386 # --------------------------------------------------------------------------
    def __init__(
        self, uri, group_name, repair=False, logger=default_logger.__func__,
        attempt_count=30
    ):
        """
        Create a new LinstorVolumeManager object.
        :param str uri: URI to communicate with the LINSTOR controller.
        :param str group_name: The SR group name to use.
        :param bool repair: If true we try to remove bad volumes due to a crash
        or unexpected behavior.
        :param function logger: Function to log messages.
        :param int attempt_count: Number of attempts to join the controller.
        """

        self._linstor = self._create_linstor_instance(
            uri, attempt_count=attempt_count
        )

        # Refuse to operate on a pool with mixed LINSTOR versions.
        mismatched_nodes = [
            node for node in self._linstor.node_list().pop().nodes if node.connection_status == "VERSION_MISMATCH"
        ]

        if mismatched_nodes:
            raise LinstorVolumeManagerError(
                "Some linstor nodes are not using the same version. " +
                f"Incriminated nodes are: {','.join([node.name for node in mismatched_nodes])}"
            )

        self._base_group_name = group_name

        # Ensure group exists.
        group_name = self._build_group_name(group_name)
        groups = self._linstor.resource_group_list_raise([group_name]).resource_groups
        if not groups:
            raise LinstorVolumeManagerError(
                'Unable to find `{}` Linstor SR'.format(group_name)
            )

        # Ok. ;)
        self._logger = logger
        # Redundancy comes from the resource group's place count.
        self._redundancy = groups[0].select_filter.place_count
        self._group_name = group_name
        self._ha_group_name = self._build_ha_group_name(self._base_group_name)
        self._volumes = set()
        self._storage_pools_time = 0

        # To increase performance and limit request count to LINSTOR services,
        # we use caches.
        self._kv_cache = self._create_kv_cache()
        self._resource_cache = None
        self._resource_cache_dirty = True
        self._volume_info_cache = None
        self._volume_info_cache_dirty = True
        self._build_volumes(repair=repair)
    @property
    def group_name(self):
        """
        Give the used group name.
        Note: this is the base (user-facing) name, not the prefixed
        LINSTOR group name.
        :return: The group name.
        :rtype: str
        """
        return self._base_group_name
    @property
    def redundancy(self):
        """
        Give the used redundancy (place count of the LINSTOR resource
        group, read at construction time).
        :return: The redundancy.
        :rtype: int
        """
        return self._redundancy
    @property
    def volumes(self):
        """
        Give the volumes uuid set.
        :return: The volumes uuid set.
        :rtype: set(str)
        """
        return self._volumes
471 @property
472 def max_volume_size_allowed(self):
473 """
474 Give the max volume size currently available in B.
475 :return: The current size.
476 :rtype: int
477 """
479 candidates = self._find_best_size_candidates()
480 if not candidates:
481 raise LinstorVolumeManagerError(
482 'Failed to get max volume size allowed'
483 )
485 size = candidates[0].max_volume_size
486 if size < 0:
487 raise LinstorVolumeManagerError(
488 'Invalid max volume size allowed given: {}'.format(size)
489 )
490 return self.round_down_volume_size(size * 1024)
    @property
    def physical_size(self):
        """
        Give the total physical size of the SR.
        :return: The physical size.
        :rtype: int
        """
        # Sum of the `total_capacity` field over the SR storage pools.
        return self._compute_size('total_capacity')
    @property
    def physical_free_size(self):
        """
        Give the total free physical size of the SR.
        :return: The physical free size.
        :rtype: int
        """
        # Sum of the `free_capacity` field over the SR storage pools.
        return self._compute_size('free_capacity')
510 @property
511 def allocated_volume_size(self):
512 """
513 Give the allocated size for all volumes. The place count is not
514 used here. When thick lvm is used, the size for one volume should
515 be equal to the virtual volume size. With thin lvm, the size is equal
516 or lower to the volume size.
517 :return: The allocated size of all volumes.
518 :rtype: int
519 """
521 # Paths: /res_name/vol_number/size
522 sizes = {}
524 for resource in self._get_resource_cache().resources:
525 if resource.name not in sizes:
526 current = sizes[resource.name] = {}
527 else:
528 current = sizes[resource.name]
530 for volume in resource.volumes:
531 # We ignore diskless pools of the form "DfltDisklessStorPool".
532 if volume.storage_pool_name != self._group_name:
533 continue
535 allocated_size = max(volume.allocated_size, 0)
536 current_allocated_size = current.get(volume.number) or -1
537 if allocated_size > current_allocated_size:
538 current[volume.number] = allocated_size
540 total_size = 0
541 for volumes in sizes.values():
542 for size in volumes.values():
543 total_size += size
545 return total_size * 1024
547 def get_min_physical_size(self):
548 """
549 Give the minimum physical size of the SR.
550 I.e. the size of the smallest disk + the number of pools.
551 :return: The physical min size.
552 :rtype: tuple(int, int)
553 """
554 size = None
555 pool_count = 0
556 for pool in self._get_storage_pools(force=True):
557 space = pool.free_space
558 if space:
559 pool_count += 1
560 current_size = space.total_capacity
561 if current_size < 0:
562 raise LinstorVolumeManagerError(
563 'Failed to get pool total_capacity attr of `{}`'
564 .format(pool.node_name)
565 )
566 if size is None or current_size < size:
567 size = current_size
568 return (pool_count, (size or 0) * 1024)
570 @property
571 def metadata(self):
572 """
573 Get the metadata of the SR.
574 :return: Dictionary that contains metadata.
575 :rtype: dict(str, dict)
576 """
578 sr_properties = self._get_sr_properties()
579 metadata = sr_properties.get(self.PROP_METADATA)
580 if metadata is not None:
581 metadata = json.loads(metadata)
582 if isinstance(metadata, dict):
583 return metadata
584 raise LinstorVolumeManagerError(
585 'Expected dictionary in SR metadata: {}'.format(
586 self._group_name
587 )
588 )
590 return {}
    @metadata.setter
    def metadata(self, metadata):
        """
        Set the metadata of the SR.
        :param dict metadata: Dictionary that contains metadata.
        """

        assert isinstance(metadata, dict)
        sr_properties = self._get_sr_properties()
        # Stored JSON-encoded under the SR property namespace.
        sr_properties[self.PROP_METADATA] = json.dumps(metadata)
603 @property
604 def disconnected_hosts(self):
605 """
606 Get the list of disconnected hosts.
607 :return: Set that contains disconnected hosts.
608 :rtype: set(str)
609 """
611 disconnected_hosts = set()
612 for pool in self._get_storage_pools():
613 for report in pool.reports:
614 if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \
615 linstor.consts.WARN_NOT_CONNECTED:
616 disconnected_hosts.add(pool.node_name)
617 break
618 return disconnected_hosts
    def check_volume_exists(self, volume_uuid):
        """
        Check if a volume exists in the SR.
        :param str volume_uuid: The volume uuid to check.
        :return: True if volume exists.
        :rtype: bool
        """
        # Only consults the in-memory set built at construction time.
        return volume_uuid in self._volumes
    def create_volume(
        self,
        volume_uuid,
        size,
        persistent=True,
        volume_name=None,
        high_availability=False
    ):
        """
        Create a new volume on the SR.
        :param str volume_uuid: The volume uuid to use.
        :param int size: volume size in B.
        :param bool persistent: If false the volume will be unavailable
        on the next constructor call LinstorSR(...).
        :param str volume_name: If set, this name is used in the LINSTOR
        database instead of a generated name.
        :param bool high_availability: If set, the volume is created in
        the HA group.
        :return: The current device path of the volume.
        :rtype: str
        """

        self._logger('Creating LINSTOR volume {}...'.format(volume_uuid))
        if not volume_name:
            volume_name = self.build_volume_name(util.gen_uuid())
        volume_properties = self._create_volume_with_properties(
            volume_uuid,
            volume_name,
            size,
            True,  # place_resources
            high_availability
        )

        # Volume created! Now try to find the device path.
        try:
            self._logger(
                'Find device path of LINSTOR volume {}...'.format(volume_uuid)
            )
            device_path = self._find_device_path(volume_uuid, volume_name)
            if persistent:
                # Only mark the volume as existing once the device path is
                # resolved: a crash before this point leaves it flagged
                # for cleanup.
                volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
            self._volumes.add(volume_uuid)
            self._logger(
                'LINSTOR volume {} created!'.format(volume_uuid)
            )
            return device_path
        except Exception:
            # There is an issue to find the path.
            # At this point the volume has just been created, so force flag can be used.
            self._destroy_volume(volume_uuid, force=True)
            raise
680 def mark_volume_as_persistent(self, volume_uuid):
681 """
682 Mark volume as persistent if created with persistent=False.
683 :param str volume_uuid: The volume uuid to mark.
684 """
686 self._ensure_volume_exists(volume_uuid)
688 # Mark volume as persistent.
689 volume_properties = self._get_volume_properties(volume_uuid)
690 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
    def destroy_volume(self, volume_uuid):
        """
        Destroy a volume.
        :param str volume_uuid: The volume uuid to destroy.
        :raise LinstorVolumeManagerError: With code ERR_VOLUME_DESTROY
        if the underlying destruction fails.
        """

        self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        # Mark volume as destroyed first: if we crash before the actual
        # destruction, the volume stays flagged for a later cleanup.
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS

        try:
            self._volumes.remove(volume_uuid)
            self._destroy_volume(volume_uuid)
        except Exception as e:
            raise LinstorVolumeManagerError(
                str(e),
                LinstorVolumeManagerError.ERR_VOLUME_DESTROY
            )
714 def lock_volume(self, volume_uuid, locked=True):
715 """
716 Prevent modifications of the volume properties during
717 "self.LOCKED_EXPIRATION_DELAY" seconds. The SR must be locked
718 when used. This method is useful to attach/detach correctly a volume on
719 a slave. Without it the GC can rename a volume, in this case the old
720 volume path can be used by a slave...
721 :param str volume_uuid: The volume uuid to protect/unprotect.
722 :param bool locked: Lock/unlock the volume.
723 """
725 self._ensure_volume_exists(volume_uuid)
727 self._logger(
728 '{} volume {} as locked'.format(
729 'Mark' if locked else 'Unmark',
730 volume_uuid
731 )
732 )
734 volume_properties = self._get_volume_properties(volume_uuid)
735 if locked:
736 volume_properties[
737 self.PROP_IS_READONLY_TIMESTAMP
738 ] = str(time.time())
739 elif self.PROP_IS_READONLY_TIMESTAMP in volume_properties:
740 volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
    def ensure_volume_is_not_locked(self, volume_uuid, timeout=None):
        """
        Ensure a volume is not locked. Wait if necessary.
        :param str volume_uuid: The volume uuid to check.
        :param int timeout: If the volume is always locked after the expiration
        of the timeout, an exception is thrown.
        """
        # Delegates to the list variant with a single-element list.
        return self.ensure_volume_list_is_not_locked([volume_uuid], timeout)
    def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None):
        """
        Wait until none of the given volumes carries an unexpired
        readonly timestamp. Stale (expired) locks are removed on the fly.
        :param list volume_uuids: Volume uuids to check; unknown uuids
        are ignored.
        :param int timeout: Max wait in seconds before raising.
        :raise LinstorVolumeManagerError: If the timeout is reached.
        """
        # Only consider volumes the SR actually knows about.
        checked = set()
        for volume_uuid in volume_uuids:
            if volume_uuid in self._volumes:
                checked.add(volume_uuid)

        if not checked:
            return

        waiting = False

        volume_properties = self._get_kv_cache()

        start = time.time()
        while True:
            # Can't delete in for loop, use a copy of the list.
            remaining = checked.copy()
            for volume_uuid in checked:
                volume_properties.namespace = \
                    self._build_volume_namespace(volume_uuid)
                timestamp = volume_properties.get(
                    self.PROP_IS_READONLY_TIMESTAMP
                )
                if timestamp is None:
                    # Not locked (anymore).
                    remaining.remove(volume_uuid)
                    continue

                now = time.time()
                if now - float(timestamp) > self.LOCKED_EXPIRATION_DELAY:
                    # Stale lock: remove it instead of waiting forever.
                    self._logger(
                        'Remove readonly timestamp on {}'.format(volume_uuid)
                    )
                    volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
                    remaining.remove(volume_uuid)
                    continue

                if not waiting:
                    self._logger(
                        'Volume {} is locked, waiting...'.format(volume_uuid)
                    )
                    waiting = True
                break

            if not remaining:
                break
            checked = remaining

            # Note: `now` is always bound here because `remaining` can only
            # be non-empty after at least one timestamp check above.
            if timeout is not None and now - start > timeout:
                raise LinstorVolumeManagerError(
                    'volume `{}` is locked and timeout has been reached'
                    .format(volume_uuid),
                    LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
                )

            # We must wait to use the volume. After that we can modify it
            # ONLY if the SR is locked to avoid bad reads on the slaves.
            time.sleep(1)
            volume_properties = self._create_kv_cache()

        if waiting:
            self._logger('No volume locked now!')
    def remove_volume_if_diskless(self, volume_uuid):
        """
        Remove diskless path from local node.
        :param str volume_uuid: The volume uuid to remove.
        """

        self._ensure_volume_exists(volume_uuid)

        volume_properties = self._get_volume_properties(volume_uuid)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        node_name = socket.gethostname()

        # Never delete a tie-breaker resource: keep it for quorum.
        for resource in self._get_resource_cache().resources:
            if resource.name == volume_name and resource.node_name == node_name:
                if linstor.consts.FLAG_TIE_BREAKER in resource.flags:
                    return
                break

        result = self._linstor.resource_delete_if_diskless(
            node_name=node_name, rsc_name=volume_name
        )
        if not linstor.Linstor.all_api_responses_no_error(result):
            raise LinstorVolumeManagerError(
                'Unable to delete diskless path of `{}` on node `{}`: {}'
                .format(volume_name, node_name, ', '.join(
                    [str(x) for x in result]))
            )
    def introduce_volume(self, volume_uuid):
        """
        Placeholder: reintroduce an existing volume in the SR
        (not implemented yet).
        :param str volume_uuid: The volume uuid to introduce.
        """
        pass  # TODO: Implement me.
    def resize_volume(self, volume_uuid, new_size):
        """
        Resize a volume.
        :param str volume_uuid: The volume uuid to resize.
        :param int new_size: New size in B.
        """

        volume_name = self.get_volume_name(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)
        # LINSTOR expects the size in KiB.
        new_size = self.round_up_volume_size(new_size) // 1024

        retry_count = 30
        while True:
            result = self._linstor.volume_dfn_modify(
                rsc_name=volume_name,
                volume_nr=0,
                size=new_size
            )

            self._mark_resource_cache_as_dirty()

            error_str = self._get_error_str(result)
            if not error_str:
                break

            # After volume creation, DRBD volume can be unusable during many seconds.
            # So we must retry the definition change if the device is not up to date.
            # Often the case for thick provisioning.
            if retry_count and error_str.find('non-UpToDate DRBD device') >= 0:
                time.sleep(2)
                retry_count -= 1
                continue

            raise LinstorVolumeManagerError(
                'Could not resize volume `{}` from SR `{}`: {}'
                .format(volume_uuid, self._group_name, error_str)
            )
883 def get_volume_name(self, volume_uuid):
884 """
885 Get the name of a particular volume.
886 :param str volume_uuid: The volume uuid of the name to get.
887 :return: The volume name.
888 :rtype: str
889 """
891 self._ensure_volume_exists(volume_uuid)
892 volume_properties = self._get_volume_properties(volume_uuid)
893 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
894 if volume_name:
895 return volume_name
896 raise LinstorVolumeManagerError(
897 'Failed to get volume name of {}'.format(volume_uuid)
898 )
900 def get_volume_size(self, volume_uuid):
901 """
902 Get the size of a particular volume.
903 :param str volume_uuid: The volume uuid of the size to get.
904 :return: The volume size.
905 :rtype: int
906 """
908 volume_name = self.get_volume_name(volume_uuid)
909 dfns = self._linstor.resource_dfn_list_raise(
910 query_volume_definitions=True,
911 filter_by_resource_definitions=[volume_name]
912 ).resource_definitions
914 size = dfns[0].volume_definitions[0].size
915 if size < 0:
916 raise LinstorVolumeManagerError(
917 'Failed to get volume size of: {}'.format(volume_uuid)
918 )
919 return size * 1024
921 def set_auto_promote_timeout(self, volume_uuid, timeout):
922 """
923 Define the blocking time of open calls when a DRBD
924 is already open on another host.
925 :param str volume_uuid: The volume uuid to modify.
926 """
928 volume_name = self.get_volume_name(volume_uuid)
929 result = self._linstor.resource_dfn_modify(volume_name, {
930 'DrbdOptions/Resource/auto-promote-timeout': timeout
931 })
932 error_str = self._get_error_str(result)
933 if error_str:
934 raise LinstorVolumeManagerError(
935 'Could not change the auto promote timeout of `{}`: {}'
936 .format(volume_uuid, error_str)
937 )
939 def set_drbd_ha_properties(self, volume_name, enabled=True):
940 """
941 Set or not HA DRBD properties required by drbd-reactor and
942 by specific volumes.
943 :param str volume_name: The volume to modify.
944 :param bool enabled: Enable or disable HA properties.
945 """
947 properties = {
948 'DrbdOptions/auto-quorum': 'disabled',
949 'DrbdOptions/Resource/auto-promote': 'no',
950 'DrbdOptions/Resource/on-no-data-accessible': 'io-error',
951 'DrbdOptions/Resource/on-no-quorum': 'io-error',
952 'DrbdOptions/Resource/on-suspended-primary-outdated': 'force-secondary',
953 'DrbdOptions/Resource/quorum': 'majority'
954 }
955 if enabled:
956 result = self._linstor.resource_dfn_modify(volume_name, properties)
957 else:
958 result = self._linstor.resource_dfn_modify(volume_name, {}, delete_props=list(properties.keys()))
960 error_str = self._get_error_str(result)
961 if error_str:
962 raise LinstorVolumeManagerError(
963 'Could not modify HA DRBD properties on volume `{}`: {}'
964 .format(volume_name, error_str)
965 )
    def get_volume_info(self, volume_uuid):
        """
        Get the volume info of a particular volume.
        :param str volume_uuid: The volume uuid of the volume info to get.
        :return: The volume info.
        :rtype: VolumeInfo
        """

        volume_name = self.get_volume_name(volume_uuid)
        # The info map is indexed by LINSTOR volume name, not by uuid.
        return self._get_volumes_info()[volume_name]
    def get_device_path(self, volume_uuid):
        """
        Get the dev path of a volume, create a diskless if necessary.
        :param str volume_uuid: The volume uuid to get the dev path.
        :return: The current device path of the volume.
        :rtype: str
        """

        volume_name = self.get_volume_name(volume_uuid)
        return self._find_device_path(volume_uuid, volume_name)
989 def get_volume_uuid_from_device_path(self, device_path):
990 """
991 Get the volume uuid of a device_path.
992 :param str device_path: The dev path to find the volume uuid.
993 :return: The volume uuid of the local device path.
994 :rtype: str
995 """
997 expected_volume_name = \
998 self.get_volume_name_from_device_path(device_path)
1000 volume_names = self.get_volumes_with_name()
1001 for volume_uuid, volume_name in volume_names.items():
1002 if volume_name == expected_volume_name:
1003 return volume_uuid
1005 raise LinstorVolumeManagerError(
1006 'Unable to find volume uuid from dev path `{}`'.format(device_path)
1007 )
1009 def get_volume_name_from_device_path(self, device_path):
1010 """
1011 Get the volume name of a device_path.
1012 :param str device_path: The dev path to find the volume name.
1013 :return: The volume name of the device path.
1014 :rtype: str
1015 """
1017 # Assume that we have a path like this:
1018 # - "/dev/drbd/by-res/xcp-volume-<UUID>/0"
1019 # - "../xcp-volume-<UUID>/0"
1020 if device_path.startswith(DRBD_BY_RES_PATH):
1021 prefix_len = len(DRBD_BY_RES_PATH)
1022 else:
1023 assert device_path.startswith('../')
1024 prefix_len = 3
1026 res_name_end = device_path.find('/', prefix_len)
1027 assert res_name_end != -1
1028 return device_path[prefix_len:res_name_end]
    def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
        """
        Change the uuid of a volume.

        The rename is done by copying the volume properties into the KV
        namespace of the new uuid, then clearing the old namespace. The
        PROP_UPDATING_UUID_SRC marker makes the operation crash-recoverable:
        a half-written destination can be detected and cleaned up.

        :param str volume_uuid: The volume to modify.
        :param str new_volume_uuid: The new volume uuid to use.
        :param bool force: If true we don't check if volume_uuid is in the
        volume list. I.e. the volume can be marked as deleted but the volume
        can still be in the LINSTOR KV store if the deletion has failed.
        In specific cases like "undo" after a failed clone we must rename a bad
        deleted VDI.
        :raises LinstorVolumeManagerError: If the target uuid already exists,
        the source is in an invalid state, or property copy/cleanup fails.
        """

        self._logger(
            'Trying to update volume UUID {} to {}...'
            .format(volume_uuid, new_volume_uuid)
        )
        assert volume_uuid != new_volume_uuid, 'can\'t update volume UUID, same value'

        if not force:
            self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        if new_volume_uuid in self._volumes:
            raise LinstorVolumeManagerError(
                'Volume `{}` already exists'.format(new_volume_uuid),
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        volume_properties = self._get_volume_properties(volume_uuid)
        # A set PROP_UPDATING_UUID_SRC means a previous rename of this volume
        # never completed: refuse to stack another rename on top of it.
        if volume_properties.get(self.PROP_UPDATING_UUID_SRC):
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {}: invalid state'
                .format(volume_uuid)
            )

        # 1. Copy in temp variables metadata and volume_name.
        metadata = volume_properties.get(self.PROP_METADATA)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        # 2. Switch to new volume namespace.
        volume_properties.namespace = self._build_volume_namespace(
            new_volume_uuid
        )

        # The destination namespace must be empty, otherwise we would
        # silently overwrite another volume's properties.
        if list(volume_properties.items()):
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {} to {}: '
                .format(volume_uuid, new_volume_uuid) +
                'this last one is not empty'
            )

        try:
            # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC.
            # If we crash after that, the new properties can be removed
            # properly.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
            volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid

            # 4. Copy the properties.
            # Note: On new volumes, during clone for example, the metadata
            # may be missing. So we must test it to avoid this error:
            # "None has to be a str/unicode, but is <type 'NoneType'>"
            if metadata:
                volume_properties[self.PROP_METADATA] = metadata
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            # 5. Ok!
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
        except Exception as err:
            try:
                # Clear the new volume properties in case of failure.
                assert volume_properties.namespace == \
                    self._build_volume_namespace(new_volume_uuid)
                volume_properties.clear()
            except Exception as e:
                # Best effort only: the marker left in step 3 still allows a
                # later repair pass to clean the destination namespace.
                self._logger(
                    'Failed to clear new volume properties: {} (ignoring...)'
                    .format(e)
                )
            raise LinstorVolumeManagerError(
                'Failed to copy volume properties: {}'.format(err)
            )

        try:
            # 6. After this point, it's ok we can remove the
            # PROP_UPDATING_UUID_SRC property and clear the src properties
            # without problems.

            # 7. Switch to old volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                volume_uuid
            )
            volume_properties.clear()

            # 8. Switch a last time to new volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                new_volume_uuid
            )
            volume_properties.pop(self.PROP_UPDATING_UUID_SRC)
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to clear volume properties '
                'after volume uuid update: {}'.format(e)
            )

        try:
            self._volumes.remove(volume_uuid)
        except KeyError:
            # Can be missing if we are building the volume set attr AND
            # we are processing a deleted resource.
            assert force

        self._volumes.add(new_volume_uuid)

        self._logger(
            'UUID update succeeded of {} to {}! (properties={})'
            .format(
                volume_uuid, new_volume_uuid,
                self._get_filtered_properties(volume_properties)
            )
        )
1152 def update_volume_name(self, volume_uuid, volume_name):
1153 """
1154 Change the volume name of a volume.
1155 :param str volume_uuid: The volume to modify.
1156 :param str volume_name: The volume_name to use.
1157 """
1159 self._ensure_volume_exists(volume_uuid)
1160 self.ensure_volume_is_not_locked(volume_uuid)
1161 if not volume_name.startswith(self.PREFIX_VOLUME):
1162 raise LinstorVolumeManagerError(
1163 'Volume name `{}` must be start with `{}`'
1164 .format(volume_name, self.PREFIX_VOLUME)
1165 )
1167 if volume_name not in self._fetch_resource_names():
1168 raise LinstorVolumeManagerError(
1169 'Volume `{}` doesn\'t exist'.format(volume_name)
1170 )
1172 volume_properties = self._get_volume_properties(volume_uuid)
1173 volume_properties[self.PROP_VOLUME_NAME] = volume_name
1175 def get_usage_states(self, volume_uuid):
1176 """
1177 Check if a volume is currently used.
1178 :param str volume_uuid: The volume uuid to check.
1179 :return: A dictionnary that contains states.
1180 :rtype: dict(str, bool or None)
1181 """
1183 states = {}
1185 volume_name = self.get_volume_name(volume_uuid)
1186 for resource_state in self._linstor.resource_list_raise(
1187 filter_by_resources=[volume_name]
1188 ).resource_states:
1189 states[resource_state.node_name] = resource_state.in_use
1191 return states
1193 def get_volume_openers(self, volume_uuid):
1194 """
1195 Get openers of a volume.
1196 :param str volume_uuid: The volume uuid to monitor.
1197 :return: A dictionnary that contains openers.
1198 :rtype: dict(str, obj)
1199 """
1200 return get_all_volume_openers(self.get_volume_name(volume_uuid), '0')
1202 def get_volumes_with_name(self):
1203 """
1204 Give a volume dictionnary that contains names actually owned.
1205 :return: A volume/name dict.
1206 :rtype: dict(str, str)
1207 """
1208 return self._get_volumes_by_property(self.REG_VOLUME_NAME)
1210 def get_volumes_with_info(self):
1211 """
1212 Give a volume dictionnary that contains VolumeInfos.
1213 :return: A volume/VolumeInfo dict.
1214 :rtype: dict(str, VolumeInfo)
1215 """
1217 volumes = {}
1219 all_volume_info = self._get_volumes_info()
1220 volume_names = self.get_volumes_with_name()
1221 for volume_uuid, volume_name in volume_names.items():
1222 if volume_name:
1223 volume_info = all_volume_info.get(volume_name)
1224 if volume_info:
1225 volumes[volume_uuid] = volume_info
1226 continue
1228 # Well I suppose if this volume is not available,
1229 # LINSTOR has been used directly without using this API.
1230 volumes[volume_uuid] = self.VolumeInfo('')
1232 return volumes
1234 def get_volumes_with_metadata(self):
1235 """
1236 Give a volume dictionnary that contains metadata.
1237 :return: A volume/metadata dict.
1238 :rtype: dict(str, dict)
1239 """
1241 volumes = {}
1243 metadata = self._get_volumes_by_property(self.REG_METADATA)
1244 for volume_uuid, volume_metadata in metadata.items():
1245 if volume_metadata:
1246 volume_metadata = json.loads(volume_metadata)
1247 if isinstance(volume_metadata, dict):
1248 volumes[volume_uuid] = volume_metadata
1249 continue
1250 raise LinstorVolumeManagerError(
1251 'Expected dictionary in volume metadata: {}'
1252 .format(volume_uuid)
1253 )
1255 volumes[volume_uuid] = {}
1257 return volumes
1259 def get_volume_metadata(self, volume_uuid):
1260 """
1261 Get the metadata of a volume.
1262 :return: Dictionary that contains metadata.
1263 :rtype: dict
1264 """
1266 self._ensure_volume_exists(volume_uuid)
1267 volume_properties = self._get_volume_properties(volume_uuid)
1268 metadata = volume_properties.get(self.PROP_METADATA)
1269 if metadata:
1270 metadata = json.loads(metadata)
1271 if isinstance(metadata, dict):
1272 return metadata
1273 raise LinstorVolumeManagerError(
1274 'Expected dictionary in volume metadata: {}'
1275 .format(volume_uuid)
1276 )
1277 return {}
1279 def set_volume_metadata(self, volume_uuid, metadata):
1280 """
1281 Set the metadata of a volume.
1282 :param dict metadata: Dictionary that contains metadata.
1283 """
1285 self._ensure_volume_exists(volume_uuid)
1286 self.ensure_volume_is_not_locked(volume_uuid)
1288 assert isinstance(metadata, dict)
1289 volume_properties = self._get_volume_properties(volume_uuid)
1290 volume_properties[self.PROP_METADATA] = json.dumps(metadata)
1292 def update_volume_metadata(self, volume_uuid, metadata):
1293 """
1294 Update the metadata of a volume. It modify only the given keys.
1295 It doesn't remove unreferenced key instead of set_volume_metadata.
1296 :param dict metadata: Dictionary that contains metadata.
1297 """
1299 self._ensure_volume_exists(volume_uuid)
1300 self.ensure_volume_is_not_locked(volume_uuid)
1302 assert isinstance(metadata, dict)
1303 volume_properties = self._get_volume_properties(volume_uuid)
1305 current_metadata = json.loads(
1306 volume_properties.get(self.PROP_METADATA, '{}')
1307 )
1308 if not isinstance(metadata, dict):
1309 raise LinstorVolumeManagerError(
1310 'Expected dictionary in volume metadata: {}'
1311 .format(volume_uuid)
1312 )
1314 for key, value in metadata.items():
1315 current_metadata[key] = value
1316 volume_properties[self.PROP_METADATA] = json.dumps(current_metadata)
1318 def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True):
1319 """
1320 Clone a volume. Do not copy the data, this method creates a new volume
1321 with the same size.
1322 :param str volume_uuid: The volume to clone.
1323 :param str clone_uuid: The cloned volume.
1324 :param bool persistent: If false the volume will be unavailable
1325 on the next constructor call LinstorSR(...).
1326 :return: The current device path of the cloned volume.
1327 :rtype: str
1328 """
1330 volume_name = self.get_volume_name(volume_uuid)
1331 self.ensure_volume_is_not_locked(volume_uuid)
1333 # 1. Find ideal nodes + size to use.
1334 ideal_node_names, size = self._get_volume_node_names_and_size(
1335 volume_name
1336 )
1337 if size <= 0:
1338 raise LinstorVolumeManagerError(
1339 'Invalid size of {} for volume `{}`'.format(size, volume_name)
1340 )
1342 # 2. Create clone!
1343 return self.create_volume(clone_uuid, size, persistent)
1345 def remove_resourceless_volumes(self):
1346 """
1347 Remove all volumes without valid or non-empty name
1348 (i.e. without LINSTOR resource). It's different than
1349 LinstorVolumeManager constructor that takes a `repair` param that
1350 removes volumes with `PROP_NOT_EXISTS` to 1.
1351 """
1353 resource_names = self._fetch_resource_names()
1354 for volume_uuid, volume_name in self.get_volumes_with_name().items():
1355 if not volume_name or volume_name not in resource_names:
1356 # Don't force, we can be sure of what's happening.
1357 self.destroy_volume(volume_uuid)
    def destroy(self):
        """
        Destroy this SR. Object should not be used after that.

        Refuses to run while any volume (owned or external) remains, then
        unmounts and destroys the LINSTOR database volume, destroys the
        resource groups and storage pools, and finally cleans the local
        database directory. On failure the controller is restored to its
        previous running state.

        :raises LinstorVolumeManagerError: If volumes or resources remain.
        """

        # 1. Ensure volume list is empty. No cost.
        if self._volumes:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes'
            )

        # 2. Fetch ALL resource names.
        # This list may therefore contain volumes created outside
        # the scope of the driver.
        resource_names = self._fetch_resource_names(ignore_deleted=False)
        try:
            resource_names.remove(DATABASE_VOLUME_NAME)
        except KeyError:
            # Really strange to reach that point.
            # Normally we always have the database volume in the list.
            pass

        # 3. Ensure the resource name list is entirely empty...
        if resource_names:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes (created externally or being deleted)'
            )

        # 4. Destroying...
        controller_is_running = self._controller_is_running()
        uri = 'linstor://localhost'
        try:
            if controller_is_running:
                self._start_controller(start=False)

            # 4.1. Umount LINSTOR database.
            self._mount_database_volume(
                self.build_device_path(DATABASE_VOLUME_NAME),
                mount=False,
                force=True
            )

            # 4.2. Refresh instance.
            # The controller must be up again to issue the destroy calls.
            self._start_controller(start=True)
            self._linstor = self._create_linstor_instance(
                uri, keep_uri_unmodified=True
            )

            # 4.3. Destroy database volume.
            self._destroy_resource(DATABASE_VOLUME_NAME)

            # 4.4. Refresh linstor connection.
            # Without we get this error:
            # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.."
            # Because the deletion of the databse was not seen by Linstor for some reason.
            # It seems a simple refresh of the Linstor connection make it aware of the deletion.
            self._linstor.disconnect()
            self._linstor.connect()

            # 4.5. Destroy remaining drbd nodes on hosts.
            # We check if there is a DRBD node on hosts that could mean blocking when destroying resource groups.
            # It needs to be done locally by each host so we go through the linstor-manager plugin.
            # If we don't do this sometimes, the destroy will fail when trying to destroy the resource groups with:
            # "linstor-manager:destroy error: Failed to destroy SP `xcp-sr-linstor_group_thin_device` on node `r620-s2`: The specified storage pool 'xcp-sr-linstor_group_thin_device' on node 'r620-s2' can not be deleted as volumes / snapshot-volumes are still using it."
            session = util.timeout_call(5, util.get_localAPI_session)
            for host_ref in session.xenapi.host.get_all():
                try:
                    # NOTE(review): `response` is unused — the plugin result
                    # is deliberately ignored, this is a best-effort cleanup.
                    response = session.xenapi.host.call_plugin(
                        host_ref, 'linstor-manager', 'destroyDrbdVolumes', {'volume_group': self._group_name}
                    )
                except Exception as e:
                    util.SMlog('Calling destroyDrbdVolumes on host {} failed with error {}'.format(host_ref, e))

            # 4.6. Destroy group and storage pools.
            self._destroy_resource_group(self._linstor, self._group_name)
            self._destroy_resource_group(self._linstor, self._ha_group_name)
            for pool in self._get_storage_pools(force=True):
                self._destroy_storage_pool(
                    self._linstor, pool.name, pool.node_name
                )
        except Exception as e:
            # Restore the controller to the state it was in before destroy.
            self._start_controller(start=controller_is_running)
            raise e

        # Final best-effort cleanup of the local database directory;
        # failures here are only logged.
        try:
            self._start_controller(start=False)
            for file in os.listdir(DATABASE_PATH):
                if file != 'lost+found':
                    os.remove(DATABASE_PATH + '/' + file)
        except Exception as e:
            util.SMlog(
                'Ignoring failure after LINSTOR SR destruction: {}'
                .format(e)
            )
1457 def find_up_to_date_diskful_nodes(self, volume_uuid):
1458 """
1459 Find all nodes that contain a specific volume using diskful disks.
1460 The disk must be up to data to be used.
1461 :param str volume_uuid: The volume to use.
1462 :return: The available nodes.
1463 :rtype: tuple(set(str), str)
1464 """
1466 volume_name = self.get_volume_name(volume_uuid)
1468 in_use_by = None
1469 node_names = set()
1471 resource_states = filter(
1472 lambda resource_state: resource_state.name == volume_name,
1473 self._get_resource_cache().resource_states
1474 )
1476 for resource_state in resource_states:
1477 volume_state = resource_state.volume_states[0]
1478 if volume_state.disk_state == 'UpToDate':
1479 node_names.add(resource_state.node_name)
1480 if resource_state.in_use:
1481 in_use_by = resource_state.node_name
1483 return (node_names, in_use_by)
    def invalidate_resource_cache(self):
        """
        If resources are impacted by external commands like vhdutil,
        it's necessary to call this function to invalidate current resource
        cache.
        """
        # The cache is rebuilt lazily on the next resource access.
        self._mark_resource_cache_as_dirty()
1493 def has_node(self, node_name):
1494 """
1495 Check if a node exists in the LINSTOR database.
1496 :rtype: bool
1497 """
1498 result = self._linstor.node_list()
1499 error_str = self._get_error_str(result)
1500 if error_str:
1501 raise LinstorVolumeManagerError(
1502 'Failed to list nodes using `{}`: {}'
1503 .format(node_name, error_str)
1504 )
1505 return bool(result[0].node(node_name))
1507 def create_node(self, node_name, ip):
1508 """
1509 Create a new node in the LINSTOR database.
1510 :param str node_name: Node name to use.
1511 :param str ip: Host IP to communicate.
1512 """
1513 result = self._linstor.node_create(
1514 node_name,
1515 linstor.consts.VAL_NODE_TYPE_CMBD,
1516 ip
1517 )
1518 errors = self._filter_errors(result)
1519 if errors:
1520 error_str = self._get_error_str(errors)
1521 raise LinstorVolumeManagerError(
1522 'Failed to create node `{}`: {}'.format(node_name, error_str)
1523 )
1525 def destroy_node(self, node_name):
1526 """
1527 Destroy a node in the LINSTOR database.
1528 :param str node_name: Node name to remove.
1529 """
1530 result = self._linstor.node_delete(node_name)
1531 errors = self._filter_errors(result)
1532 if errors:
1533 error_str = self._get_error_str(errors)
1534 raise LinstorVolumeManagerError(
1535 'Failed to destroy node `{}`: {}'.format(node_name, error_str)
1536 )
1538 def create_node_interface(self, node_name, name, ip):
1539 """
1540 Create a new node interface in the LINSTOR database.
1541 :param str node_name: Node name of the interface to use.
1542 :param str name: Interface to create.
1543 :param str ip: IP of the interface.
1544 """
1545 result = self._linstor.netinterface_create(node_name, name, ip)
1546 errors = self._filter_errors(result)
1547 if errors:
1548 error_str = self._get_error_str(errors)
1549 raise LinstorVolumeManagerError(
1550 'Failed to create node interface on `{}`: {}'.format(node_name, error_str)
1551 )
1553 def destroy_node_interface(self, node_name, name):
1554 """
1555 Destroy a node interface in the LINSTOR database.
1556 :param str node_name: Node name of the interface to remove.
1557 :param str name: Interface to remove.
1558 """
1560 if name == 'default':
1561 raise LinstorVolumeManagerError(
1562 'Unable to delete the default interface of a node!'
1563 )
1565 result = self._linstor.netinterface_delete(node_name, name)
1566 errors = self._filter_errors(result)
1567 if errors:
1568 error_str = self._get_error_str(errors)
1569 raise LinstorVolumeManagerError(
1570 'Failed to destroy node interface on `{}`: {}'.format(node_name, error_str)
1571 )
1573 def modify_node_interface(self, node_name, name, ip):
1574 """
1575 Modify a node interface in the LINSTOR database. Create it if necessary.
1576 :param str node_name: Node name of the interface to use.
1577 :param str name: Interface to modify or create.
1578 :param str ip: IP of the interface.
1579 """
1580 result = self._linstor.netinterface_create(node_name, name, ip)
1581 errors = self._filter_errors(result)
1582 if not errors:
1583 return
1585 if self._check_errors(errors, [linstor.consts.FAIL_EXISTS_NET_IF]):
1586 result = self._linstor.netinterface_modify(node_name, name, ip)
1587 errors = self._filter_errors(result)
1588 if not errors:
1589 return
1591 error_str = self._get_error_str(errors)
1592 raise LinstorVolumeManagerError(
1593 'Unable to modify interface on `{}`: {}'.format(node_name, error_str)
1594 )
1596 def list_node_interfaces(self, node_name):
1597 """
1598 List all node interfaces.
1599 :param str node_name: Node name to use to list interfaces.
1600 :rtype: list
1601 :
1602 """
1603 result = self._linstor.net_interface_list(node_name)
1604 if not result:
1605 raise LinstorVolumeManagerError(
1606 'Unable to list interfaces on `{}`: no list received'.format(node_name)
1607 )
1609 interfaces = {}
1610 for interface in result:
1611 interface = interface._rest_data
1612 interfaces[interface['name']] = {
1613 'address': interface['address'],
1614 'active': interface['is_active']
1615 }
1616 return interfaces
1618 def get_node_preferred_interface(self, node_name):
1619 """
1620 Get the preferred interface used by a node.
1621 :param str node_name: Node name of the interface to get.
1622 :rtype: str
1623 """
1624 try:
1625 nodes = self._linstor.node_list_raise([node_name]).nodes
1626 if nodes:
1627 properties = nodes[0].props
1628 return properties.get('PrefNic', 'default')
1629 return nodes
1630 except Exception as e:
1631 raise LinstorVolumeManagerError(
1632 'Failed to get preferred interface: `{}`'.format(e)
1633 )
1635 def set_node_preferred_interface(self, node_name, name):
1636 """
1637 Set the preferred interface to use on a node.
1638 :param str node_name: Node name of the interface.
1639 :param str name: Preferred interface to use.
1640 """
1641 result = self._linstor.node_modify(node_name, property_dict={'PrefNic': name})
1642 errors = self._filter_errors(result)
1643 if errors:
1644 error_str = self._get_error_str(errors)
1645 raise LinstorVolumeManagerError(
1646 'Failed to set preferred node interface on `{}`: {}'.format(node_name, error_str)
1647 )
1649 def get_nodes_info(self):
1650 """
1651 Get all nodes + statuses, used or not by the pool.
1652 :rtype: dict(str, dict)
1653 """
1654 try:
1655 nodes = {}
1656 for node in self._linstor.node_list_raise().nodes:
1657 nodes[node.name] = node.connection_status
1658 return nodes
1659 except Exception as e:
1660 raise LinstorVolumeManagerError(
1661 'Failed to get all nodes: `{}`'.format(e)
1662 )
1664 def get_storage_pools_info(self):
1665 """
1666 Give all storage pools of current group name.
1667 :rtype: dict(str, list)
1668 """
1669 storage_pools = {}
1670 for pool in self._get_storage_pools(force=True):
1671 if pool.node_name not in storage_pools:
1672 storage_pools[pool.node_name] = []
1674 size = -1
1675 capacity = -1
1677 space = pool.free_space
1678 if space:
1679 size = space.free_capacity
1680 if size < 0:
1681 size = -1
1682 else:
1683 size *= 1024
1684 capacity = space.total_capacity
1685 if capacity <= 0:
1686 capacity = -1
1687 else:
1688 capacity *= 1024
1690 storage_pools[pool.node_name].append({
1691 'name': pool.name,
1692 'linstor-uuid': pool.uuid,
1693 'free-size': size,
1694 'capacity': capacity
1695 })
1697 return storage_pools
1699 def get_resources_info(self):
1700 """
1701 Give all resources of current group name.
1702 :rtype: dict(str, list)
1703 """
1704 resources = {}
1705 resource_list = self._get_resource_cache()
1706 volume_names = self.get_volumes_with_name()
1707 for resource in resource_list.resources:
1708 if resource.name not in resources:
1709 resources[resource.name] = { 'nodes': {}, 'uuid': '' }
1710 resource_nodes = resources[resource.name]['nodes']
1712 resource_nodes[resource.node_name] = {
1713 'volumes': [],
1714 'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags,
1715 'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags
1716 }
1717 resource_volumes = resource_nodes[resource.node_name]['volumes']
1719 for volume in resource.volumes:
1720 # We ignore diskless pools of the form "DfltDisklessStorPool".
1721 if volume.storage_pool_name != self._group_name:
1722 continue
1724 usable_size = volume.usable_size
1725 if usable_size < 0:
1726 usable_size = -1
1727 else:
1728 usable_size *= 1024
1730 allocated_size = volume.allocated_size
1731 if allocated_size < 0:
1732 allocated_size = -1
1733 else:
1734 allocated_size *= 1024
1736 resource_volumes.append({
1737 'storage-pool-name': volume.storage_pool_name,
1738 'linstor-uuid': volume.uuid,
1739 'number': volume.number,
1740 'device-path': volume.device_path,
1741 'usable-size': usable_size,
1742 'allocated-size': allocated_size
1743 })
1745 for resource_state in resource_list.resource_states:
1746 resource = resources[resource_state.rsc_name]['nodes'][resource_state.node_name]
1747 resource['in-use'] = resource_state.in_use
1749 volumes = resource['volumes']
1750 for volume_state in resource_state.volume_states:
1751 volume = next((x for x in volumes if x['number'] == volume_state.number), None)
1752 if volume:
1753 volume['disk-state'] = volume_state.disk_state
1755 for volume_uuid, volume_name in volume_names.items():
1756 resource = resources.get(volume_name)
1757 if resource:
1758 resource['uuid'] = volume_uuid
1760 return resources
    def get_database_path(self):
        """
        Get the database path.
        :return: The current database path.
        :rtype: str
        """
        # activate=True presumably makes the DRBD device usable before
        # returning its path — TODO confirm against _request_database_path.
        return self._request_database_path(self._linstor, activate=True)
1770 @classmethod
1771 def get_all_group_names(cls, base_name):
1772 """
1773 Get all group names. I.e. list of current group + HA.
1774 :param str base_name: The SR group_name to use.
1775 :return: List of group names.
1776 :rtype: list
1777 """
1778 return [cls._build_group_name(base_name), cls._build_ha_group_name(base_name)]
1780 @classmethod
1781 def create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__):
1782 """
1783 Create a new SR on the given nodes.
1784 :param str group_name: The SR group_name to use.
1785 :param set(str) ips: Node ips.
1786 :param int redundancy: How many copy of volumes should we store?
1787 :param bool thin_provisioning: Use thin or thick provisioning.
1788 :param function logger: Function to log messages.
1789 :return: A new LinstorSr instance.
1790 :rtype: LinstorSr
1791 """
1793 try:
1794 cls._start_controller(start=True)
1795 sr = cls._create_sr(group_name, ips, redundancy, thin_provisioning, logger)
1796 finally:
1797 # Controller must be stopped and volume unmounted because
1798 # it is the role of the drbd-reactor daemon to do the right
1799 # actions.
1800 cls._start_controller(start=False)
1801 cls._mount_volume(
1802 cls.build_device_path(DATABASE_VOLUME_NAME),
1803 DATABASE_PATH,
1804 mount=False
1805 )
1806 return sr
    @classmethod
    def _create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__):
        """
        Do the real SR creation work: register the nodes, create the storage
        pools and resource groups, then create and mount the LINSTOR
        database volume. On any failure, best-effort rollback destroys the
        resource groups and the storage pools created so far.

        :param str group_name: The SR group_name to use (VG or VG/LV).
        :param dict ips: Node name to IP mapping.
        :param int redundancy: How many copies of volumes should we store?
        :param bool thin_provisioning: Use thin or thick provisioning.
        :param function logger: Function to log messages.
        :return: A new instance bound to the created SR.
        """
        # 1. Check if SR already exists.
        uri = 'linstor://localhost'

        lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)

        node_names = list(ips.keys())
        for node_name, ip in ips.items():
            while True:
                # Try to create node.
                result = lin.node_create(
                    node_name,
                    linstor.consts.VAL_NODE_TYPE_CMBD,
                    ip
                )

                errors = cls._filter_errors(result)
                if cls._check_errors(
                    errors, [linstor.consts.FAIL_EXISTS_NODE]
                ):
                    # If it already exists, remove, then recreate.
                    result = lin.node_delete(node_name)
                    error_str = cls._get_error_str(result)
                    if error_str:
                        raise LinstorVolumeManagerError(
                            'Failed to remove old node `{}`: {}'
                            .format(node_name, error_str)
                        )
                elif not errors:
                    break  # Created!
                else:
                    raise LinstorVolumeManagerError(
                        'Failed to create node `{}` with ip `{}`: {}'.format(
                            node_name, ip, cls._get_error_str(errors)
                        )
                    )

        driver_pool_name = group_name
        base_group_name = group_name
        group_name = cls._build_group_name(group_name)
        storage_pool_name = group_name
        pools = lin.storage_pool_list_raise(filter_by_stor_pools=[storage_pool_name]).storage_pools
        if pools:
            existing_node_names = [pool.node_name for pool in pools]
            raise LinstorVolumeManagerError(
                'Unable to create SR `{}`. It already exists on node(s): {}'
                .format(group_name, existing_node_names)
            )

        # A leftover resource group without any resource definition is a
        # stale config: back it up and continue. Otherwise refuse.
        if lin.resource_group_list_raise(
            cls.get_all_group_names(base_group_name)
        ).resource_groups:
            if not lin.resource_dfn_list_raise().resource_definitions:
                backup_path = cls._create_database_backup_path()
                logger(
                    'Group name already exists `{}` without LVs. '
                    'Ignoring and moving the config files in {}'.format(group_name, backup_path)
                )
                cls._move_files(DATABASE_PATH, backup_path)
            else:
                raise LinstorVolumeManagerError(
                    'Unable to create SR `{}`: The group name already exists'
                    .format(group_name)
                )

        # Thin provisioning requires an LVM thin pool reference 'VG/LV'.
        if thin_provisioning:
            driver_pool_parts = driver_pool_name.split('/')
            if not len(driver_pool_parts) == 2:
                raise LinstorVolumeManagerError(
                    'Invalid group name using thin provisioning. '
                    'Expected format: \'VG/LV`\''
                )

        # 2. Create storage pool on each node + resource group.
        reg_volume_group_not_found = re.compile(
            ".*Volume group '.*' not found$"
        )

        # `i` tracks how far node processing went; it is reused by the
        # rollback code below to know which storage pools to destroy.
        i = 0
        try:
            # 2.a. Create storage pools.
            storage_pool_count = 0
            while i < len(node_names):
                node_name = node_names[i]

                result = lin.storage_pool_create(
                    node_name=node_name,
                    storage_pool_name=storage_pool_name,
                    storage_driver='LVM_THIN' if thin_provisioning else 'LVM',
                    driver_pool_name=driver_pool_name
                )

                errors = linstor.Linstor.filter_api_call_response_errors(
                    result
                )
                if errors:
                    # A missing VG on one node is tolerated: the pool is
                    # simply not created there.
                    if len(errors) == 1 and errors[0].is_error(
                        linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR
                    ) and reg_volume_group_not_found.match(errors[0].message):
                        logger(
                            'Volume group `{}` not found on `{}`. Ignoring...'
                            .format(group_name, node_name)
                        )
                        cls._destroy_storage_pool(lin, storage_pool_name, node_name)
                    else:
                        error_str = cls._get_error_str(result)
                        raise LinstorVolumeManagerError(
                            'Could not create SP `{}` on node `{}`: {}'
                            .format(group_name, node_name, error_str)
                        )
                else:
                    storage_pool_count += 1
                i += 1

            if not storage_pool_count:
                raise LinstorVolumeManagerError(
                    'Unable to create SR `{}`: No VG group found'.format(
                        group_name,
                    )
                )

            # 2.b. Create resource groups.
            # The HA group always uses a redundancy of 3.
            ha_group_name = cls._build_ha_group_name(base_group_name)
            cls._create_resource_group(
                lin,
                group_name,
                storage_pool_name,
                redundancy,
                True
            )
            cls._create_resource_group(
                lin,
                ha_group_name,
                storage_pool_name,
                3,
                True
            )

            # 3. Create the LINSTOR database volume and mount it.
            try:
                logger('Creating database volume...')
                volume_path = cls._create_database_volume(
                    lin, ha_group_name, storage_pool_name, node_names, redundancy
                )
            except LinstorVolumeManagerError as e:
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    logger('Destroying database volume after creation fail...')
                    cls._force_destroy_database_volume(lin, group_name)
                raise

            try:
                logger('Mounting database volume...')

                # First we must disable the controller to move safely the
                # LINSTOR config.
                cls._start_controller(start=False)

                cls._mount_database_volume(volume_path)
            except Exception as e:
                # Ensure we are connected because controller has been
                # restarted during mount call.
                logger('Destroying database volume after mount fail...')

                try:
                    cls._start_controller(start=True)
                except Exception:
                    pass

                lin = cls._create_linstor_instance(
                    uri, keep_uri_unmodified=True
                )
                cls._force_destroy_database_volume(lin, group_name)
                raise e

            cls._start_controller(start=True)
            lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)

        # 4. Remove storage pools/resource/volume group in the case of errors.
        except Exception as e:
            logger('Destroying resource group and storage pools after fail...')
            try:
                # NOTE(review): if the failure happened before step 2.b,
                # `ha_group_name` is unbound here; the resulting NameError is
                # swallowed by this except clause — confirm intended.
                cls._destroy_resource_group(lin, group_name)
                cls._destroy_resource_group(lin, ha_group_name)
            except Exception as e2:
                logger('Failed to destroy resource group: {}'.format(e2))
                pass
            j = 0
            i = min(i, len(node_names) - 1)
            while j <= i:
                try:
                    cls._destroy_storage_pool(lin, storage_pool_name, node_names[j])
                except Exception as e2:
                    logger('Failed to destroy resource group: {}'.format(e2))
                    pass
                j += 1
            raise e

        # 5. Return new instance.
        # __new__ is used on purpose: the regular constructor would try to
        # connect/load state that does not exist yet.
        instance = cls.__new__(cls)
        instance._linstor = lin
        instance._logger = logger
        instance._redundancy = redundancy
        instance._base_group_name = base_group_name
        instance._group_name = group_name
        instance._volumes = set()
        instance._storage_pools_time = 0
        instance._kv_cache = instance._create_kv_cache()
        instance._resource_cache = None
        instance._resource_cache_dirty = True
        instance._volume_info_cache = None
        instance._volume_info_cache_dirty = True
        return instance
2022 @classmethod
2023 def build_device_path(cls, volume_name):
2024 """
2025 Build a device path given a volume name.
2026 :param str volume_name: The volume name to use.
2027 :return: A valid or not device path.
2028 :rtype: str
2029 """
2031 return '{}{}/0'.format(cls.DEV_ROOT_PATH, volume_name)
2033 @classmethod
2034 def build_volume_name(cls, base_name):
2035 """
2036 Build a volume name given a base name (i.e. a UUID).
2037 :param str base_name: The volume name to use.
2038 :return: A valid or not device path.
2039 :rtype: str
2040 """
2041 return '{}{}'.format(cls.PREFIX_VOLUME, base_name)
    @classmethod
    def round_up_volume_size(cls, volume_size):
        """
        Align volume size on higher multiple of BLOCK_SIZE.
        :param int volume_size: The volume size to align.
        :return: An aligned volume size.
        :rtype: int
        """
        # Delegates to the module-level round_up helper.
        return round_up(volume_size, cls.BLOCK_SIZE)
    @classmethod
    def round_down_volume_size(cls, volume_size):
        """
        Align volume size on lower multiple of BLOCK_SIZE.
        :param int volume_size: The volume size to align.
        :return: An aligned volume size.
        :rtype: int
        """
        # Delegates to the module-level round_down helper.
        return round_down(volume_size, cls.BLOCK_SIZE)
2063 # --------------------------------------------------------------------------
2064 # Private helpers.
2065 # --------------------------------------------------------------------------
2067 def _create_kv_cache(self):
2068 self._kv_cache = self._create_linstor_kv('/')
2069 self._kv_cache_dirty = False
2070 return self._kv_cache
2072 def _get_kv_cache(self):
2073 if self._kv_cache_dirty:
2074 self._kv_cache = self._create_kv_cache()
2075 return self._kv_cache
2077 def _create_resource_cache(self):
2078 self._resource_cache = self._linstor.resource_list_raise()
2079 self._resource_cache_dirty = False
2080 return self._resource_cache
2082 def _get_resource_cache(self):
2083 if self._resource_cache_dirty:
2084 self._resource_cache = self._create_resource_cache()
2085 return self._resource_cache
2087 def _mark_resource_cache_as_dirty(self):
2088 self._resource_cache_dirty = True
2089 self._volume_info_cache_dirty = True
2091 # --------------------------------------------------------------------------
2093 def _ensure_volume_exists(self, volume_uuid):
2094 if volume_uuid not in self._volumes:
2095 raise LinstorVolumeManagerError(
2096 'volume `{}` doesn\'t exist'.format(volume_uuid),
2097 LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
2098 )
2100 def _find_best_size_candidates(self):
2101 result = self._linstor.resource_group_qmvs(self._group_name)
2102 error_str = self._get_error_str(result)
2103 if error_str:
2104 raise LinstorVolumeManagerError(
2105 'Failed to get max volume size allowed of SR `{}`: {}'.format(
2106 self._group_name,
2107 error_str
2108 )
2109 )
2110 return result[0].candidates
    def _fetch_resource_names(self, ignore_deleted=True):
        """
        List the resource definition names attached to this SR's groups.
        :param bool ignore_deleted: When True the DELETE flag is ignored,
            i.e. definitions currently being deleted are still included.
            NOTE(review): the name suggests the opposite reading — confirm
            against callers.
        :return: The matching resource definition names.
        :rtype: set(str)
        """
        resource_names = set()
        dfns = self._linstor.resource_dfn_list_raise().resource_definitions
        for dfn in dfns:
            # Keep definitions belonging to one of our groups (base group
            # and its variants, per get_all_group_names).
            if dfn.resource_group_name in self.get_all_group_names(self._base_group_name) and (
                ignore_deleted or
                linstor.consts.FLAG_DELETE not in dfn.flags
            ):
                resource_names.add(dfn.name)
        return resource_names
2123 def _get_volumes_info(self, volume_name=None):
2124 all_volume_info = {}
2126 if not self._volume_info_cache_dirty:
2127 return self._volume_info_cache
2129 def process_resource(resource):
2130 if resource.name not in all_volume_info:
2131 current = all_volume_info[resource.name] = self.VolumeInfo(
2132 resource.name
2133 )
2134 else:
2135 current = all_volume_info[resource.name]
2137 if linstor.consts.FLAG_DISKLESS not in resource.flags:
2138 current.diskful.append(resource.node_name)
2140 for volume in resource.volumes:
2141 # We ignore diskless pools of the form "DfltDisklessStorPool".
2142 if volume.storage_pool_name != self._group_name:
2143 continue
2144 # Only fetch first volume.
2145 if volume.number != 0:
2146 continue
2148 allocated_size = volume.allocated_size
2149 if allocated_size > current.allocated_size:
2150 current.allocated_size = allocated_size
2152 usable_size = volume.usable_size
2153 if usable_size > 0 and (
2154 usable_size < current.virtual_size or
2155 not current.virtual_size
2156 ):
2157 current.virtual_size = usable_size
2159 try:
2160 for resource in self._get_resource_cache().resources:
2161 process_resource(resource)
2162 for volume in all_volume_info.values():
2163 if volume.allocated_size <= 0:
2164 raise LinstorVolumeManagerError('Failed to get allocated size of `{}`'.format(resource.name))
2166 if volume.virtual_size <= 0:
2167 raise LinstorVolumeManagerError('Failed to get usable size of `{}`'.format(volume.name))
2169 volume.allocated_size *= 1024
2170 volume.virtual_size *= 1024
2171 except LinstorVolumeManagerError:
2172 self._mark_resource_cache_as_dirty()
2173 raise
2175 self._volume_info_cache_dirty = False
2176 self._volume_info_cache = all_volume_info
2178 return all_volume_info
2180 def _get_volume_node_names_and_size(self, volume_name):
2181 node_names = set()
2182 size = -1
2183 for resource in self._linstor.resource_list_raise(
2184 filter_by_resources=[volume_name]
2185 ).resources:
2186 for volume in resource.volumes:
2187 # We ignore diskless pools of the form "DfltDisklessStorPool".
2188 if volume.storage_pool_name != self._group_name:
2189 continue
2191 node_names.add(resource.node_name)
2193 usable_size = volume.usable_size
2194 if usable_size <= 0:
2195 continue
2197 if size < 0:
2198 size = usable_size
2199 else:
2200 size = min(size, usable_size)
2202 if size <= 0:
2203 raise LinstorVolumeManagerError('Failed to get usable size of `{}`'.format(resource.name))
2205 return (node_names, size * 1024)
2207 def _compute_size(self, attr):
2208 capacity = 0
2209 for pool in self._get_storage_pools(force=True):
2210 space = pool.free_space
2211 if space:
2212 size = getattr(space, attr)
2213 if size < 0:
2214 raise LinstorVolumeManagerError(
2215 'Failed to get pool {} attr of `{}`'
2216 .format(attr, pool.node_name)
2217 )
2218 capacity += size
2219 return capacity * 1024
2221 def _get_node_names(self):
2222 node_names = set()
2223 for pool in self._get_storage_pools():
2224 node_names.add(pool.node_name)
2225 return node_names
2227 def _get_storage_pools(self, force=False):
2228 cur_time = time.time()
2229 elsaped_time = cur_time - self._storage_pools_time
2231 if force or elsaped_time >= self.STORAGE_POOLS_FETCH_INTERVAL:
2232 self._storage_pools = self._linstor.storage_pool_list_raise(
2233 filter_by_stor_pools=[self._group_name]
2234 ).storage_pools
2235 self._storage_pools_time = time.time()
2237 return self._storage_pools
    def _create_volume(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Low-level volume creation: spawn the resource definition, configure
        peer slots and optionally auto-place the resource. The whole
        sequence is retried up to 5 times; on failure the partially created
        volume is destroyed (its KV properties are preserved).
        :param str volume_uuid: UUID of the new volume (errors/cleanup).
        :param str volume_name: LINSTOR resource name to create.
        :param int size: Requested size in bytes, rounded up to BLOCK_SIZE.
        :param bool place_resources: Auto-place using the SR redundancy.
        :param bool high_availability: Use the HA resource group; it is
            created on the fly (replication count 3) when missing.
        """
        size = self.round_up_volume_size(size)
        self._mark_resource_cache_as_dirty()

        group_name = self._ha_group_name if high_availability else self._group_name
        def create_definition():
            # One retry is allowed, only to (re)create a missing HA group.
            first_attempt = True
            while True:
                try:
                    self._check_volume_creation_errors(
                        self._linstor.resource_group_spawn(
                            rsc_grp_name=group_name,
                            rsc_dfn_name=volume_name,
                            vlm_sizes=['{}B'.format(size)],
                            definitions_only=True
                        ),
                        volume_uuid,
                        self._group_name
                    )
                    break
                except LinstorVolumeManagerError as e:
                    if (
                        not first_attempt or
                        not high_availability or
                        e.code != LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
                    ):
                        raise

                    first_attempt = False
                    self._create_resource_group(
                        self._linstor,
                        group_name,
                        self._group_name,
                        3,
                        True
                    )

            self._configure_volume_peer_slots(self._linstor, volume_name)

        def clean():
            # Best-effort cleanup: keep KV properties for the caller.
            try:
                self._destroy_volume(volume_uuid, force=True, preserve_properties=True)
            except Exception as e:
                self._logger(
                    'Unable to destroy volume {} after creation fail: {}'
                    .format(volume_uuid, e)
                )

        def create():
            try:
                create_definition()
                if place_resources:
                    # Basic case when we use the default redundancy of the group.
                    self._check_volume_creation_errors(
                        self._linstor.resource_auto_place(
                            rsc_name=volume_name,
                            place_count=self._redundancy,
                            diskless_on_remaining=False
                        ),
                        volume_uuid,
                        self._group_name
                    )
            except LinstorVolumeManagerError as e:
                # "Already exists" is left alone: another host may own it.
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    clean()
                raise
            except Exception:
                clean()
                raise

        util.retry(create, maxretry=5)
    def _create_volume_with_properties(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create a volume and register it in the KV store: mark the volume
        as CREATING, store its resource name, then create the LINSTOR
        resource. On failure the volume is destroyed — except when it
        already exists, which means another host created it concurrently.
        :return: The volume properties, namespaced to the new volume.
        :rtype: linstor.KV
        :raises LinstorVolumeManagerError: On any creation conflict/failure.
        """
        if self.check_volume_exists(volume_uuid):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, it already exists'
                .format(volume_uuid, self._group_name) + ' in properties',
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        if volume_name in self._fetch_resource_names():
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    volume_uuid, self._group_name
                ) + 'resource of the same name already exists in LINSTOR'
            )

        # I am paranoid.
        volume_properties = self._get_volume_properties(volume_uuid)
        if (volume_properties.get(self.PROP_NOT_EXISTS) is not None):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}`, '.format(volume_uuid) +
                'properties already exist'
            )

        try:
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_CREATING
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            self._create_volume(
                volume_uuid,
                volume_name,
                size,
                place_resources,
                high_availability
            )

            assert volume_properties.namespace == \
                self._build_volume_namespace(volume_uuid)
            return volume_properties
        except LinstorVolumeManagerError as e:
            # Do not destroy existing resource!
            # In theory we can't get this error because we check this event
            # before the `self._create_volume` case.
            # It can only happen if the same volume uuid is used in the same
            # call in another host.
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                self._destroy_volume(volume_uuid, force=True)
            raise
2373 def _find_device_path(self, volume_uuid, volume_name):
2374 current_device_path = self._request_device_path(
2375 volume_uuid, volume_name, activate=True
2376 )
2378 # We use realpath here to get the /dev/drbd<id> path instead of
2379 # /dev/drbd/by-res/<resource_name>.
2380 expected_device_path = self.build_device_path(volume_name)
2381 util.wait_for_path(expected_device_path, 5)
2383 device_realpath = os.path.realpath(expected_device_path)
2384 if current_device_path != device_realpath:
2385 raise LinstorVolumeManagerError(
2386 'Invalid path, current={}, expected={} (realpath={})'
2387 .format(
2388 current_device_path,
2389 expected_device_path,
2390 device_realpath
2391 )
2392 )
2393 return expected_device_path
    def _request_device_path(self, volume_uuid, volume_name, activate=False):
        """
        Return the local /dev/drbd<id> path of a volume.
        :param str volume_uuid: Volume UUID (used in error messages).
        :param str volume_name: LINSTOR resource name.
        :param bool activate: When the resource is absent on this node,
            activate it (diskless) and retry once.
        :return: The device path.
        :rtype: str
        """
        node_name = socket.gethostname()

        resource = next(filter(
            lambda resource: resource.node_name == node_name and
            resource.name == volume_name,
            self._get_resource_cache().resources
        ), None)

        if not resource:
            if activate:
                self._mark_resource_cache_as_dirty()
                self._activate_device_path(
                    self._linstor, node_name, volume_name
                )
                # Recurse WITHOUT activate so a second miss raises instead
                # of looping forever.
                return self._request_device_path(volume_uuid, volume_name)
            raise LinstorVolumeManagerError(
                'Unable to get dev path for `{}`, no resource found but definition "seems" to exist'
                .format(volume_uuid)
            )

        # Contains a path of the /dev/drbd<id> form.
        device_path = resource.volumes[0].device_path
        if not device_path:
            raise LinstorVolumeManagerError('Empty dev path for `{}`!'.format(volume_uuid))
        return device_path
2422 def _destroy_resource(self, resource_name, force=False):
2423 result = self._linstor.resource_dfn_delete(resource_name)
2424 error_str = self._get_error_str(result)
2425 if not error_str:
2426 self._mark_resource_cache_as_dirty()
2427 return
2429 if not force:
2430 self._mark_resource_cache_as_dirty()
2431 raise LinstorVolumeManagerError(
2432 'Could not destroy resource `{}` from SR `{}`: {}'
2433 .format(resource_name, self._group_name, error_str)
2434 )
2436 # If force is used, ensure there is no opener.
2437 all_openers = get_all_volume_openers(resource_name, '0')
2438 for openers in all_openers.values():
2439 if openers:
2440 self._mark_resource_cache_as_dirty()
2441 raise LinstorVolumeManagerError(
2442 'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)'
2443 .format(resource_name, self._group_name, error_str, all_openers)
2444 )
2446 # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue?
2447 resource_states = filter(
2448 lambda resource_state: resource_state.name == resource_name,
2449 self._get_resource_cache().resource_states
2450 )
2452 # Mark only after computation of states.
2453 self._mark_resource_cache_as_dirty()
2455 for resource_state in resource_states:
2456 volume_state = resource_state.volume_states[0]
2457 if resource_state.in_use:
2458 demote_drbd_resource(resource_state.node_name, resource_name)
2459 break
2460 self._destroy_resource(resource_name)
2462 def _destroy_volume(self, volume_uuid, force=False, preserve_properties=False):
2463 volume_properties = self._get_volume_properties(volume_uuid)
2464 try:
2465 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
2466 if volume_name in self._fetch_resource_names():
2467 self._destroy_resource(volume_name, force)
2469 # Assume this call is atomic.
2470 if not preserve_properties:
2471 volume_properties.clear()
2472 except Exception as e:
2473 raise LinstorVolumeManagerError(
2474 'Cannot destroy volume `{}`: {}'.format(volume_uuid, e)
2475 )
    def _build_volumes(self, repair):
        """
        Rebuild `self._volumes` from the KV store and LINSTOR resources.
        :param bool repair: When True (master side), destroy or rename
            broken volumes and complete pending UUID renames; when False,
            only list volumes and fail if pending renames exist.
        :raises LinstorVolumeManagerError: If repair is required but False.
        """
        properties = self._kv_cache
        resource_names = self._fetch_resource_names()

        self._volumes = set()

        # Volumes whose UUID rename was interrupted.
        updating_uuid_volumes = self._get_volumes_by_property(
            self.REG_UPDATING_UUID_SRC, ignore_inexisting_volumes=False
        )
        if updating_uuid_volumes and not repair:
            raise LinstorVolumeManagerError(
                'Cannot build LINSTOR volume list: '
                'It exists invalid "updating uuid volumes", repair is required'
            )

        existing_volumes = self._get_volumes_by_property(
            self.REG_NOT_EXISTS, ignore_inexisting_volumes=False
        )
        for volume_uuid, not_exists in existing_volumes.items():
            properties.namespace = self._build_volume_namespace(volume_uuid)

            src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC)
            if src_uuid:
                self._logger(
                    'Ignoring volume during manager initialization with prop '
                    ' PROP_UPDATING_UUID_SRC: {} (properties={})'
                    .format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Insert volume in list if the volume exists. Or if the volume
            # is being created and a slave wants to use it (repair = False).
            #
            # If we are on the master and if repair is True and state is
            # Creating, it's probably a bug or crash: the creation process has
            # been stopped.
            if not_exists == self.STATE_EXISTS or (
                not repair and not_exists == self.STATE_CREATING
            ):
                self._volumes.add(volume_uuid)
                continue

            if not repair:
                self._logger(
                    'Ignoring bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Remove bad volume.
            try:
                self._logger(
                    'Removing bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                volume_name = properties.get(self.PROP_VOLUME_NAME)

                # Little optimization, don't call `self._destroy_volume`,
                # we already have resource name list.
                if volume_name in resource_names:
                    self._destroy_resource(volume_name, force=True)

                # Assume this call is atomic.
                properties.clear()
            except Exception as e:
                # Do not raise, we don't want to block user action.
                self._logger(
                    'Cannot clean volume {}: {}'.format(volume_uuid, e)
                )

                # The volume can't be removed, maybe it's still in use,
                # in this case rename it with the "DELETED_" prefix.
                # This prefix is mandatory if it exists a snap transaction to
                # rollback because the original VDI UUID can try to be renamed
                # with the UUID we are trying to delete...
                if not volume_uuid.startswith('DELETED_'):
                    self.update_volume_uuid(
                        volume_uuid, 'DELETED_' + volume_uuid, force=True
                    )

        # Finish interrupted UUID renames: drop dead destinations, clear
        # the source namespace and promote the destination UUID.
        for dest_uuid, src_uuid in updating_uuid_volumes.items():
            dest_namespace = self._build_volume_namespace(dest_uuid)

            properties.namespace = dest_namespace
            if int(properties.get(self.PROP_NOT_EXISTS)):
                properties.clear()
                continue

            properties.namespace = self._build_volume_namespace(src_uuid)
            properties.clear()

            properties.namespace = dest_namespace
            properties.pop(self.PROP_UPDATING_UUID_SRC)

            if src_uuid in self._volumes:
                self._volumes.remove(src_uuid)
            self._volumes.add(dest_uuid)
2584 def _get_sr_properties(self):
2585 return self._create_linstor_kv(self._build_sr_namespace())
2587 def _get_volumes_by_property(
2588 self, reg_prop, ignore_inexisting_volumes=True
2589 ):
2590 base_properties = self._get_kv_cache()
2591 base_properties.namespace = self._build_volume_namespace()
2593 volume_properties = {}
2594 for volume_uuid in self._volumes:
2595 volume_properties[volume_uuid] = ''
2597 for key, value in base_properties.items():
2598 res = reg_prop.match(key)
2599 if res:
2600 volume_uuid = res.groups()[0]
2601 if not ignore_inexisting_volumes or \
2602 volume_uuid in self._volumes:
2603 volume_properties[volume_uuid] = value
2605 return volume_properties
2607 def _create_linstor_kv(self, namespace):
2608 return linstor.KV(
2609 self._group_name,
2610 uri=self._linstor.controller_host(),
2611 namespace=namespace
2612 )
2614 def _get_volume_properties(self, volume_uuid):
2615 properties = self._get_kv_cache()
2616 properties.namespace = self._build_volume_namespace(volume_uuid)
2617 return properties
2619 @classmethod
2620 def _build_sr_namespace(cls):
2621 return '/{}/'.format(cls.NAMESPACE_SR)
2623 @classmethod
2624 def _build_volume_namespace(cls, volume_uuid=None):
2625 # Return a path to all volumes if `volume_uuid` is not given.
2626 if volume_uuid is None:
2627 return '/{}/'.format(cls.NAMESPACE_VOLUME)
2628 return '/{}/{}/'.format(cls.NAMESPACE_VOLUME, volume_uuid)
2630 @classmethod
2631 def _get_error_str(cls, result):
2632 return ', '.join([
2633 err.message for err in cls._filter_errors(result)
2634 ])
2636 @classmethod
2637 def _create_linstor_instance(
2638 cls, uri, keep_uri_unmodified=False, attempt_count=30
2639 ):
2640 retry = False
2642 def connect(uri):
2643 if not uri:
2644 uri = get_controller_uri()
2645 if not uri:
2646 raise LinstorVolumeManagerError(
2647 'Unable to find controller uri...'
2648 )
2649 instance = linstor.Linstor(uri, keep_alive=True)
2650 instance.connect()
2651 return instance
2653 try:
2654 return connect(uri)
2655 except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError):
2656 pass
2658 if not keep_uri_unmodified:
2659 uri = None
2661 return util.retry(
2662 lambda: connect(uri),
2663 maxretry=attempt_count,
2664 period=1,
2665 exceptions=[
2666 linstor.errors.LinstorNetworkError,
2667 LinstorVolumeManagerError
2668 ]
2669 )
2671 @classmethod
2672 def _configure_volume_peer_slots(cls, lin, volume_name):
2673 result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3)
2674 error_str = cls._get_error_str(result)
2675 if error_str:
2676 raise LinstorVolumeManagerError(
2677 'Could not configure volume peer slots of {}: {}'
2678 .format(volume_name, error_str)
2679 )
2681 @classmethod
2682 def _activate_device_path(cls, lin, node_name, volume_name):
2683 result = lin.resource_make_available(node_name, volume_name, diskful=False)
2684 if linstor.Linstor.all_api_responses_no_error(result):
2685 return
2686 errors = linstor.Linstor.filter_api_call_response_errors(result)
2687 if len(errors) == 1 and errors[0].is_error(
2688 linstor.consts.FAIL_EXISTS_RSC
2689 ):
2690 return
2692 raise LinstorVolumeManagerError(
2693 'Unable to activate device path of `{}` on node `{}`: {}'
2694 .format(volume_name, node_name, ', '.join(
2695 [str(x) for x in result]))
2696 )
2698 @classmethod
2699 def _request_database_path(cls, lin, activate=False):
2700 node_name = socket.gethostname()
2702 try:
2703 resource = next(filter(
2704 lambda resource: resource.node_name == node_name and
2705 resource.name == DATABASE_VOLUME_NAME,
2706 lin.resource_list_raise().resources
2707 ), None)
2708 except Exception as e:
2709 raise LinstorVolumeManagerError(
2710 'Unable to fetch database resource: {}'
2711 .format(e)
2712 )
2714 if not resource:
2715 if activate:
2716 cls._activate_device_path(
2717 lin, node_name, DATABASE_VOLUME_NAME
2718 )
2719 return cls._request_database_path(
2720 DATABASE_VOLUME_NAME, DATABASE_VOLUME_NAME
2721 )
2722 raise LinstorVolumeManagerError(
2723 'Empty dev path for `{}`, but definition "seems" to exist'
2724 .format(DATABASE_PATH)
2725 )
2726 # Contains a path of the /dev/drbd<id> form.
2727 return resource.volumes[0].device_path
    @classmethod
    def _create_database_volume(
        cls, lin, group_name, storage_pool_name, node_names, redundancy
    ):
        """
        Create the LINSTOR database volume: spawn the definition, create
        diskful resources on the first `redundancy` nodes owning the
        storage pool and diskless resources everywhere else, then format
        the device with DATABASE_MKFS and check its path.
        :param linstor.Linstor lin: Client to use.
        :param str group_name: Resource group name.
        :param str storage_pool_name: Storage pool backing the volume.
        :param node_names: All node names of the pool.
        :param int redundancy: Number of diskful replicas to create.
        :return: The database device path.
        :rtype: str
        :raises LinstorVolumeManagerError: On any creation failure.
        """
        try:
            dfns = lin.resource_dfn_list_raise().resource_definitions
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Unable to get definitions during database creation: {}'
                .format(e)
            )

        if dfns:
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    DATABASE_VOLUME_NAME, group_name
                ) + 'LINSTOR volume list must be empty.'
            )

        # Workaround to use thin lvm. Without this line an error is returned:
        # "Not enough available nodes"
        # I don't understand why but this command protect against this bug.
        try:
            pools = lin.storage_pool_list_raise(
                filter_by_stor_pools=[storage_pool_name]
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to get storage pool list before database creation: {}'
                .format(e)
            )

        # Ensure we have a correct list of storage pools.
        assert pools.storage_pools  # We must have at least one storage pool!
        nodes_with_pool = list(map(lambda pool: pool.node_name, pools.storage_pools))
        for node_name in nodes_with_pool:
            assert node_name in node_names
        util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool))

        # Create the database definition.
        size = cls.round_up_volume_size(DATABASE_SIZE)
        cls._check_volume_creation_errors(lin.resource_group_spawn(
            rsc_grp_name=group_name,
            rsc_dfn_name=DATABASE_VOLUME_NAME,
            vlm_sizes=['{}B'.format(size)],
            definitions_only=True
        ), DATABASE_VOLUME_NAME, group_name)
        cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME)

        # Create real resources on the first nodes.
        resources = []

        diskful_nodes = []
        diskless_nodes = []
        for node_name in node_names:
            if node_name in nodes_with_pool:
                diskful_nodes.append(node_name)
            else:
                diskless_nodes.append(node_name)

        assert diskful_nodes
        for node_name in diskful_nodes[:redundancy]:
            util.SMlog('Create database diskful on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                storage_pool=storage_pool_name
            ))
        # Create diskless resources on the remaining set.
        for node_name in diskful_nodes[redundancy:] + diskless_nodes:
            util.SMlog('Create database diskless on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                diskless=True
            ))

        result = lin.resource_create(resources)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create database volume from SR `{}`: {}'.format(
                    group_name, error_str
                )
            )

        # Create database and ensure path exists locally and
        # on replicated devices.
        current_device_path = cls._request_database_path(lin, activate=True)

        # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be
        # plugged.
        for node_name in node_names:
            cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME)

        # We use realpath here to get the /dev/drbd<id> path instead of
        # /dev/drbd/by-res/<resource_name>.
        expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME)
        util.wait_for_path(expected_device_path, 5)

        device_realpath = os.path.realpath(expected_device_path)
        if current_device_path != device_realpath:
            raise LinstorVolumeManagerError(
                'Invalid path, current={}, expected={} (realpath={})'
                .format(
                    current_device_path,
                    expected_device_path,
                    device_realpath
                )
            )

        try:
            util.retry(
                lambda: util.pread2([DATABASE_MKFS, expected_device_path]),
                maxretry=5
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to execute {} on database volume: {}'
                .format(DATABASE_MKFS, e)
            )

        return expected_device_path
2853 @classmethod
2854 def _destroy_database_volume(cls, lin, group_name):
2855 error_str = cls._get_error_str(
2856 lin.resource_dfn_delete(DATABASE_VOLUME_NAME)
2857 )
2858 if error_str:
2859 raise LinstorVolumeManagerError(
2860 'Could not destroy resource `{}` from SR `{}`: {}'
2861 .format(DATABASE_VOLUME_NAME, group_name, error_str)
2862 )
2864 @classmethod
2865 def _mount_database_volume(cls, volume_path, mount=True, force=False):
2866 try:
2867 # 1. Create a backup config folder.
2868 database_not_empty = bool(os.listdir(DATABASE_PATH))
2869 backup_path = cls._create_database_backup_path()
2871 # 2. Move the config in the mounted volume.
2872 if database_not_empty:
2873 cls._move_files(DATABASE_PATH, backup_path)
2875 cls._mount_volume(volume_path, DATABASE_PATH, mount)
2877 if database_not_empty:
2878 cls._move_files(backup_path, DATABASE_PATH, force)
2880 # 3. Remove useless backup directory.
2881 try:
2882 os.rmdir(backup_path)
2883 except Exception as e:
2884 raise LinstorVolumeManagerError(
2885 'Failed to remove backup path {} of LINSTOR config: {}'
2886 .format(backup_path, e)
2887 )
2888 except Exception as e:
2889 def force_exec(fn):
2890 try:
2891 fn()
2892 except Exception:
2893 pass
2895 if mount == cls._is_mounted(DATABASE_PATH):
2896 force_exec(lambda: cls._move_files(
2897 DATABASE_PATH, backup_path
2898 ))
2899 force_exec(lambda: cls._mount_volume(
2900 volume_path, DATABASE_PATH, not mount
2901 ))
2903 if mount != cls._is_mounted(DATABASE_PATH):
2904 force_exec(lambda: cls._move_files(
2905 backup_path, DATABASE_PATH
2906 ))
2908 force_exec(lambda: os.rmdir(backup_path))
2909 raise e
2911 @classmethod
2912 def _force_destroy_database_volume(cls, lin, group_name):
2913 try:
2914 cls._destroy_database_volume(lin, group_name)
2915 except Exception:
2916 pass
2918 @classmethod
2919 def _destroy_storage_pool(cls, lin, group_name, node_name):
2920 def destroy():
2921 result = lin.storage_pool_delete(node_name, group_name)
2922 errors = cls._filter_errors(result)
2923 if cls._check_errors(errors, [
2924 linstor.consts.FAIL_NOT_FOUND_STOR_POOL,
2925 linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN
2926 ]):
2927 return
2929 if errors:
2930 raise LinstorVolumeManagerError(
2931 'Failed to destroy SP `{}` on node `{}`: {}'.format(
2932 group_name,
2933 node_name,
2934 cls._get_error_str(errors)
2935 )
2936 )
2938 # We must retry to avoid errors like:
2939 # "can not be deleted as volumes / snapshot-volumes are still using it"
2940 # after LINSTOR database volume destruction.
2941 return util.retry(destroy, maxretry=10)
    @classmethod
    def _create_resource_group(
        cls,
        lin,
        group_name,
        storage_pool_name,
        redundancy,
        destroy_old_group
    ):
        """
        Create a resource group and its volume group.
        :param linstor.Linstor lin: Client to use.
        :param str group_name: Name of the RG to create.
        :param str storage_pool_name: Storage pool to place on.
        :param int redundancy: Place count of the group.
        :param bool destroy_old_group: When the RG already exists, destroy
            it once and retry the creation.
        :raises LinstorVolumeManagerError: If RG or VG creation fails.
        """
        rg_creation_attempt = 0
        while True:
            result = lin.resource_group_create(
                name=group_name,
                place_count=redundancy,
                storage_pool=storage_pool_name,
                diskless_on_remaining=False
            )
            error_str = cls._get_error_str(result)
            if not error_str:
                break

            # Only an "already exists" error may be recovered, and only
            # once: destroy the old group then loop to retry.
            errors = cls._filter_errors(result)
            if destroy_old_group and cls._check_errors(errors, [
                linstor.consts.FAIL_EXISTS_RSC_GRP
            ]):
                rg_creation_attempt += 1
                if rg_creation_attempt < 2:
                    try:
                        cls._destroy_resource_group(lin, group_name)
                    except Exception as e:
                        error_str = 'Failed to destroy old and empty RG: {}'.format(e)
                    else:
                        continue

            raise LinstorVolumeManagerError(
                'Could not create RG `{}`: {}'.format(
                    group_name, error_str
                )
            )

        result = lin.volume_group_create(group_name)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create VG `{}`: {}'.format(
                    group_name, error_str
                )
            )
2992 @classmethod
2993 def _destroy_resource_group(cls, lin, group_name):
2994 def destroy():
2995 result = lin.resource_group_delete(group_name)
2996 errors = cls._filter_errors(result)
2997 if cls._check_errors(errors, [
2998 linstor.consts.FAIL_NOT_FOUND_RSC_GRP
2999 ]):
3000 return
3002 if errors:
3003 raise LinstorVolumeManagerError(
3004 'Failed to destroy RG `{}`: {}'
3005 .format(group_name, cls._get_error_str(errors))
3006 )
3008 return util.retry(destroy, maxretry=10)
3010 @classmethod
3011 def _build_group_name(cls, base_name):
3012 # If thin provisioning is used we have a path like this:
3013 # `VG/LV`. "/" is not accepted by LINSTOR.
3014 return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_'))
3016 # Used to store important data in a HA context,
3017 # i.e. a replication count of 3.
3018 @classmethod
3019 def _build_ha_group_name(cls, base_name):
3020 return '{}{}'.format(cls.PREFIX_HA, base_name.replace('/', '_'))
3022 @classmethod
3023 def _check_volume_creation_errors(cls, result, volume_uuid, group_name):
3024 errors = cls._filter_errors(result)
3025 if cls._check_errors(errors, [
3026 linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN
3027 ]):
3028 raise LinstorVolumeManagerError(
3029 'Failed to create volume `{}` from SR `{}`, it already exists'
3030 .format(volume_uuid, group_name),
3031 LinstorVolumeManagerError.ERR_VOLUME_EXISTS
3032 )
3034 if cls._check_errors(errors, [linstor.consts.FAIL_NOT_FOUND_RSC_GRP]):
3035 raise LinstorVolumeManagerError(
3036 'Failed to create volume `{}` from SR `{}`, resource group doesn\'t exist'
3037 .format(volume_uuid, group_name),
3038 LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
3039 )
3041 if errors:
3042 raise LinstorVolumeManagerError(
3043 'Failed to create volume `{}` from SR `{}`: {}'.format(
3044 volume_uuid,
3045 group_name,
3046 cls._get_error_str(errors)
3047 )
3048 )
    @classmethod
    def _move_files(cls, src_dir, dest_dir, force=False):
        """
        Move the content of `src_dir` into `dest_dir` ('lost+found' is
        ignored). Without `force`, refuse to overwrite a non-empty
        destination or an existing file, and move already-transferred
        files back (best effort) on failure.
        :raises LinstorVolumeManagerError: On any listing/move failure.
        """
        def listdir(dir):
            ignored = ['lost+found']
            return [file for file in os.listdir(dir) if file not in ignored]

        try:
            if not force:
                files = listdir(dest_dir)
                if files:
                    raise LinstorVolumeManagerError(
                        'Cannot move files from {} to {} because destination '
                        'contains: {}'.format(src_dir, dest_dir, files)
                    )
        except LinstorVolumeManagerError:
            raise
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Cannot list dir {}: {}'.format(dest_dir, e)
            )

        try:
            for file in listdir(src_dir):
                try:
                    dest_file = os.path.join(dest_dir, file)
                    if not force and os.path.exists(dest_file):
                        raise LinstorVolumeManagerError(
                            'Cannot move {} because it already exists in the '
                            'destination'.format(file)
                        )
                    shutil.move(os.path.join(src_dir, file), dest_file)
                except LinstorVolumeManagerError:
                    raise
                except Exception as e:
                    raise LinstorVolumeManagerError(
                        'Cannot move {}: {}'.format(file, e)
                    )
        except Exception as e:
            # Rollback with force=True to avoid infinite recursion.
            if not force:
                try:
                    cls._move_files(dest_dir, src_dir, force=True)
                except Exception:
                    pass

            raise LinstorVolumeManagerError(
                'Failed to move files from {} to {}: {}'.format(
                    src_dir, dest_dir, e
                )
            )
3100 @staticmethod
3101 def _create_database_backup_path():
3102 path = DATABASE_PATH + '-' + str(uuid.uuid4())
3103 try:
3104 os.mkdir(path)
3105 return path
3106 except Exception as e:
3107 raise LinstorVolumeManagerError(
3108 'Failed to create backup path {} of LINSTOR config: {}'
3109 .format(path, e)
3110 )
3112 @staticmethod
3113 def _get_filtered_properties(properties):
3114 return dict(properties.items())
3116 @staticmethod
3117 def _filter_errors(result):
3118 return [
3119 err for err in result
3120 if hasattr(err, 'is_error') and err.is_error()
3121 ]
3123 @staticmethod
3124 def _check_errors(result, codes):
3125 for err in result:
3126 for code in codes:
3127 if err.is_error(code):
3128 return True
3129 return False
3131 @classmethod
3132 def _controller_is_running(cls):
3133 return cls._service_is_running('linstor-controller')
3135 @classmethod
3136 def _start_controller(cls, start=True):
3137 return cls._start_service('linstor-controller', start)
3139 @staticmethod
3140 def _start_service(name, start=True):
3141 action = 'start' if start else 'stop'
3142 (ret, out, err) = util.doexec([
3143 'systemctl', action, name
3144 ])
3145 if ret != 0:
3146 raise LinstorVolumeManagerError(
3147 'Failed to {} {}: {} {}'
3148 .format(action, name, out, err)
3149 )
3151 @staticmethod
3152 def _service_is_running(name):
3153 (ret, out, err) = util.doexec([
3154 'systemctl', 'is-active', '--quiet', name
3155 ])
3156 return not ret
3158 @staticmethod
3159 def _is_mounted(mountpoint):
3160 (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint])
3161 return ret == 0
3163 @classmethod
3164 def _mount_volume(cls, volume_path, mountpoint, mount=True):
3165 if mount:
3166 try:
3167 util.pread(['mount', volume_path, mountpoint])
3168 except Exception as e:
3169 raise LinstorVolumeManagerError(
3170 'Failed to mount volume {} on {}: {}'
3171 .format(volume_path, mountpoint, e)
3172 )
3173 else:
3174 try:
3175 if cls._is_mounted(mountpoint):
3176 util.pread(['umount', mountpoint])
3177 except Exception as e:
3178 raise LinstorVolumeManagerError(
3179 'Failed to umount volume {} on {}: {}'
3180 .format(volume_path, mountpoint, e)
3181 )
3184# ==============================================================================
3186# Check if a path is a DRBD resource and log the process name/pid
3187# that opened it.
def log_drbd_openers(path):
    """
    If `path` designates a DRBD resource, log which host(s) and
    process(es) currently hold it open. Best effort: never raises.

    :param str path: Path to check (expected under DRBD_BY_RES_PATH).
    """
    # Ignore if it's not a symlink to DRBD resource.
    if not path.startswith(DRBD_BY_RES_PATH):
        return

    # The resource name is the path component right after the prefix.
    prefix_len = len(DRBD_BY_RES_PATH)
    res_name_end = path.find('/', prefix_len)
    if res_name_end == -1:
        return
    res_name = path[prefix_len:res_name_end]

    # The volume is the last path component; it must not be the
    # resource component itself.
    last_sep = path.rfind('/')
    if last_sep == res_name_end:
        return
    volume = path[last_sep + 1:]

    try:
        # Ensure the target really is a DRBD block device
        # (major 147 — assumed to be the DRBD device major; TODO confirm).
        real_path = os.path.realpath(path)
        st = os.stat(real_path)
        if not stat.S_ISBLK(st.st_mode) or os.major(st.st_rdev) != 147:
            return

        # Ask DRBD where the device is open.
        (ret, stdout, stderr) = util.doexec(['drbdadm', 'status', res_name])
        if ret:
            util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format(
                res_name, stderr
            ))
            return

        if stdout.startswith('{} role:Primary'.format(res_name)):
            # Open on the local host.
            util.SMlog(
                'DRBD resource `{}` is open on local host: {}'
                .format(path, get_local_volume_openers(res_name, volume))
            )
        else:
            # Open on one or more remote hosts.
            util.SMlog(
                'DRBD resource `{}` is open on hosts: {}'
                .format(path, get_all_volume_openers(res_name, volume))
            )
    except Exception as e:
        util.SMlog(
            'Got exception while trying to determine where DRBD resource ' +
            '`{}` is open: {}'.format(path, e)
        )