Coverage for drivers/linstorvolumemanager.py : 10%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python3
2#
3# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 3 of the License, or
8# (at your option) any later version.
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16#
18from sm_typing import override
20import errno
21import json
22import linstor
23import os.path
24import re
25import shutil
26import socket
27import stat
28import time
29import util
30import uuid
# Prefix prepended to RAW persistent volumes so they can be recognized
# in the LINSTOR database.
PERSISTENT_PREFIX = 'xcp-persistent-'

# Contains the data of the "/var/lib/linstor" directory
# (the LINSTOR controller database).
DATABASE_VOLUME_NAME = PERSISTENT_PREFIX + 'database'
DATABASE_SIZE = 1 << 30  # 1GB.
DATABASE_PATH = '/var/lib/linstor'
DATABASE_MKFS = 'mkfs.ext4'  # Filesystem used to format the database volume.

# Matches the node name of the Primary in `drbdadm status` output.
REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary")
# Extracts the remote host address from `drbdsetup show` path values.
REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$')

# Directory where DRBD exposes one device path per resource.
DRBD_BY_RES_PATH = '/dev/drbd/by-res/'

# Name of the XAPI plugin used to run LINSTOR helpers on pool hosts.
PLUGIN = 'linstor-manager'
49# ==============================================================================
def get_local_volume_openers(resource_name, volume):
    """
    List the local processes that currently hold a DRBD volume open,
    using the DRBD debugfs interface.
    :param str resource_name: Name of the DRBD resource.
    :param int volume: Volume number inside the resource.
    :return: JSON string mapping each opener PID to its process name and
        open duration.
    :rtype: str
    :raise Exception: If arguments are missing or a debugfs line cannot
        be parsed.
    """
    if not resource_name or volume is None:
        raise Exception('Cannot get DRBD openers without resource name and/or volume.')

    path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format(
        resource_name, volume
    )

    with open(path, 'r') as openers:
        # Not a big cost, so read all lines directly.
        lines = openers.readlines()

    result = {}

    opener_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)')
    for line in lines:
        match = opener_re.match(line)
        # An `assert` here would be stripped under `python -O`: raise a
        # real exception instead if the kernel output format changes.
        if not match:
            raise Exception(
                'Unable to parse DRBD openers line: `{}`'.format(line)
            )

        process_name, pid, open_duration_ms = match.groups()
        result[pid] = {
            'process-name': process_name,
            'open-duration': open_duration_ms
        }

    return json.dumps(result)
def get_all_volume_openers(resource_name, volume):
    """
    Collect the DRBD openers of a volume on every live host of the pool
    through the linstor-manager plugin.
    :param str resource_name: Name of the DRBD resource.
    :param int volume: Volume number inside the resource.
    :return: Dict mapping each node name to its parsed openers.
    :rtype: dict
    """
    PLUGIN_CMD = 'getDrbdOpeners'

    volume = str(volume)
    openers = {}

    # Make sure this call never stucks because this function can be called
    # during HA init and in this case we can wait forever.
    session = util.timeout_call(10, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            # Ensure we call plugin on online hosts only.
            if metrics['live']:
                raw = session.xenapi.host.call_plugin(
                    host_ref, PLUGIN, PLUGIN_CMD, {
                        'resourceName': resource_name,
                        'volume': volume
                    }
                )
                openers[node_name] = json.loads(raw)
        except Exception as e:
            util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))

    return openers
115# ==============================================================================
def round_up(value, divisor):
    """
    Round `value` up to the next multiple of `divisor`.
    :param int value: Value to round.
    :param int divisor: Rounding step (must be non-zero).
    :return: Smallest multiple of divisor greater than or equal to value.
    :rtype: int
    """
    assert divisor
    step = int(divisor)
    return (int(value) + step - 1) // step * step
def round_down(value, divisor):
    """
    Round `value` down to the previous multiple of `divisor`.
    :param int value: Value to round.
    :param int divisor: Rounding step (must be non-zero).
    :return: Largest multiple of divisor less than or equal to value.
    :rtype: int
    """
    assert divisor
    amount = int(value)
    return amount - amount % int(divisor)
129# ==============================================================================
def get_remote_host_ip(node_name):
    """
    Find the address used by a remote node of the DRBD database volume.
    :param str node_name: Node to search in the DRBD connections.
    :return: The node address if found, None otherwise.
    :rtype: str or None
    """
    (ret, stdout, stderr) = util.doexec([
        'drbdsetup', 'show', DATABASE_VOLUME_NAME, '--json'
    ])
    if ret != 0:
        return

    try:
        conf = json.loads(stdout)
        if not conf:
            return

        for connection in conf[0]['connections']:
            if connection['net']['_name'] != node_name:
                continue
            res = REG_DRBDSETUP_IP.match(connection['path']['_remote_host'])
            if res:
                return res.groups()[0]
            break
    except Exception:
        # Best effort: malformed output simply means "not found".
        pass
def _get_controller_uri():
    """
    Find the URI of the running LINSTOR controller.
    First inspect `drbdadm status` on the database volume to locate the
    Primary node, then fall back to asking every pool host through the
    linstor-manager plugin.
    :return: A 'linstor://<host>' URI, or None if no controller is found.
    :rtype: str or None
    """
    PLUGIN_CMD = 'hasControllerRunning'

    # Try to find controller using drbdadm.
    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])
    if ret == 0:
        # If we are here, the database device exists locally.

        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            # Nice case, we have the controller running on this local host.
            return 'linstor://localhost'

        # Try to find the host using DRBD connections.
        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            node_name = res.groups()[0]
            ip = get_remote_host_ip(node_name)
            if ip:
                return 'linstor://' + ip

    # Worst case: we use many hosts in the pool (>= 4), so we can't find the
    # primary using drbdadm because we don't have all connections to the
    # replicated volume. `drbdadm status xcp-persistent-database` returns
    # 3 connections by default.
    try:
        session = util.timeout_call(10, util.get_localAPI_session)

        for host_ref, host_record in session.xenapi.host.get_all_records().items():
            node_name = host_record['hostname']
            try:
                if util.strtobool(
                    session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {})
                ):
                    return 'linstor://' + host_record['address']
            except Exception as e:
                # Can throw an exception if a host is offline. So catch it.
                util.SMlog('Unable to search controller on `{}`: {}'.format(
                    node_name, e
                ))
    except Exception:
        # Was a bare `except:`, which would also swallow SystemExit and
        # KeyboardInterrupt; narrowed to Exception.
        # Not found, maybe we are trying to create the SR...
        pass
def get_controller_uri():
    """
    Get the URI of the LINSTOR controller, retrying up to 10 times with
    a one-second pause between attempts.
    :return: The controller URI, or None if it cannot be found.
    :rtype: str or None
    """
    remaining = 10
    while remaining:
        uri = _get_controller_uri()
        if uri:
            return uri
        remaining -= 1
        if remaining:
            time.sleep(1)
def get_controller_node_name():
    """
    Get the name of the node hosting the LINSTOR controller.
    :return: 'localhost' if the controller runs here, the remote node
        name otherwise, or None if not found.
    :rtype: str or None
    """
    PLUGIN_CMD = 'hasControllerRunning'

    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])

    if ret == 0:
        # The database volume exists locally: use its DRBD role.
        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            return 'localhost'

        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            return res.groups()[0]

    # Otherwise ask each live host of the pool through the plugin.
    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            if not metrics['live']:
                continue

            if util.strtobool(session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {}
            )):
                return node_name
        except Exception as e:
            util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format(
                node_name, e
            ))
def demote_drbd_resource(node_name, resource_name):
    """
    Demote a DRBD resource on a specific node through the linstor-manager
    plugin.
    :param str node_name: Hostname of the node where the resource must be
        demoted.
    :param str resource_name: Name of the DRBD resource to demote.
    :raise Exception: If the plugin call fails on the target node.
    """
    PLUGIN_CMD = 'demoteDrbdResource'

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        if host_record['hostname'] != node_name:
            continue

        try:
            session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name}
            )
        except Exception as e:
            util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))
            # NOTE(review): this message mentions "unable to find node" but
            # this path is reached when the plugin call FAILED on a found
            # node — confirm the intended wording/placement of this raise.
            raise Exception(
                'Can\'t demote resource `{}`, unable to find node `{}`'
                .format(resource_name, node_name)
            )
    # NOTE(review): if no host matches `node_name` the function returns
    # None silently — confirm whether an exception is expected here.
269# ==============================================================================
class LinstorVolumeManagerError(Exception):
    """
    Exception raised by LinstorVolumeManager, carrying an error code so
    callers can react to specific failure kinds.
    """

    # Error codes. The original definitions carried trailing commas, which
    # turned every code except ERR_GROUP_NOT_EXISTS into a 1-tuple (e.g.
    # `(0,)` instead of `0`). Plain ints restore consistency and make
    # comparisons against integer literals work as expected.
    ERR_GENERIC = 0
    ERR_VOLUME_EXISTS = 1
    ERR_VOLUME_NOT_EXISTS = 2
    ERR_VOLUME_DESTROY = 3
    ERR_GROUP_NOT_EXISTS = 4

    def __init__(self, message, code=ERR_GENERIC):
        """
        :param str message: Human readable error description.
        :param int code: One of the ERR_* codes above.
        """
        super(LinstorVolumeManagerError, self).__init__(message)
        self._code = code

    @property
    def code(self):
        # Error code associated with this failure (read-only).
        return self._code
287# ==============================================================================
289# Note:
290# If a storage pool is not accessible after a network change:
291# linstor node interface modify <NODE> default --ip <IP>
class LinstorVolumeManager(object):
    """
    API to manage LINSTOR volumes in XCP-ng.
    A volume in this context is a physical part of the storage layer.
    """

    __slots__ = (
        '_linstor', '_logger', '_redundancy',
        '_base_group_name', '_group_name', '_ha_group_name',
        '_volumes', '_storage_pools', '_storage_pools_time',
        '_kv_cache', '_resource_cache', '_volume_info_cache',
        '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty'
    )

    # Root directory of DRBD device paths exposed to callers.
    DEV_ROOT_PATH = DRBD_BY_RES_PATH

    # Default sector size.
    BLOCK_SIZE = 512

    # List of volume properties.
    PROP_METADATA = 'metadata'
    PROP_NOT_EXISTS = 'not-exists'
    PROP_VOLUME_NAME = 'volume-name'
    PROP_IS_READONLY_TIMESTAMP = 'readonly-timestamp'

    # A volume can only be locked for a limited duration.
    # The goal is to give enough time to slaves to execute some actions on
    # a device before an UUID update or a coalesce for example.
    # Expiration is expressed in seconds.
    LOCKED_EXPIRATION_DELAY = 1 * 60

    # Used when volume uuid is being updated.
    PROP_UPDATING_UUID_SRC = 'updating-uuid-src'

    # States of property PROP_NOT_EXISTS.
    STATE_EXISTS = '0'
    STATE_NOT_EXISTS = '1'
    STATE_CREATING = '2'

    # Property namespaces.
    NAMESPACE_SR = 'xcp/sr'
    NAMESPACE_VOLUME = 'xcp/volume'

    # Regex template to match properties inside a namespace.
    REG_PROP = '^([^/]+)/{}$'

    REG_METADATA = re.compile(REG_PROP.format(PROP_METADATA))
    REG_NOT_EXISTS = re.compile(REG_PROP.format(PROP_NOT_EXISTS))
    REG_VOLUME_NAME = re.compile(REG_PROP.format(PROP_VOLUME_NAME))
    REG_UPDATING_UUID_SRC = re.compile(REG_PROP.format(PROP_UPDATING_UUID_SRC))

    # Prefixes of SR/VOLUME in the LINSTOR DB.
    # A LINSTOR (resource, group, ...) name cannot start with a number.
    # So we add a prefix behind our SR/VOLUME uuids.
    PREFIX_SR = 'xcp-sr-'
    PREFIX_HA = 'xcp-ha-'
    PREFIX_VOLUME = 'xcp-volume-'

    # Limit request number when storage pool info is asked, we fetch
    # the current pool status after N elapsed seconds.
    STORAGE_POOLS_FETCH_INTERVAL = 15
    @staticmethod
    def default_logger(*args):
        """
        Default logging backend: print the raw argument tuple to stdout.
        """
        print(args)
360 # --------------------------------------------------------------------------
361 # API.
362 # --------------------------------------------------------------------------
    class VolumeInfo(object):
        """
        Aggregated size and placement information about one LINSTOR volume.
        """

        __slots__ = (
            'name',
            'allocated_size',  # Allocated size, place count is not used.
            'virtual_size',    # Total virtual available size of this volume
                               # (i.e. the user size at creation).
            'diskful'          # Array of nodes that have a diskful volume.
        )

        def __init__(self, name):
            self.name = name
            self.allocated_size = 0
            self.virtual_size = 0
            self.diskful = []

        @override
        def __repr__(self) -> str:
            return 'VolumeInfo("{}", {}, {}, {})'.format(
                self.name, self.allocated_size, self.virtual_size,
                self.diskful
            )
386 # --------------------------------------------------------------------------
    def __init__(
        self, uri, group_name, repair=False, logger=default_logger.__func__,
        attempt_count=30
    ):
        """
        Create a new LinstorVolumeManager object.
        :param str uri: URI to communicate with the LINSTOR controller.
        :param str group_name: The SR group name to use.
        :param bool repair: If true we try to remove bad volumes due to a crash
        or unexpected behavior.
        :param function logger: Function to log messages.
        :param int attempt_count: Number of attempts to join the controller.
        :raise LinstorVolumeManagerError: If the resource group cannot be
        found on the controller.
        """

        self._linstor = self._create_linstor_instance(
            uri, attempt_count=attempt_count
        )
        # Keep the unprefixed name: `group_name` property returns it.
        self._base_group_name = group_name

        # Ensure group exists.
        group_name = self._build_group_name(group_name)
        groups = self._linstor.resource_group_list_raise([group_name]).resource_groups
        if not groups:
            raise LinstorVolumeManagerError(
                'Unable to find `{}` Linstor SR'.format(group_name)
            )

        # Ok. ;)
        self._logger = logger
        # Redundancy is taken from the group's configured place count.
        self._redundancy = groups[0].select_filter.place_count
        self._group_name = group_name
        self._ha_group_name = self._build_ha_group_name(self._base_group_name)
        self._volumes = set()
        self._storage_pools_time = 0

        # To increase performance and limit request count to LINSTOR services,
        # we use caches.
        self._kv_cache = self._create_kv_cache()
        self._resource_cache = None
        self._resource_cache_dirty = True
        self._volume_info_cache = None
        self._volume_info_cache_dirty = True
        self._build_volumes(repair=repair)
    @property
    def group_name(self):
        """
        Give the used group name.
        :return: The group name.
        :rtype: str
        """
        # Note: this is the base name passed to the constructor, not the
        # prefixed LINSTOR group name stored in `_group_name`.
        return self._base_group_name
    @property
    def redundancy(self):
        """
        Give the used redundancy.
        :return: The redundancy (the resource group's place count).
        :rtype: int
        """
        return self._redundancy
    @property
    def volumes(self):
        """
        Give the volumes uuid set.
        :return: The volumes uuid set (built during construction).
        :rtype: set(str)
        """
        return self._volumes
    @property
    def max_volume_size_allowed(self):
        """
        Give the max volume size currently available in B.
        :return: The current size.
        :rtype: int
        :raise LinstorVolumeManagerError: If no candidate or an invalid
        size is returned by the controller.
        """

        candidates = self._find_best_size_candidates()
        if not candidates:
            raise LinstorVolumeManagerError(
                'Failed to get max volume size allowed'
            )

        size = candidates[0].max_volume_size
        if size < 0:
            raise LinstorVolumeManagerError(
                'Invalid max volume size allowed given: {}'.format(size)
            )
        # Controller size is converted to bytes, then rounded down to a
        # valid volume size.
        return self.round_down_volume_size(size * 1024)
    @property
    def physical_size(self):
        """
        Give the total physical size of the SR.
        :return: The physical size.
        :rtype: int
        """
        return self._compute_size('total_capacity')
    @property
    def physical_free_size(self):
        """
        Give the total free physical size of the SR.
        :return: The physical free size.
        :rtype: int
        """
        return self._compute_size('free_capacity')
498 @property
499 def allocated_volume_size(self):
500 """
501 Give the allocated size for all volumes. The place count is not
502 used here. When thick lvm is used, the size for one volume should
503 be equal to the virtual volume size. With thin lvm, the size is equal
504 or lower to the volume size.
505 :return: The allocated size of all volumes.
506 :rtype: int
507 """
509 # Paths: /res_name/vol_number/size
510 sizes = {}
512 for resource in self._get_resource_cache().resources:
513 if resource.name not in sizes:
514 current = sizes[resource.name] = {}
515 else:
516 current = sizes[resource.name]
518 for volume in resource.volumes:
519 # We ignore diskless pools of the form "DfltDisklessStorPool".
520 if volume.storage_pool_name != self._group_name:
521 continue
523 allocated_size = max(volume.allocated_size, 0)
524 current_allocated_size = current.get(volume.number) or -1
525 if allocated_size > current_allocated_size:
526 current[volume.number] = allocated_size
528 total_size = 0
529 for volumes in sizes.values():
530 for size in volumes.values():
531 total_size += size
533 return total_size * 1024
535 def get_min_physical_size(self):
536 """
537 Give the minimum physical size of the SR.
538 I.e. the size of the smallest disk + the number of pools.
539 :return: The physical min size.
540 :rtype: tuple(int, int)
541 """
542 size = None
543 pool_count = 0
544 for pool in self._get_storage_pools(force=True):
545 space = pool.free_space
546 if space:
547 pool_count += 1
548 current_size = space.total_capacity
549 if current_size < 0:
550 raise LinstorVolumeManagerError(
551 'Failed to get pool total_capacity attr of `{}`'
552 .format(pool.node_name)
553 )
554 if size is None or current_size < size:
555 size = current_size
556 return (pool_count, (size or 0) * 1024)
    @property
    def metadata(self):
        """
        Get the metadata of the SR.
        :return: Dictionary that contains metadata.
        :rtype: dict(str, dict)
        :raise LinstorVolumeManagerError: If the stored metadata is valid
        JSON but not a dictionary.
        """

        sr_properties = self._get_sr_properties()
        metadata = sr_properties.get(self.PROP_METADATA)
        if metadata is not None:
            # Stored as a JSON string in the LINSTOR KV store.
            metadata = json.loads(metadata)
            if isinstance(metadata, dict):
                return metadata
            raise LinstorVolumeManagerError(
                'Expected dictionary in SR metadata: {}'.format(
                    self._group_name
                )
            )

        return {}
    @metadata.setter
    def metadata(self, metadata):
        """
        Set the metadata of the SR.
        :param dict metadata: Dictionary that contains metadata.
        """

        assert isinstance(metadata, dict)
        sr_properties = self._get_sr_properties()
        # Serialized as JSON because KV store values are plain strings.
        sr_properties[self.PROP_METADATA] = json.dumps(metadata)
591 @property
592 def disconnected_hosts(self):
593 """
594 Get the list of disconnected hosts.
595 :return: Set that contains disconnected hosts.
596 :rtype: set(str)
597 """
599 disconnected_hosts = set()
600 for pool in self._get_storage_pools():
601 for report in pool.reports:
602 if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \
603 linstor.consts.WARN_NOT_CONNECTED:
604 disconnected_hosts.add(pool.node_name)
605 break
606 return disconnected_hosts
    def check_volume_exists(self, volume_uuid):
        """
        Check if a volume exists in the SR.
        :param str volume_uuid: The volume uuid to check.
        :return: True if volume exists.
        :rtype: bool
        """
        # Purely local check against the cached volume set: no LINSTOR
        # request is made here.
        return volume_uuid in self._volumes
    def create_volume(
        self,
        volume_uuid,
        size,
        persistent=True,
        volume_name=None,
        high_availability=False
    ):
        """
        Create a new volume on the SR.
        :param str volume_uuid: The volume uuid to use.
        :param int size: volume size in B.
        :param bool persistent: If false the volume will be unavailable
        on the next constructor call LinstorSR(...).
        :param str volume_name: If set, this name is used in the LINSTOR
        database instead of a generated name.
        :param bool high_availability: If set, the volume is created in
        the HA group.
        :return: The current device path of the volume.
        :rtype: str
        """

        self._logger('Creating LINSTOR volume {}...'.format(volume_uuid))
        if not volume_name:
            volume_name = self.build_volume_name(util.gen_uuid())
        volume_properties = self._create_volume_with_properties(
            volume_uuid,
            volume_name,
            size,
            True,  # place_resources
            high_availability
        )

        # Volume created! Now try to find the device path.
        try:
            self._logger(
                'Find device path of LINSTOR volume {}...'.format(volume_uuid)
            )
            device_path = self._find_device_path(volume_uuid, volume_name)
            if persistent:
                # Only persistent volumes survive the next LinstorSR(...)
                # construction (see PROP_NOT_EXISTS states).
                volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
            self._volumes.add(volume_uuid)
            self._logger(
                'LINSTOR volume {} created!'.format(volume_uuid)
            )
            return device_path
        except Exception:
            # There is an issue to find the path.
            # At this point the volume has just been created, so force flag can be used.
            self._destroy_volume(volume_uuid, force=True)
            raise
    def mark_volume_as_persistent(self, volume_uuid):
        """
        Mark volume as persistent if created with persistent=False.
        :param str volume_uuid: The volume uuid to mark.
        """

        self._ensure_volume_exists(volume_uuid)

        # Mark volume as persistent.
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
    def destroy_volume(self, volume_uuid):
        """
        Destroy a volume.
        :param str volume_uuid: The volume uuid to destroy.
        :raise LinstorVolumeManagerError: With code ERR_VOLUME_DESTROY on
        failure.
        """

        self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        # Mark volume as destroyed first so a crash between here and the
        # actual deletion leaves the volume flagged as gone.
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS

        try:
            self._volumes.remove(volume_uuid)
            self._destroy_volume(volume_uuid)
        except Exception as e:
            raise LinstorVolumeManagerError(
                str(e),
                LinstorVolumeManagerError.ERR_VOLUME_DESTROY
            )
702 def lock_volume(self, volume_uuid, locked=True):
703 """
704 Prevent modifications of the volume properties during
705 "self.LOCKED_EXPIRATION_DELAY" seconds. The SR must be locked
706 when used. This method is useful to attach/detach correctly a volume on
707 a slave. Without it the GC can rename a volume, in this case the old
708 volume path can be used by a slave...
709 :param str volume_uuid: The volume uuid to protect/unprotect.
710 :param bool locked: Lock/unlock the volume.
711 """
713 self._ensure_volume_exists(volume_uuid)
715 self._logger(
716 '{} volume {} as locked'.format(
717 'Mark' if locked else 'Unmark',
718 volume_uuid
719 )
720 )
722 volume_properties = self._get_volume_properties(volume_uuid)
723 if locked:
724 volume_properties[
725 self.PROP_IS_READONLY_TIMESTAMP
726 ] = str(time.time())
727 elif self.PROP_IS_READONLY_TIMESTAMP in volume_properties:
728 volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
    def ensure_volume_is_not_locked(self, volume_uuid, timeout=None):
        """
        Ensure a volume is not locked. Wait if necessary.
        :param str volume_uuid: The volume uuid to check.
        :param int timeout: If the volume is always locked after the expiration
        of the timeout, an exception is thrown.
        """
        # Simple delegation to the list variant with a single element.
        return self.ensure_volume_list_is_not_locked([volume_uuid], timeout)
    def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None):
        """
        Ensure none of the given volumes is locked. Wait if necessary.
        Stale locks (older than LOCKED_EXPIRATION_DELAY) are removed.
        :param list volume_uuids: The volume uuids to check.
        :param int timeout: If a volume is still locked after the expiration
        of the timeout, an exception is thrown.
        """
        # Only consider volumes known by this SR.
        checked = set()
        for volume_uuid in volume_uuids:
            if volume_uuid in self._volumes:
                checked.add(volume_uuid)

        if not checked:
            return

        waiting = False

        volume_properties = self._get_kv_cache()

        start = time.time()
        while True:
            # Can't delete in for loop, use a copy of the list.
            remaining = checked.copy()
            for volume_uuid in checked:
                volume_properties.namespace = \
                    self._build_volume_namespace(volume_uuid)
                timestamp = volume_properties.get(
                    self.PROP_IS_READONLY_TIMESTAMP
                )
                if timestamp is None:
                    remaining.remove(volume_uuid)
                    continue

                now = time.time()
                if now - float(timestamp) > self.LOCKED_EXPIRATION_DELAY:
                    # Lock has expired: remove it ourselves.
                    self._logger(
                        'Remove readonly timestamp on {}'.format(volume_uuid)
                    )
                    volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
                    remaining.remove(volume_uuid)
                    continue

                if not waiting:
                    self._logger(
                        'Volume {} is locked, waiting...'.format(volume_uuid)
                    )
                    waiting = True
                break

            if not remaining:
                break
            checked = remaining

            # NOTE(review): `now` is only bound if the for body ran at
            # least once — always true here since `checked` is non-empty
            # past the early return above.
            if timeout is not None and now - start > timeout:
                raise LinstorVolumeManagerError(
                    'volume `{}` is locked and timeout has been reached'
                    .format(volume_uuid),
                    LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
                )

            # We must wait to use the volume. After that we can modify it
            # ONLY if the SR is locked to avoid bad reads on the slaves.
            time.sleep(1)
            volume_properties = self._create_kv_cache()

        if waiting:
            self._logger('No volume locked now!')
    def remove_volume_if_diskless(self, volume_uuid):
        """
        Remove diskless path from local node.
        :param str volume_uuid: The volume uuid to remove.
        :raise LinstorVolumeManagerError: If the LINSTOR deletion request
        fails.
        """

        self._ensure_volume_exists(volume_uuid)

        volume_properties = self._get_volume_properties(volume_uuid)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        node_name = socket.gethostname()

        for resource in self._get_resource_cache().resources:
            if resource.name == volume_name and resource.node_name == node_name:
                # Do not touch tie-breaker resources.
                if linstor.consts.FLAG_TIE_BREAKER in resource.flags:
                    return
                break

        # `resource_delete_if_diskless` is a no-op on diskful resources.
        result = self._linstor.resource_delete_if_diskless(
            node_name=node_name, rsc_name=volume_name
        )
        if not linstor.Linstor.all_api_responses_no_error(result):
            raise LinstorVolumeManagerError(
                'Unable to delete diskless path of `{}` on node `{}`: {}'
                .format(volume_name, node_name, ', '.join(
                    [str(x) for x in result]))
            )
    def introduce_volume(self, volume_uuid):
        """
        Placeholder: volume introduction is not implemented yet.
        :param str volume_uuid: The volume uuid to introduce.
        """
        pass  # TODO: Implement me.
    def resize_volume(self, volume_uuid, new_size):
        """
        Resize a volume.
        :param str volume_uuid: The volume uuid to resize.
        :param int new_size: New size in B.
        :raise LinstorVolumeManagerError: If the resize fails after
        retries.
        """

        volume_name = self.get_volume_name(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)
        # Round up first (never give less than requested), then convert
        # bytes to the KiB unit expected by LINSTOR.
        new_size = self.round_up_volume_size(new_size) // 1024

        retry_count = 30
        while True:
            result = self._linstor.volume_dfn_modify(
                rsc_name=volume_name,
                volume_nr=0,
                size=new_size
            )

            self._mark_resource_cache_as_dirty()

            error_str = self._get_error_str(result)
            if not error_str:
                break

            # After volume creation, DRBD volume can be unusable during many seconds.
            # So we must retry the definition change if the device is not up to date.
            # Often the case for thick provisioning.
            if retry_count and error_str.find('non-UpToDate DRBD device') >= 0:
                time.sleep(2)
                retry_count -= 1
                continue

            raise LinstorVolumeManagerError(
                'Could not resize volume `{}` from SR `{}`: {}'
                .format(volume_uuid, self._group_name, error_str)
            )
871 def get_volume_name(self, volume_uuid):
872 """
873 Get the name of a particular volume.
874 :param str volume_uuid: The volume uuid of the name to get.
875 :return: The volume name.
876 :rtype: str
877 """
879 self._ensure_volume_exists(volume_uuid)
880 volume_properties = self._get_volume_properties(volume_uuid)
881 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
882 if volume_name:
883 return volume_name
884 raise LinstorVolumeManagerError(
885 'Failed to get volume name of {}'.format(volume_uuid)
886 )
    def get_volume_size(self, volume_uuid):
        """
        Get the size of a particular volume.
        :param str volume_uuid: The volume uuid of the size to get.
        :return: The volume size in B.
        :rtype: int
        """

        volume_name = self.get_volume_name(volume_uuid)
        dfns = self._linstor.resource_dfn_list_raise(
            query_volume_definitions=True,
            filter_by_resource_definitions=[volume_name]
        ).resource_definitions

        size = dfns[0].volume_definitions[0].size
        if size < 0:
            raise LinstorVolumeManagerError(
                'Failed to get volume size of: {}'.format(volume_uuid)
            )
        # Convert the controller value to bytes.
        return size * 1024
    def set_auto_promote_timeout(self, volume_uuid, timeout):
        """
        Define the blocking time of open calls when a DRBD
        is already open on another host.
        :param str volume_uuid: The volume uuid to modify.
        :param int timeout: Auto-promote timeout value set on the
        resource definition.
        """

        volume_name = self.get_volume_name(volume_uuid)
        result = self._linstor.resource_dfn_modify(volume_name, {
            'DrbdOptions/Resource/auto-promote-timeout': timeout
        })
        error_str = self._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not change the auto promote timeout of `{}`: {}'
                .format(volume_uuid, error_str)
            )
    def set_drbd_ha_properties(self, volume_name, enabled=True):
        """
        Set or not HA DRBD properties required by drbd-reactor and
        by specific volumes.
        :param str volume_name: The volume to modify.
        :param bool enabled: Enable or disable HA properties.
        """

        properties = {
            'DrbdOptions/auto-quorum': 'disabled',
            'DrbdOptions/Resource/auto-promote': 'no',
            'DrbdOptions/Resource/on-no-data-accessible': 'io-error',
            'DrbdOptions/Resource/on-no-quorum': 'io-error',
            'DrbdOptions/Resource/on-suspended-primary-outdated': 'force-secondary',
            'DrbdOptions/Resource/quorum': 'majority'
        }
        if enabled:
            result = self._linstor.resource_dfn_modify(volume_name, properties)
        else:
            # Disable by deleting the keys instead of setting opposite
            # values.
            result = self._linstor.resource_dfn_modify(volume_name, {}, delete_props=list(properties.keys()))

        error_str = self._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not modify HA DRBD properties on volume `{}`: {}'
                .format(volume_name, error_str)
            )
    def get_volume_info(self, volume_uuid):
        """
        Get the volume info of a particular volume.
        :param str volume_uuid: The volume uuid of the volume info to get.
        :return: The volume info.
        :rtype: VolumeInfo
        """

        volume_name = self.get_volume_name(volume_uuid)
        # Volume info is indexed by LINSTOR volume name, not by uuid.
        return self._get_volumes_info()[volume_name]
    def get_device_path(self, volume_uuid):
        """
        Get the dev path of a volume, create a diskless if necessary.
        :param str volume_uuid: The volume uuid to get the dev path.
        :return: The current device path of the volume.
        :rtype: str
        """

        volume_name = self.get_volume_name(volume_uuid)
        return self._find_device_path(volume_uuid, volume_name)
977 def get_volume_uuid_from_device_path(self, device_path):
978 """
979 Get the volume uuid of a device_path.
980 :param str device_path: The dev path to find the volume uuid.
981 :return: The volume uuid of the local device path.
982 :rtype: str
983 """
985 expected_volume_name = \
986 self.get_volume_name_from_device_path(device_path)
988 volume_names = self.get_volumes_with_name()
989 for volume_uuid, volume_name in volume_names.items():
990 if volume_name == expected_volume_name:
991 return volume_uuid
993 raise LinstorVolumeManagerError(
994 'Unable to find volume uuid from dev path `{}`'.format(device_path)
995 )
997 def get_volume_name_from_device_path(self, device_path):
998 """
999 Get the volume name of a device_path.
1000 :param str device_path: The dev path to find the volume name.
1001 :return: The volume name of the device path.
1002 :rtype: str
1003 """
1005 # Assume that we have a path like this:
1006 # - "/dev/drbd/by-res/xcp-volume-<UUID>/0"
1007 # - "../xcp-volume-<UUID>/0"
1008 if device_path.startswith(DRBD_BY_RES_PATH):
1009 prefix_len = len(DRBD_BY_RES_PATH)
1010 elif device_path.startswith('../'):
1011 prefix_len = 3
1012 else:
1013 raise LinstorVolumeManagerError('Unexpected device path: `{}`'.format(device_path))
1015 res_name_end = device_path.find('/', prefix_len)
1016 assert res_name_end != -1
1017 return device_path[prefix_len:res_name_end]
    def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
        """
        Change the uuid of a volume.

        The rename is done by copying the volume properties into the new
        UUID namespace of the LINSTOR KV store, then clearing the old
        namespace. The numbered steps below are ordered so that a crash at
        any point leaves the store in a recoverable state (via the
        PROP_UPDATING_UUID_SRC marker).

        :param str volume_uuid: The volume to modify.
        :param str new_volume_uuid: The new volume uuid to use.
        :param bool force: If true we doesn't check if volume_uuid is in the
        volume list. I.e. the volume can be marked as deleted but the volume
        can still be in the LINSTOR KV store if the deletion has failed.
        In specific cases like "undo" after a failed clone we must rename a bad
        deleted VDI.
        :raises LinstorVolumeManagerError: If the destination UUID already
        exists, if the source volume is in an inconsistent state, or if any
        copy/clear step fails.
        """

        self._logger(
            'Trying to update volume UUID {} to {}...'
            .format(volume_uuid, new_volume_uuid)
        )
        assert volume_uuid != new_volume_uuid, 'can\'t update volume UUID, same value'

        if not force:
            self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        if new_volume_uuid in self._volumes:
            raise LinstorVolumeManagerError(
                'Volume `{}` already exists'.format(new_volume_uuid),
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        volume_properties = self._get_volume_properties(volume_uuid)
        # A present PROP_UPDATING_UUID_SRC means a previous rename is still
        # in flight (or crashed): refuse to stack another one on top.
        if volume_properties.get(self.PROP_UPDATING_UUID_SRC):
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {}: invalid state'
                .format(volume_uuid)
            )

        # 1. Copy in temp variables metadata and volume_name.
        metadata = volume_properties.get(self.PROP_METADATA)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        # 2. Switch to new volume namespace.
        volume_properties.namespace = self._build_volume_namespace(
            new_volume_uuid
        )

        # The destination namespace must be empty, otherwise we would
        # silently merge two volumes' properties.
        if list(volume_properties.items()):
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {} to {}: '
                .format(volume_uuid, new_volume_uuid) +
                'this last one is not empty'
            )

        try:
            # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC.
            # If we crash after that, the new properties can be removed
            # properly.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
            volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid

            # 4. Copy the properties.
            # Note: On new volumes, during clone for example, the metadata
            # may be missing. So we must test it to avoid this error:
            # "None has to be a str/unicode, but is <type 'NoneType'>"
            if metadata:
                volume_properties[self.PROP_METADATA] = metadata
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            # 5. Ok!
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
        except Exception as err:
            try:
                # Clear the new volume properties in case of failure.
                assert volume_properties.namespace == \
                    self._build_volume_namespace(new_volume_uuid)
                volume_properties.clear()
            except Exception as e:
                # Best effort only: the primary error below matters more.
                self._logger(
                    'Failed to clear new volume properties: {} (ignoring...)'
                    .format(e)
                )
            raise LinstorVolumeManagerError(
                'Failed to copy volume properties: {}'.format(err)
            )

        try:
            # 6. After this point, it's ok we can remove the
            # PROP_UPDATING_UUID_SRC property and clear the src properties
            # without problems.

            # 7. Switch to old volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                volume_uuid
            )
            volume_properties.clear()

            # 8. Switch a last time to new volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                new_volume_uuid
            )
            volume_properties.pop(self.PROP_UPDATING_UUID_SRC)
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to clear volume properties '
                'after volume uuid update: {}'.format(e)
            )

        # Keep the in-memory volume set in sync with the KV store.
        self._volumes.remove(volume_uuid)
        self._volumes.add(new_volume_uuid)

        self._logger(
            'UUID update succeeded of {} to {}! (properties={})'
            .format(
                volume_uuid, new_volume_uuid,
                self._get_filtered_properties(volume_properties)
            )
        )
1135 def update_volume_name(self, volume_uuid, volume_name):
1136 """
1137 Change the volume name of a volume.
1138 :param str volume_uuid: The volume to modify.
1139 :param str volume_name: The volume_name to use.
1140 """
1142 self._ensure_volume_exists(volume_uuid)
1143 self.ensure_volume_is_not_locked(volume_uuid)
1144 if not volume_name.startswith(self.PREFIX_VOLUME):
1145 raise LinstorVolumeManagerError(
1146 'Volume name `{}` must be start with `{}`'
1147 .format(volume_name, self.PREFIX_VOLUME)
1148 )
1150 if volume_name not in self._fetch_resource_names():
1151 raise LinstorVolumeManagerError(
1152 'Volume `{}` doesn\'t exist'.format(volume_name)
1153 )
1155 volume_properties = self._get_volume_properties(volume_uuid)
1156 volume_properties[self.PROP_VOLUME_NAME] = volume_name
1158 def get_usage_states(self, volume_uuid):
1159 """
1160 Check if a volume is currently used.
1161 :param str volume_uuid: The volume uuid to check.
1162 :return: A dictionnary that contains states.
1163 :rtype: dict(str, bool or None)
1164 """
1166 states = {}
1168 volume_name = self.get_volume_name(volume_uuid)
1169 for resource_state in self._linstor.resource_list_raise(
1170 filter_by_resources=[volume_name]
1171 ).resource_states:
1172 states[resource_state.node_name] = resource_state.in_use
1174 return states
1176 def get_volume_openers(self, volume_uuid):
1177 """
1178 Get openers of a volume.
1179 :param str volume_uuid: The volume uuid to monitor.
1180 :return: A dictionnary that contains openers.
1181 :rtype: dict(str, obj)
1182 """
1183 return get_all_volume_openers(self.get_volume_name(volume_uuid), '0')
    def get_volumes_with_name(self):
        """
        Give a volume dictionnary that contains names actually owned.
        :return: A volume/name dict.
        :rtype: dict(str, str)
        """
        # Delegates to the generic property scan using the volume-name regex.
        return self._get_volumes_by_property(self.REG_VOLUME_NAME)
1193 def get_volumes_with_info(self):
1194 """
1195 Give a volume dictionnary that contains VolumeInfos.
1196 :return: A volume/VolumeInfo dict.
1197 :rtype: dict(str, VolumeInfo)
1198 """
1200 volumes = {}
1202 all_volume_info = self._get_volumes_info()
1203 volume_names = self.get_volumes_with_name()
1204 for volume_uuid, volume_name in volume_names.items():
1205 if volume_name:
1206 volume_info = all_volume_info.get(volume_name)
1207 if volume_info:
1208 volumes[volume_uuid] = volume_info
1209 continue
1211 # Well I suppose if this volume is not available,
1212 # LINSTOR has been used directly without using this API.
1213 volumes[volume_uuid] = self.VolumeInfo('')
1215 return volumes
1217 def get_volumes_with_metadata(self):
1218 """
1219 Give a volume dictionnary that contains metadata.
1220 :return: A volume/metadata dict.
1221 :rtype: dict(str, dict)
1222 """
1224 volumes = {}
1226 metadata = self._get_volumes_by_property(self.REG_METADATA)
1227 for volume_uuid, volume_metadata in metadata.items():
1228 if volume_metadata:
1229 volume_metadata = json.loads(volume_metadata)
1230 if isinstance(volume_metadata, dict):
1231 volumes[volume_uuid] = volume_metadata
1232 continue
1233 raise LinstorVolumeManagerError(
1234 'Expected dictionary in volume metadata: {}'
1235 .format(volume_uuid)
1236 )
1238 volumes[volume_uuid] = {}
1240 return volumes
1242 def get_volume_metadata(self, volume_uuid):
1243 """
1244 Get the metadata of a volume.
1245 :return: Dictionary that contains metadata.
1246 :rtype: dict
1247 """
1249 self._ensure_volume_exists(volume_uuid)
1250 volume_properties = self._get_volume_properties(volume_uuid)
1251 metadata = volume_properties.get(self.PROP_METADATA)
1252 if metadata:
1253 metadata = json.loads(metadata)
1254 if isinstance(metadata, dict):
1255 return metadata
1256 raise LinstorVolumeManagerError(
1257 'Expected dictionary in volume metadata: {}'
1258 .format(volume_uuid)
1259 )
1260 return {}
1262 def set_volume_metadata(self, volume_uuid, metadata):
1263 """
1264 Set the metadata of a volume.
1265 :param dict metadata: Dictionary that contains metadata.
1266 """
1268 self._ensure_volume_exists(volume_uuid)
1269 self.ensure_volume_is_not_locked(volume_uuid)
1271 assert isinstance(metadata, dict)
1272 volume_properties = self._get_volume_properties(volume_uuid)
1273 volume_properties[self.PROP_METADATA] = json.dumps(metadata)
1275 def update_volume_metadata(self, volume_uuid, metadata):
1276 """
1277 Update the metadata of a volume. It modify only the given keys.
1278 It doesn't remove unreferenced key instead of set_volume_metadata.
1279 :param dict metadata: Dictionary that contains metadata.
1280 """
1282 self._ensure_volume_exists(volume_uuid)
1283 self.ensure_volume_is_not_locked(volume_uuid)
1285 assert isinstance(metadata, dict)
1286 volume_properties = self._get_volume_properties(volume_uuid)
1288 current_metadata = json.loads(
1289 volume_properties.get(self.PROP_METADATA, '{}')
1290 )
1291 if not isinstance(metadata, dict):
1292 raise LinstorVolumeManagerError(
1293 'Expected dictionary in volume metadata: {}'
1294 .format(volume_uuid)
1295 )
1297 for key, value in metadata.items():
1298 current_metadata[key] = value
1299 volume_properties[self.PROP_METADATA] = json.dumps(current_metadata)
1301 def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True):
1302 """
1303 Clone a volume. Do not copy the data, this method creates a new volume
1304 with the same size.
1305 :param str volume_uuid: The volume to clone.
1306 :param str clone_uuid: The cloned volume.
1307 :param bool persistent: If false the volume will be unavailable
1308 on the next constructor call LinstorSR(...).
1309 :return: The current device path of the cloned volume.
1310 :rtype: str
1311 """
1313 volume_name = self.get_volume_name(volume_uuid)
1314 self.ensure_volume_is_not_locked(volume_uuid)
1316 # 1. Find ideal nodes + size to use.
1317 ideal_node_names, size = self._get_volume_node_names_and_size(
1318 volume_name
1319 )
1320 if size <= 0:
1321 raise LinstorVolumeManagerError(
1322 'Invalid size of {} for volume `{}`'.format(size, volume_name)
1323 )
1325 # 2. Create clone!
1326 return self.create_volume(clone_uuid, size, persistent)
1328 def remove_resourceless_volumes(self):
1329 """
1330 Remove all volumes without valid or non-empty name
1331 (i.e. without LINSTOR resource). It's different than
1332 LinstorVolumeManager constructor that takes a `repair` param that
1333 removes volumes with `PROP_NOT_EXISTS` to 1.
1334 """
1336 resource_names = self._fetch_resource_names()
1337 for volume_uuid, volume_name in self.get_volumes_with_name().items():
1338 if not volume_name or volume_name not in resource_names:
1339 # Don't force, we can be sure of what's happening.
1340 self.destroy_volume(volume_uuid)
    def destroy(self):
        """
        Destroy this SR. Object should not be used after that.

        Refuses to run while any volume (managed or external) remains;
        then unmounts/destroys the LINSTOR database volume, the resource
        groups and the storage pools, restarting/stopping the controller
        as required at each step.
        :raises LinstorVolumeManagerError: If volumes remain or a
        destruction step fails.
        """

        # 1. Ensure volume list is empty. No cost.
        if self._volumes:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes'
            )

        # 2. Fetch ALL resource names (including ones flagged for deletion).
        # This list may therefore contain volumes created outside
        # the scope of the driver.
        resource_names = self._fetch_resource_names(ignore_deleted=False)
        try:
            resource_names.remove(DATABASE_VOLUME_NAME)
        except KeyError:
            # Really strange to reach that point.
            # Normally we always have the database volume in the list.
            pass

        # 3. Ensure the resource name list is entirely empty...
        if resource_names:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes (created externally or being deleted)'
            )

        # 4. Destroying...
        controller_is_running = self._controller_is_running()
        uri = 'linstor://localhost'
        try:
            # Stop the controller first: the database volume cannot be
            # unmounted safely while the controller uses it.
            if controller_is_running:
                self._start_controller(start=False)

            # 4.1. Umount LINSTOR database.
            self._mount_database_volume(
                self.build_device_path(DATABASE_VOLUME_NAME),
                mount=False,
                force=True
            )

            # 4.2. Refresh instance: restart the controller and reconnect,
            # the previous connection died with the controller stop.
            self._start_controller(start=True)
            self._linstor = self._create_linstor_instance(
                uri, keep_uri_unmodified=True
            )

            # 4.3. Destroy database volume.
            self._destroy_resource(DATABASE_VOLUME_NAME)

            # 4.4. Refresh linstor connection.
            # Without we get this error:
            # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.."
            # Because the deletion of the databse was not seen by Linstor for some reason.
            # It seems a simple refresh of the Linstor connection make it aware of the deletion.
            self._linstor.disconnect()
            self._linstor.connect()

            # 4.5. Destroy remaining drbd nodes on hosts.
            # We check if there is a DRBD node on hosts that could mean blocking when destroying resource groups.
            # It needs to be done locally by each host so we go through the linstor-manager plugin.
            # If we don't do this sometimes, the destroy will fail when trying to destroy the resource groups with:
            # "linstor-manager:destroy error: Failed to destroy SP `xcp-sr-linstor_group_thin_device` on node `r620-s2`: The specified storage pool 'xcp-sr-linstor_group_thin_device' on node 'r620-s2' can not be deleted as volumes / snapshot-volumes are still using it."
            session = util.timeout_call(5, util.get_localAPI_session)
            for host_ref in session.xenapi.host.get_all():
                try:
                    # Best effort per host: a failure is logged, not fatal.
                    response = session.xenapi.host.call_plugin(
                        host_ref, 'linstor-manager', 'destroyDrbdVolumes', {'volume_group': self._group_name}
                    )
                except Exception as e:
                    util.SMlog('Calling destroyDrbdVolumes on host {} failed with error {}'.format(host_ref, e))

            # 4.6. Destroy group and storage pools.
            self._destroy_resource_group(self._linstor, self._group_name)
            self._destroy_resource_group(self._linstor, self._ha_group_name)
            for pool in self._get_storage_pools(force=True):
                self._destroy_storage_pool(
                    self._linstor, pool.name, pool.node_name
                )
        except Exception as e:
            # Restore the controller to its initial state before failing.
            self._start_controller(start=controller_is_running)
            raise e

        try:
            # Final cleanup of the local database directory; failures here
            # are non-fatal since the SR itself is already gone.
            self._start_controller(start=False)
            for file in os.listdir(DATABASE_PATH):
                if file != 'lost+found':
                    os.remove(DATABASE_PATH + '/' + file)
        except Exception as e:
            util.SMlog(
                'Ignoring failure after LINSTOR SR destruction: {}'
                .format(e)
            )
1440 def find_up_to_date_diskful_nodes(self, volume_uuid):
1441 """
1442 Find all nodes that contain a specific volume using diskful disks.
1443 The disk must be up to data to be used.
1444 :param str volume_uuid: The volume to use.
1445 :return: The available nodes.
1446 :rtype: tuple(set(str), str)
1447 """
1449 volume_name = self.get_volume_name(volume_uuid)
1451 in_use_by = None
1452 node_names = set()
1454 resource_states = filter(
1455 lambda resource_state: resource_state.name == volume_name,
1456 self._get_resource_cache().resource_states
1457 )
1459 for resource_state in resource_states:
1460 volume_state = resource_state.volume_states[0]
1461 if volume_state.disk_state == 'UpToDate':
1462 node_names.add(resource_state.node_name)
1463 if resource_state.in_use:
1464 in_use_by = resource_state.node_name
1466 return (node_names, in_use_by)
    def invalidate_resource_cache(self):
        """
        If resources are impacted by external commands like vhdutil,
        it's necessary to call this function to invalidate current resource
        cache.
        """
        # Marks both the resource cache and the volume-info cache as dirty.
        self._mark_resource_cache_as_dirty()
1476 def has_node(self, node_name):
1477 """
1478 Check if a node exists in the LINSTOR database.
1479 :rtype: bool
1480 """
1481 result = self._linstor.node_list()
1482 error_str = self._get_error_str(result)
1483 if error_str:
1484 raise LinstorVolumeManagerError(
1485 'Failed to list nodes using `{}`: {}'
1486 .format(node_name, error_str)
1487 )
1488 return bool(result[0].node(node_name))
1490 def create_node(self, node_name, ip):
1491 """
1492 Create a new node in the LINSTOR database.
1493 :param str node_name: Node name to use.
1494 :param str ip: Host IP to communicate.
1495 """
1496 result = self._linstor.node_create(
1497 node_name,
1498 linstor.consts.VAL_NODE_TYPE_CMBD,
1499 ip
1500 )
1501 errors = self._filter_errors(result)
1502 if errors:
1503 error_str = self._get_error_str(errors)
1504 raise LinstorVolumeManagerError(
1505 'Failed to create node `{}`: {}'.format(node_name, error_str)
1506 )
1508 def destroy_node(self, node_name):
1509 """
1510 Destroy a node in the LINSTOR database.
1511 :param str node_name: Node name to remove.
1512 """
1513 result = self._linstor.node_delete(node_name)
1514 errors = self._filter_errors(result)
1515 if errors:
1516 error_str = self._get_error_str(errors)
1517 raise LinstorVolumeManagerError(
1518 'Failed to destroy node `{}`: {}'.format(node_name, error_str)
1519 )
1521 def create_node_interface(self, node_name, name, ip):
1522 """
1523 Create a new node interface in the LINSTOR database.
1524 :param str node_name: Node name of the interface to use.
1525 :param str name: Interface to create.
1526 :param str ip: IP of the interface.
1527 """
1528 result = self._linstor.netinterface_create(node_name, name, ip)
1529 errors = self._filter_errors(result)
1530 if errors:
1531 error_str = self._get_error_str(errors)
1532 raise LinstorVolumeManagerError(
1533 'Failed to create node interface on `{}`: {}'.format(node_name, error_str)
1534 )
1536 def destroy_node_interface(self, node_name, name):
1537 """
1538 Destroy a node interface in the LINSTOR database.
1539 :param str node_name: Node name of the interface to remove.
1540 :param str name: Interface to remove.
1541 """
1543 if name == 'default':
1544 raise LinstorVolumeManagerError(
1545 'Unable to delete the default interface of a node!'
1546 )
1548 result = self._linstor.netinterface_delete(node_name, name)
1549 errors = self._filter_errors(result)
1550 if errors:
1551 error_str = self._get_error_str(errors)
1552 raise LinstorVolumeManagerError(
1553 'Failed to destroy node interface on `{}`: {}'.format(node_name, error_str)
1554 )
1556 def modify_node_interface(self, node_name, name, ip):
1557 """
1558 Modify a node interface in the LINSTOR database. Create it if necessary.
1559 :param str node_name: Node name of the interface to use.
1560 :param str name: Interface to modify or create.
1561 :param str ip: IP of the interface.
1562 """
1563 result = self._linstor.netinterface_create(node_name, name, ip)
1564 errors = self._filter_errors(result)
1565 if not errors:
1566 return
1568 if self._check_errors(errors, [linstor.consts.FAIL_EXISTS_NET_IF]):
1569 result = self._linstor.netinterface_modify(node_name, name, ip)
1570 errors = self._filter_errors(result)
1571 if not errors:
1572 return
1574 error_str = self._get_error_str(errors)
1575 raise LinstorVolumeManagerError(
1576 'Unable to modify interface on `{}`: {}'.format(node_name, error_str)
1577 )
    def list_node_interfaces(self, node_name):
        """
        List all node interfaces.
        :param str node_name: Node name to use to list interfaces.
        :return: An interface name -> {'address', 'active'} mapping.
        :rtype: dict
        """
        result = self._linstor.net_interface_list(node_name)
        if not result:
            raise LinstorVolumeManagerError(
                'Unable to list interfaces on `{}`: no list received'.format(node_name)
            )

        interfaces = {}
        for interface in result:
            # Use the raw REST payload of each interface entry.
            interface = interface._rest_data
            interfaces[interface['name']] = {
                'address': interface['address'],
                'active': interface['is_active']
            }
        return interfaces
1601 def get_node_preferred_interface(self, node_name):
1602 """
1603 Get the preferred interface used by a node.
1604 :param str node_name: Node name of the interface to get.
1605 :rtype: str
1606 """
1607 try:
1608 nodes = self._linstor.node_list_raise([node_name]).nodes
1609 if nodes:
1610 properties = nodes[0].props
1611 return properties.get('PrefNic', 'default')
1612 return nodes
1613 except Exception as e:
1614 raise LinstorVolumeManagerError(
1615 'Failed to get preferred interface: `{}`'.format(e)
1616 )
1618 def set_node_preferred_interface(self, node_name, name):
1619 """
1620 Set the preferred interface to use on a node.
1621 :param str node_name: Node name of the interface.
1622 :param str name: Preferred interface to use.
1623 """
1624 result = self._linstor.node_modify(node_name, property_dict={'PrefNic': name})
1625 errors = self._filter_errors(result)
1626 if errors:
1627 error_str = self._get_error_str(errors)
1628 raise LinstorVolumeManagerError(
1629 'Failed to set preferred node interface on `{}`: {}'.format(node_name, error_str)
1630 )
1632 def get_nodes_info(self):
1633 """
1634 Get all nodes + statuses, used or not by the pool.
1635 :rtype: dict(str, dict)
1636 """
1637 try:
1638 nodes = {}
1639 for node in self._linstor.node_list_raise().nodes:
1640 nodes[node.name] = node.connection_status
1641 return nodes
1642 except Exception as e:
1643 raise LinstorVolumeManagerError(
1644 'Failed to get all nodes: `{}`'.format(e)
1645 )
1647 def get_storage_pools_info(self):
1648 """
1649 Give all storage pools of current group name.
1650 :rtype: dict(str, list)
1651 """
1652 storage_pools = {}
1653 for pool in self._get_storage_pools(force=True):
1654 if pool.node_name not in storage_pools:
1655 storage_pools[pool.node_name] = []
1657 size = -1
1658 capacity = -1
1660 space = pool.free_space
1661 if space:
1662 size = space.free_capacity
1663 if size < 0:
1664 size = -1
1665 else:
1666 size *= 1024
1667 capacity = space.total_capacity
1668 if capacity <= 0:
1669 capacity = -1
1670 else:
1671 capacity *= 1024
1673 storage_pools[pool.node_name].append({
1674 'name': pool.name,
1675 'linstor-uuid': pool.uuid,
1676 'free-size': size,
1677 'capacity': capacity
1678 })
1680 return storage_pools
    def get_resources_info(self):
        """
        Give all resources of current group name.
        :return: Resource name -> {'uuid', 'nodes': {node -> placement info}}.
        :rtype: dict(str, list)
        """
        resources = {}
        resource_list = self._get_resource_cache()
        volume_names = self.get_volumes_with_name()

        # Pass 1: build the per-node placement and volume entries.
        for resource in resource_list.resources:
            if resource.name not in resources:
                resources[resource.name] = { 'nodes': {}, 'uuid': '' }
            resource_nodes = resources[resource.name]['nodes']

            resource_nodes[resource.node_name] = {
                'volumes': [],
                'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags,
                'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags
            }
            resource_volumes = resource_nodes[resource.node_name]['volumes']

            for volume in resource.volumes:
                # We ignore diskless pools of the form "DfltDisklessStorPool".
                if volume.storage_pool_name != self._group_name:
                    continue

                # Sizes are reported in KiB by LINSTOR; negative means unknown.
                usable_size = volume.usable_size
                if usable_size < 0:
                    usable_size = -1
                else:
                    usable_size *= 1024

                allocated_size = volume.allocated_size
                if allocated_size < 0:
                    allocated_size = -1
                else:
                    allocated_size *= 1024

                resource_volumes.append({
                    'storage-pool-name': volume.storage_pool_name,
                    'linstor-uuid': volume.uuid,
                    'number': volume.number,
                    'device-path': volume.device_path,
                    'usable-size': usable_size,
                    'allocated-size': allocated_size
                })

        # Pass 2: merge runtime states (in-use flag, per-volume disk state).
        # NOTE(review): this indexes resources[...] directly — presumably
        # every state refers to a resource listed above; a mismatch would
        # raise KeyError. Confirm against the LINSTOR API guarantees.
        for resource_state in resource_list.resource_states:
            resource = resources[resource_state.rsc_name]['nodes'][resource_state.node_name]
            resource['in-use'] = resource_state.in_use

            volumes = resource['volumes']
            for volume_state in resource_state.volume_states:
                volume = next((x for x in volumes if x['number'] == volume_state.number), None)
                if volume:
                    volume['disk-state'] = volume_state.disk_state

        # Pass 3: attach the SM volume UUID to each known resource.
        for volume_uuid, volume_name in volume_names.items():
            resource = resources.get(volume_name)
            if resource:
                resource['uuid'] = volume_uuid

        return resources
    def get_database_path(self):
        """
        Get the database path.
        :return: The current database path.
        :rtype: str
        """
        # Delegates to the classmethod helper using this instance's client.
        return self._request_database_path(self._linstor)
1753 @classmethod
1754 def get_all_group_names(cls, base_name):
1755 """
1756 Get all group names. I.e. list of current group + HA.
1757 :param str base_name: The SR group_name to use.
1758 :return: List of group names.
1759 :rtype: list
1760 """
1761 return [cls._build_group_name(base_name), cls._build_ha_group_name(base_name)]
    @classmethod
    def create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__):
        """
        Create a new SR on the given nodes.
        :param str group_name: The SR group_name to use.
        :param set(str) ips: Node ips.
        :param int redundancy: How many copy of volumes should we store?
        :param bool thin_provisioning: Use thin or thick provisioning.
        :param function logger: Function to log messages.
        :return: A new LinstorSr instance.
        :rtype: LinstorSr
        """

        try:
            # The controller must run locally while _create_sr builds the
            # storage pools, resource groups and database volume.
            cls._start_controller(start=True)
            sr = cls._create_sr(group_name, ips, redundancy, thin_provisioning, logger)
        finally:
            # Controller must be stopped and volume unmounted because
            # it is the role of the drbd-reactor daemon to do the right
            # actions. This cleanup runs on success AND on failure.
            cls._start_controller(start=False)
            cls._mount_volume(
                cls.build_device_path(DATABASE_VOLUME_NAME),
                DATABASE_PATH,
                mount=False
            )
        return sr
    @classmethod
    def _create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__):
        """
        Worker for create_sr: registers the nodes, creates storage pools,
        resource groups and the LINSTOR database volume, rolling back on
        failure. The controller is expected to be running locally.
        :return: A new instance of this manager bound to the created SR.
        """
        # 1. Check if SR already exists.
        uri = 'linstor://localhost'

        lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)

        node_names = list(ips.keys())
        for node_name, ip in ips.items():
            while True:
                # Try to create node.
                result = lin.node_create(
                    node_name,
                    linstor.consts.VAL_NODE_TYPE_CMBD,
                    ip
                )

                errors = cls._filter_errors(result)
                if cls._check_errors(
                    errors, [linstor.consts.FAIL_EXISTS_NODE]
                ):
                    # If it already exists, remove, then recreate.
                    result = lin.node_delete(node_name)
                    error_str = cls._get_error_str(result)
                    if error_str:
                        raise LinstorVolumeManagerError(
                            'Failed to remove old node `{}`: {}'
                            .format(node_name, error_str)
                        )
                elif not errors:
                    break  # Created!
                else:
                    raise LinstorVolumeManagerError(
                        'Failed to create node `{}` with ip `{}`: {}'.format(
                            node_name, ip, cls._get_error_str(errors)
                        )
                    )

        driver_pool_name = group_name
        base_group_name = group_name
        group_name = cls._build_group_name(group_name)
        storage_pool_name = group_name
        pools = lin.storage_pool_list_raise(filter_by_stor_pools=[storage_pool_name]).storage_pools
        if pools:
            existing_node_names = [pool.node_name for pool in pools]
            raise LinstorVolumeManagerError(
                'Unable to create SR `{}`. It already exists on node(s): {}'
                .format(group_name, existing_node_names)
            )

        # A leftover resource group without any resource definition is
        # treated as stale: its config files are moved aside instead of
        # aborting the creation.
        if lin.resource_group_list_raise(
            cls.get_all_group_names(base_group_name)
        ).resource_groups:
            if not lin.resource_dfn_list_raise().resource_definitions:
                backup_path = cls._create_database_backup_path()
                logger(
                    'Group name already exists `{}` without LVs. '
                    'Ignoring and moving the config files in {}'.format(group_name, backup_path)
                )
                cls._move_files(DATABASE_PATH, backup_path)
            else:
                raise LinstorVolumeManagerError(
                    'Unable to create SR `{}`: The group name already exists'
                    .format(group_name)
                )

        # Thin provisioning requires an LVM thin pool given as 'VG/LV'.
        if thin_provisioning:
            driver_pool_parts = driver_pool_name.split('/')
            if not len(driver_pool_parts) == 2:
                raise LinstorVolumeManagerError(
                    'Invalid group name using thin provisioning. '
                    'Expected format: \'VG/LV`\''
                )

        # 2. Create storage pool on each node + resource group.
        reg_volume_group_not_found = re.compile(
            ".*Volume group '.*' not found$"
        )

        # i tracks how far storage pool creation got, for rollback below.
        i = 0
        try:
            # 2.a. Create storage pools.
            storage_pool_count = 0
            while i < len(node_names):
                node_name = node_names[i]

                result = lin.storage_pool_create(
                    node_name=node_name,
                    storage_pool_name=storage_pool_name,
                    storage_driver='LVM_THIN' if thin_provisioning else 'LVM',
                    driver_pool_name=driver_pool_name
                )

                errors = linstor.Linstor.filter_api_call_response_errors(
                    result
                )
                if errors:
                    # A missing VG on one node is tolerated (that node just
                    # won't host a pool); any other error aborts.
                    if len(errors) == 1 and errors[0].is_error(
                        linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR
                    ) and reg_volume_group_not_found.match(errors[0].message):
                        logger(
                            'Volume group `{}` not found on `{}`. Ignoring...'
                            .format(group_name, node_name)
                        )
                        cls._destroy_storage_pool(lin, storage_pool_name, node_name)
                    else:
                        error_str = cls._get_error_str(result)
                        raise LinstorVolumeManagerError(
                            'Could not create SP `{}` on node `{}`: {}'
                            .format(group_name, node_name, error_str)
                        )
                else:
                    storage_pool_count += 1
                i += 1

            if not storage_pool_count:
                raise LinstorVolumeManagerError(
                    'Unable to create SR `{}`: No VG group found'.format(
                        group_name,
                    )
                )

            # 2.b. Create resource groups (normal group + 3-way HA group).
            ha_group_name = cls._build_ha_group_name(base_group_name)
            cls._create_resource_group(
                lin,
                group_name,
                storage_pool_name,
                redundancy,
                True
            )
            cls._create_resource_group(
                lin,
                ha_group_name,
                storage_pool_name,
                3,
                True
            )

            # 3. Create the LINSTOR database volume and mount it.
            try:
                logger('Creating database volume...')
                volume_path = cls._create_database_volume(
                    lin, ha_group_name, storage_pool_name, node_names, redundancy
                )
            except LinstorVolumeManagerError as e:
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    logger('Destroying database volume after creation fail...')
                    cls._force_destroy_database_volume(lin, group_name)
                raise

            try:
                logger('Mounting database volume...')

                # First we must disable the controller to move safely the
                # LINSTOR config.
                cls._start_controller(start=False)

                cls._mount_database_volume(volume_path)
            except Exception as e:
                # Ensure we are connected because controller has been
                # restarted during mount call.
                logger('Destroying database volume after mount fail...')

                try:
                    cls._start_controller(start=True)
                except Exception:
                    pass

                lin = cls._create_linstor_instance(
                    uri, keep_uri_unmodified=True
                )
                cls._force_destroy_database_volume(lin, group_name)
                raise e

            # Reconnect: the previous client died with the controller stop.
            cls._start_controller(start=True)
            lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)

        # 4. Remove storage pools/resource/volume group in the case of errors.
        except Exception as e:
            logger('Destroying resource group and storage pools after fail...')
            try:
                # NOTE(review): ha_group_name is only bound once step 2.b is
                # reached — a failure before that point would raise NameError
                # here (masked by the outer except in callers). Confirm.
                cls._destroy_resource_group(lin, group_name)
                cls._destroy_resource_group(lin, ha_group_name)
            except Exception as e2:
                logger('Failed to destroy resource group: {}'.format(e2))
                pass
            # Roll back every storage pool created so far (indices 0..i).
            j = 0
            i = min(i, len(node_names) - 1)
            while j <= i:
                try:
                    cls._destroy_storage_pool(lin, storage_pool_name, node_names[j])
                except Exception as e2:
                    logger('Failed to destroy resource group: {}'.format(e2))
                    pass
                j += 1
            raise e

        # 5. Return new instance.
        # __new__ bypasses __init__ on purpose: the SR is already set up.
        # NOTE(review): _ha_group_name is not set here although destroy()
        # reads self._ha_group_name — presumably it is set on the normal
        # constructor path; verify.
        instance = cls.__new__(cls)
        instance._linstor = lin
        instance._logger = logger
        instance._redundancy = redundancy
        instance._base_group_name = base_group_name
        instance._group_name = group_name
        instance._volumes = set()
        instance._storage_pools_time = 0
        instance._kv_cache = instance._create_kv_cache()
        instance._resource_cache = None
        instance._resource_cache_dirty = True
        instance._volume_info_cache = None
        instance._volume_info_cache_dirty = True
        return instance
2005 @classmethod
2006 def build_device_path(cls, volume_name):
2007 """
2008 Build a device path given a volume name.
2009 :param str volume_name: The volume name to use.
2010 :return: A valid or not device path.
2011 :rtype: str
2012 """
2014 return '{}{}/0'.format(cls.DEV_ROOT_PATH, volume_name)
2016 @classmethod
2017 def build_volume_name(cls, base_name):
2018 """
2019 Build a volume name given a base name (i.e. a UUID).
2020 :param str base_name: The volume name to use.
2021 :return: A valid or not device path.
2022 :rtype: str
2023 """
2024 return '{}{}'.format(cls.PREFIX_VOLUME, base_name)
    @classmethod
    def round_up_volume_size(cls, volume_size):
        """
        Align volume size on higher multiple of BLOCK_SIZE.
        :param int volume_size: The volume size to align.
        :return: An aligned volume size.
        :rtype: int
        """
        # Delegates to the module-level round_up helper.
        return round_up(volume_size, cls.BLOCK_SIZE)
    @classmethod
    def round_down_volume_size(cls, volume_size):
        """
        Align volume size on lower multiple of BLOCK_SIZE.
        :param int volume_size: The volume size to align.
        :return: An aligned volume size.
        :rtype: int
        """
        # Delegates to the module-level round_down helper.
        return round_down(volume_size, cls.BLOCK_SIZE)
2046 # --------------------------------------------------------------------------
2047 # Private helpers.
2048 # --------------------------------------------------------------------------
    def _create_kv_cache(self):
        # Rebuild the KV cache from the root namespace and mark it clean.
        self._kv_cache = self._create_linstor_kv('/')
        self._kv_cache_dirty = False
        return self._kv_cache
2055 def _get_kv_cache(self):
2056 if self._kv_cache_dirty:
2057 self._kv_cache = self._create_kv_cache()
2058 return self._kv_cache
    def _create_resource_cache(self):
        # Rebuild the resource cache from LINSTOR and mark it clean.
        self._resource_cache = self._linstor.resource_list_raise()
        self._resource_cache_dirty = False
        return self._resource_cache
2065 def _get_resource_cache(self):
2066 if self._resource_cache_dirty:
2067 self._resource_cache = self._create_resource_cache()
2068 return self._resource_cache
2070 def _mark_resource_cache_as_dirty(self):
2071 self._resource_cache_dirty = True
2072 self._volume_info_cache_dirty = True
2074 # --------------------------------------------------------------------------
2076 def _ensure_volume_exists(self, volume_uuid):
2077 if volume_uuid not in self._volumes:
2078 raise LinstorVolumeManagerError(
2079 'volume `{}` doesn\'t exist'.format(volume_uuid),
2080 LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
2081 )
2083 def _find_best_size_candidates(self):
2084 result = self._linstor.resource_group_qmvs(self._group_name)
2085 error_str = self._get_error_str(result)
2086 if error_str:
2087 raise LinstorVolumeManagerError(
2088 'Failed to get max volume size allowed of SR `{}`: {}'.format(
2089 self._group_name,
2090 error_str
2091 )
2092 )
2093 return result[0].candidates
    def _fetch_resource_names(self, ignore_deleted=True):
        """
        Return the names of all resource definitions belonging to the groups
        derived from `self._base_group_name`.

        :param bool ignore_deleted: When True, the DELETE flag is ignored,
            i.e. definitions pending deletion are *still returned*. When
            False, they are filtered out.
        :return: The matching resource definition names.
        :rtype: set(str)
        """
        resource_names = set()
        dfns = self._linstor.resource_dfn_list_raise().resource_definitions
        for dfn in dfns:
            # NOTE(review): `get_all_group_names` is re-evaluated on every
            # iteration; it could probably be hoisted — confirm it is cheap.
            if dfn.resource_group_name in self.get_all_group_names(self._base_group_name) and (
                ignore_deleted or
                linstor.consts.FLAG_DELETE not in dfn.flags
            ):
                resource_names.add(dfn.name)
        return resource_names
2106 def _get_volumes_info(self, volume_name=None):
2107 all_volume_info = {}
2109 if not self._volume_info_cache_dirty:
2110 return self._volume_info_cache
2112 def process_resource(resource):
2113 if resource.name not in all_volume_info:
2114 current = all_volume_info[resource.name] = self.VolumeInfo(
2115 resource.name
2116 )
2117 else:
2118 current = all_volume_info[resource.name]
2120 if linstor.consts.FLAG_DISKLESS not in resource.flags:
2121 current.diskful.append(resource.node_name)
2123 for volume in resource.volumes:
2124 # We ignore diskless pools of the form "DfltDisklessStorPool".
2125 if volume.storage_pool_name != self._group_name:
2126 continue
2127 # Only fetch first volume.
2128 if volume.number != 0:
2129 continue
2131 allocated_size = volume.allocated_size
2132 if allocated_size > current.allocated_size:
2133 current.allocated_size = allocated_size
2135 usable_size = volume.usable_size
2136 if usable_size > 0 and (
2137 usable_size < current.virtual_size or
2138 not current.virtual_size
2139 ):
2140 current.virtual_size = usable_size
2142 try:
2143 for resource in self._get_resource_cache().resources:
2144 process_resource(resource)
2145 for volume in all_volume_info.values():
2146 if volume.allocated_size <= 0:
2147 raise LinstorVolumeManagerError('Failed to get allocated size of `{}`'.format(resource.name))
2149 if volume.virtual_size <= 0:
2150 raise LinstorVolumeManagerError('Failed to get usable size of `{}`'.format(volume.name))
2152 volume.allocated_size *= 1024
2153 volume.virtual_size *= 1024
2154 except LinstorVolumeManagerError:
2155 self._mark_resource_cache_as_dirty()
2156 raise
2158 self._volume_info_cache_dirty = False
2159 self._volume_info_cache = all_volume_info
2161 return all_volume_info
2163 def _get_volume_node_names_and_size(self, volume_name):
2164 node_names = set()
2165 size = -1
2166 for resource in self._linstor.resource_list_raise(
2167 filter_by_resources=[volume_name]
2168 ).resources:
2169 for volume in resource.volumes:
2170 # We ignore diskless pools of the form "DfltDisklessStorPool".
2171 if volume.storage_pool_name != self._group_name:
2172 continue
2174 node_names.add(resource.node_name)
2176 usable_size = volume.usable_size
2177 if usable_size <= 0:
2178 continue
2180 if size < 0:
2181 size = usable_size
2182 else:
2183 size = min(size, usable_size)
2185 if size <= 0:
2186 raise LinstorVolumeManagerError('Failed to get usable size of `{}`'.format(resource.name))
2188 return (node_names, size * 1024)
2190 def _compute_size(self, attr):
2191 capacity = 0
2192 for pool in self._get_storage_pools(force=True):
2193 space = pool.free_space
2194 if space:
2195 size = getattr(space, attr)
2196 if size < 0:
2197 raise LinstorVolumeManagerError(
2198 'Failed to get pool {} attr of `{}`'
2199 .format(attr, pool.node_name)
2200 )
2201 capacity += size
2202 return capacity * 1024
2204 def _get_node_names(self):
2205 node_names = set()
2206 for pool in self._get_storage_pools():
2207 node_names.add(pool.node_name)
2208 return node_names
2210 def _get_storage_pools(self, force=False):
2211 cur_time = time.time()
2212 elsaped_time = cur_time - self._storage_pools_time
2214 if force or elsaped_time >= self.STORAGE_POOLS_FETCH_INTERVAL:
2215 self._storage_pools = self._linstor.storage_pool_list_raise(
2216 filter_by_stor_pools=[self._group_name]
2217 ).storage_pools
2218 self._storage_pools_time = time.time()
2220 return self._storage_pools
    def _create_volume(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create the LINSTOR resource definition (and optionally place the
        resources) backing a volume.

        :param str volume_uuid: UUID used for error reporting and cleanup.
        :param str volume_name: LINSTOR resource definition name.
        :param int size: Requested size in bytes, rounded up to BLOCK_SIZE.
        :param bool place_resources: When True, auto-place the resources
            using the group's redundancy.
        :param bool high_availability: When True, spawn in the HA group
            (created on the fly if missing).
        :raises LinstorVolumeManagerError: On creation failure.
        """
        size = self.round_up_volume_size(size)
        self._mark_resource_cache_as_dirty()

        group_name = self._ha_group_name if high_availability else self._group_name

        def create_definition():
            # Create the resource definition only — no placement yet.
            first_attempt = True
            while True:
                try:
                    self._check_volume_creation_errors(
                        self._linstor.resource_group_spawn(
                            rsc_grp_name=group_name,
                            rsc_dfn_name=volume_name,
                            vlm_sizes=['{}B'.format(size)],
                            definitions_only=True
                        ),
                        volume_uuid,
                        self._group_name
                    )
                    break
                except LinstorVolumeManagerError as e:
                    # The HA group may not exist yet: create it once
                    # and retry. Any other error is fatal.
                    if (
                        not first_attempt or
                        not high_availability or
                        e.code != LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
                    ):
                        raise

                    first_attempt = False
                    self._create_resource_group(
                        self._linstor,
                        group_name,
                        self._group_name,
                        3,
                        True
                    )

            self._configure_volume_peer_slots(self._linstor, volume_name)

        def clean():
            # Best-effort rollback of a partially created volume.
            try:
                self._destroy_volume(volume_uuid, force=True, preserve_properties=True)
            except Exception as e:
                self._logger(
                    'Unable to destroy volume {} after creation fail: {}'
                    .format(volume_uuid, e)
                )

        def create():
            try:
                create_definition()
                if place_resources:
                    # Basic case when we use the default redundancy of the group.
                    self._check_volume_creation_errors(
                        self._linstor.resource_auto_place(
                            rsc_name=volume_name,
                            place_count=self._redundancy,
                            diskless_on_remaining=False
                        ),
                        volume_uuid,
                        self._group_name
                    )
            except LinstorVolumeManagerError as e:
                # "Already exists" must not trigger a rollback: the
                # resource may belong to a concurrent creation.
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    clean()
                raise
            except Exception:
                clean()
                raise

        util.retry(create, maxretry=5)
    def _create_volume_with_properties(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create a volume and register its KV properties (state + name).

        Returns the KV store positioned on the namespace of the new volume;
        the caller is expected to flip PROP_NOT_EXISTS to the final state
        once the volume is fully usable.

        :raises LinstorVolumeManagerError: If the volume already exists
            (in properties or as a LINSTOR resource) or if creation fails.
        """
        if self.check_volume_exists(volume_uuid):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, it already exists'
                .format(volume_uuid, self._group_name) + ' in properties',
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        if volume_name in self._fetch_resource_names():
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    volume_uuid, self._group_name
                ) + 'resource of the same name already exists in LINSTOR'
            )

        # I am paranoid.
        volume_properties = self._get_volume_properties(volume_uuid)
        if (volume_properties.get(self.PROP_NOT_EXISTS) is not None):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}`, '.format(volume_uuid) +
                'properties already exist'
            )

        try:
            # Mark the volume as "creating" before touching LINSTOR so a
            # crash leaves a detectable (and repairable) state behind.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_CREATING
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            self._create_volume(
                volume_uuid,
                volume_name,
                size,
                place_resources,
                high_availability
            )

            assert volume_properties.namespace == \
                self._build_volume_namespace(volume_uuid)
            return volume_properties
        except LinstorVolumeManagerError as e:
            # Do not destroy existing resource!
            # In theory we can't get this error because we check this event
            # before the `self._create_volume` case.
            # It can only happen if the same volume uuid is used in the same
            # call in another host.
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                self._destroy_volume(volume_uuid, force=True)
            raise
2356 def _find_device_path(self, volume_uuid, volume_name):
2357 current_device_path = self._request_device_path(
2358 volume_uuid, volume_name, activate=True
2359 )
2361 # We use realpath here to get the /dev/drbd<id> path instead of
2362 # /dev/drbd/by-res/<resource_name>.
2363 expected_device_path = self.build_device_path(volume_name)
2364 util.wait_for_path(expected_device_path, 5)
2366 device_realpath = os.path.realpath(expected_device_path)
2367 if current_device_path != device_realpath:
2368 raise LinstorVolumeManagerError(
2369 'Invalid path, current={}, expected={} (realpath={})'
2370 .format(
2371 current_device_path,
2372 expected_device_path,
2373 device_realpath
2374 )
2375 )
2376 return expected_device_path
    def _request_device_path(self, volume_uuid, volume_name, activate=False):
        """
        Return the /dev/drbd<id> path of `volume_name` on this host.

        When no local resource exists and `activate` is True, a diskless
        resource is created, then we recurse exactly once (with the default
        activate=False, so no infinite recursion is possible).
        """
        node_name = socket.gethostname()

        resource = next(filter(
            lambda resource: resource.node_name == node_name and
            resource.name == volume_name,
            self._get_resource_cache().resources
        ), None)

        if not resource:
            if activate:
                self._mark_resource_cache_as_dirty()
                self._activate_device_path(
                    self._linstor, node_name, volume_name
                )
                return self._request_device_path(volume_uuid, volume_name)
            raise LinstorVolumeManagerError(
                'Empty dev path for `{}`, but definition "seems" to exist'
                .format(volume_uuid)
            )
        # Contains a path of the /dev/drbd<id> form.
        return resource.volumes[0].device_path
2401 def _destroy_resource(self, resource_name, force=False):
2402 result = self._linstor.resource_dfn_delete(resource_name)
2403 error_str = self._get_error_str(result)
2404 if not error_str:
2405 self._mark_resource_cache_as_dirty()
2406 return
2408 if not force:
2409 self._mark_resource_cache_as_dirty()
2410 raise LinstorVolumeManagerError(
2411 'Could not destroy resource `{}` from SR `{}`: {}'
2412 .format(resource_name, self._group_name, error_str)
2413 )
2415 # If force is used, ensure there is no opener.
2416 all_openers = get_all_volume_openers(resource_name, '0')
2417 for openers in all_openers.values():
2418 if openers:
2419 self._mark_resource_cache_as_dirty()
2420 raise LinstorVolumeManagerError(
2421 'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)'
2422 .format(resource_name, self._group_name, error_str, all_openers)
2423 )
2425 # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue?
2426 resource_states = filter(
2427 lambda resource_state: resource_state.name == resource_name,
2428 self._get_resource_cache().resource_states
2429 )
2431 # Mark only after computation of states.
2432 self._mark_resource_cache_as_dirty()
2434 for resource_state in resource_states:
2435 volume_state = resource_state.volume_states[0]
2436 if resource_state.in_use:
2437 demote_drbd_resource(resource_state.node_name, resource_name)
2438 break
2439 self._destroy_resource(resource_name)
    def _destroy_volume(self, volume_uuid, force=False, preserve_properties=False):
        """
        Destroy the LINSTOR resource behind a volume and, unless
        `preserve_properties` is set, wipe its KV properties.

        :raises LinstorVolumeManagerError: On any failure. Note: the
            original exception (and its error code) is not preserved.
        """
        volume_properties = self._get_volume_properties(volume_uuid)
        try:
            volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
            if volume_name in self._fetch_resource_names():
                self._destroy_resource(volume_name, force)

            # Assume this call is atomic.
            if not preserve_properties:
                volume_properties.clear()
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Cannot destroy volume `{}`: {}'.format(volume_uuid, e)
            )
    def _build_volumes(self, repair):
        """
        Rebuild `self._volumes` from the KV properties.

        :param bool repair: When True (master only), bad or half-created
            volumes are destroyed or renamed; when False they are ignored,
            and any pending "updating uuid" transaction is fatal.
        :raises LinstorVolumeManagerError: When an invalid state is found
            and repair is not allowed.
        """
        properties = self._kv_cache
        resource_names = self._fetch_resource_names()

        self._volumes = set()

        # Volumes caught in the middle of a UUID rename transaction.
        updating_uuid_volumes = self._get_volumes_by_property(
            self.REG_UPDATING_UUID_SRC, ignore_inexisting_volumes=False
        )
        if updating_uuid_volumes and not repair:
            raise LinstorVolumeManagerError(
                'Cannot build LINSTOR volume list: '
                'It exists invalid "updating uuid volumes", repair is required'
            )

        existing_volumes = self._get_volumes_by_property(
            self.REG_NOT_EXISTS, ignore_inexisting_volumes=False
        )
        for volume_uuid, not_exists in existing_volumes.items():
            properties.namespace = self._build_volume_namespace(volume_uuid)

            src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC)
            if src_uuid:
                # Handled later by the updating_uuid_volumes loop below.
                self._logger(
                    'Ignoring volume during manager initialization with prop '
                    ' PROP_UPDATING_UUID_SRC: {} (properties={})'
                    .format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Insert volume in list if the volume exists. Or if the volume
            # is being created and a slave wants to use it (repair = False).
            #
            # If we are on the master and if repair is True and state is
            # Creating, it's probably a bug or crash: the creation process has
            # been stopped.
            if not_exists == self.STATE_EXISTS or (
                not repair and not_exists == self.STATE_CREATING
            ):
                self._volumes.add(volume_uuid)
                continue

            if not repair:
                self._logger(
                    'Ignoring bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Remove bad volume.
            try:
                self._logger(
                    'Removing bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                volume_name = properties.get(self.PROP_VOLUME_NAME)

                # Little optimization, don't call `self._destroy_volume`,
                # we already have resource name list.
                if volume_name in resource_names:
                    self._destroy_resource(volume_name, force=True)

                # Assume this call is atomic.
                properties.clear()
            except Exception as e:
                # Do not raise, we don't want to block user action.
                self._logger(
                    'Cannot clean volume {}: {}'.format(volume_uuid, e)
                )

                # The volume can't be removed, maybe it's still in use,
                # in this case rename it with the "DELETED_" prefix.
                # This prefix is mandatory if it exists a snap transaction to
                # rollback because the original VDI UUID can try to be renamed
                # with the UUID we are trying to delete...
                if not volume_uuid.startswith('DELETED_'):
                    self.update_volume_uuid(
                        volume_uuid, 'DELETED_' + volume_uuid, force=True
                    )

        # Finish (or roll back) the pending UUID rename transactions.
        for dest_uuid, src_uuid in updating_uuid_volumes.items():
            dest_namespace = self._build_volume_namespace(dest_uuid)

            properties.namespace = dest_namespace
            if int(properties.get(self.PROP_NOT_EXISTS)):
                # Destination never became valid: drop it, keep the source.
                properties.clear()
                continue

            # Destination is valid: remove the source and commit the rename.
            properties.namespace = self._build_volume_namespace(src_uuid)
            properties.clear()

            properties.namespace = dest_namespace
            properties.pop(self.PROP_UPDATING_UUID_SRC)

            if src_uuid in self._volumes:
                self._volumes.remove(src_uuid)
            self._volumes.add(dest_uuid)
2563 def _get_sr_properties(self):
2564 return self._create_linstor_kv(self._build_sr_namespace())
2566 def _get_volumes_by_property(
2567 self, reg_prop, ignore_inexisting_volumes=True
2568 ):
2569 base_properties = self._get_kv_cache()
2570 base_properties.namespace = self._build_volume_namespace()
2572 volume_properties = {}
2573 for volume_uuid in self._volumes:
2574 volume_properties[volume_uuid] = ''
2576 for key, value in base_properties.items():
2577 res = reg_prop.match(key)
2578 if res:
2579 volume_uuid = res.groups()[0]
2580 if not ignore_inexisting_volumes or \
2581 volume_uuid in self._volumes:
2582 volume_properties[volume_uuid] = value
2584 return volume_properties
2586 def _create_linstor_kv(self, namespace):
2587 return linstor.KV(
2588 self._group_name,
2589 uri=self._linstor.controller_host(),
2590 namespace=namespace
2591 )
2593 def _get_volume_properties(self, volume_uuid):
2594 properties = self._get_kv_cache()
2595 properties.namespace = self._build_volume_namespace(volume_uuid)
2596 return properties
2598 @classmethod
2599 def _build_sr_namespace(cls):
2600 return '/{}/'.format(cls.NAMESPACE_SR)
2602 @classmethod
2603 def _build_volume_namespace(cls, volume_uuid=None):
2604 # Return a path to all volumes if `volume_uuid` is not given.
2605 if volume_uuid is None:
2606 return '/{}/'.format(cls.NAMESPACE_VOLUME)
2607 return '/{}/{}/'.format(cls.NAMESPACE_VOLUME, volume_uuid)
2609 @classmethod
2610 def _get_error_str(cls, result):
2611 return ', '.join([
2612 err.message for err in cls._filter_errors(result)
2613 ])
    @classmethod
    def _create_linstor_instance(
        cls, uri, keep_uri_unmodified=False, attempt_count=30
    ):
        """
        Open a keep-alive connection to the LINSTOR controller.

        :param str uri: Controller URI; when falsy, it is auto-discovered.
        :param bool keep_uri_unmodified: When False, a failed first attempt
            falls back to auto-discovery for the retries.
        :param int attempt_count: Maximum connection attempts (1s apart).
        :rtype: linstor.Linstor
        :raises LinstorVolumeManagerError: When no controller URI is found.
        """
        retry = False  # NOTE(review): unused local, kept as-is.

        def connect(uri):
            if not uri:
                uri = get_controller_uri()
            if not uri:
                raise LinstorVolumeManagerError(
                    'Unable to find controller uri...'
                )
            instance = linstor.Linstor(uri, keep_alive=True)
            instance.connect()
            return instance

        try:
            # Fast path: one direct attempt before the retry loop.
            return connect(uri)
        except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError):
            pass

        if not keep_uri_unmodified:
            uri = None

        return util.retry(
            lambda: connect(uri),
            maxretry=attempt_count,
            period=1,
            exceptions=[
                linstor.errors.LinstorNetworkError,
                LinstorVolumeManagerError
            ]
        )
2650 @classmethod
2651 def _configure_volume_peer_slots(cls, lin, volume_name):
2652 result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3)
2653 error_str = cls._get_error_str(result)
2654 if error_str:
2655 raise LinstorVolumeManagerError(
2656 'Could not configure volume peer slots of {}: {}'
2657 .format(volume_name, error_str)
2658 )
2660 @classmethod
2661 def _activate_device_path(cls, lin, node_name, volume_name):
2662 result = lin.resource_make_available(node_name, volume_name, diskful=False)
2663 if linstor.Linstor.all_api_responses_no_error(result):
2664 return
2665 errors = linstor.Linstor.filter_api_call_response_errors(result)
2666 if len(errors) == 1 and errors[0].is_error(
2667 linstor.consts.FAIL_EXISTS_RSC
2668 ):
2669 return
2671 raise LinstorVolumeManagerError(
2672 'Unable to activate device path of `{}` on node `{}`: {}'
2673 .format(volume_name, node_name, ', '.join(
2674 [str(x) for x in result]))
2675 )
2677 @classmethod
2678 def _request_database_path(cls, lin, activate=False):
2679 node_name = socket.gethostname()
2681 try:
2682 resource = next(filter(
2683 lambda resource: resource.node_name == node_name and
2684 resource.name == DATABASE_VOLUME_NAME,
2685 lin.resource_list_raise().resources
2686 ), None)
2687 except Exception as e:
2688 raise LinstorVolumeManagerError(
2689 'Unable to fetch database resource: {}'
2690 .format(e)
2691 )
2693 if not resource:
2694 if activate:
2695 cls._activate_device_path(
2696 lin, node_name, DATABASE_VOLUME_NAME
2697 )
2698 return cls._request_database_path(
2699 DATABASE_VOLUME_NAME, DATABASE_VOLUME_NAME
2700 )
2701 raise LinstorVolumeManagerError(
2702 'Empty dev path for `{}`, but definition "seems" to exist'
2703 .format(DATABASE_PATH)
2704 )
2705 # Contains a path of the /dev/drbd<id> form.
2706 return resource.volumes[0].device_path
2708 @classmethod
2709 def _create_database_volume(
2710 cls, lin, group_name, storage_pool_name, node_names, redundancy
2711 ):
2712 try:
2713 dfns = lin.resource_dfn_list_raise().resource_definitions
2714 except Exception as e:
2715 raise LinstorVolumeManagerError(
2716 'Unable to get definitions during database creation: {}'
2717 .format(e)
2718 )
2720 if dfns:
2721 raise LinstorVolumeManagerError(
2722 'Could not create volume `{}` from SR `{}`, '.format(
2723 DATABASE_VOLUME_NAME, group_name
2724 ) + 'LINSTOR volume list must be empty.'
2725 )
2727 # Workaround to use thin lvm. Without this line an error is returned:
2728 # "Not enough available nodes"
2729 # I don't understand why but this command protect against this bug.
2730 try:
2731 pools = lin.storage_pool_list_raise(
2732 filter_by_stor_pools=[storage_pool_name]
2733 )
2734 except Exception as e:
2735 raise LinstorVolumeManagerError(
2736 'Failed to get storage pool list before database creation: {}'
2737 .format(e)
2738 )
2740 # Ensure we have a correct list of storage pools.
2741 assert pools.storage_pools # We must have at least one storage pool!
2742 nodes_with_pool = list(map(lambda pool: pool.node_name, pools.storage_pools))
2743 for node_name in nodes_with_pool:
2744 assert node_name in node_names
2745 util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool))
2747 # Create the database definition.
2748 size = cls.round_up_volume_size(DATABASE_SIZE)
2749 cls._check_volume_creation_errors(lin.resource_group_spawn(
2750 rsc_grp_name=group_name,
2751 rsc_dfn_name=DATABASE_VOLUME_NAME,
2752 vlm_sizes=['{}B'.format(size)],
2753 definitions_only=True
2754 ), DATABASE_VOLUME_NAME, group_name)
2755 cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME)
2757 # Create real resources on the first nodes.
2758 resources = []
2760 diskful_nodes = []
2761 diskless_nodes = []
2762 for node_name in node_names:
2763 if node_name in nodes_with_pool:
2764 diskful_nodes.append(node_name)
2765 else:
2766 diskless_nodes.append(node_name)
2768 assert diskful_nodes
2769 for node_name in diskful_nodes[:redundancy]:
2770 util.SMlog('Create database diskful on {}'.format(node_name))
2771 resources.append(linstor.ResourceData(
2772 node_name=node_name,
2773 rsc_name=DATABASE_VOLUME_NAME,
2774 storage_pool=storage_pool_name
2775 ))
2776 # Create diskless resources on the remaining set.
2777 for node_name in diskful_nodes[redundancy:] + diskless_nodes:
2778 util.SMlog('Create database diskless on {}'.format(node_name))
2779 resources.append(linstor.ResourceData(
2780 node_name=node_name,
2781 rsc_name=DATABASE_VOLUME_NAME,
2782 diskless=True
2783 ))
2785 result = lin.resource_create(resources)
2786 error_str = cls._get_error_str(result)
2787 if error_str:
2788 raise LinstorVolumeManagerError(
2789 'Could not create database volume from SR `{}`: {}'.format(
2790 group_name, error_str
2791 )
2792 )
2794 # Create database and ensure path exists locally and
2795 # on replicated devices.
2796 current_device_path = cls._request_database_path(lin, activate=True)
2798 # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be
2799 # plugged.
2800 for node_name in node_names:
2801 cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME)
2803 # We use realpath here to get the /dev/drbd<id> path instead of
2804 # /dev/drbd/by-res/<resource_name>.
2805 expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME)
2806 util.wait_for_path(expected_device_path, 5)
2808 device_realpath = os.path.realpath(expected_device_path)
2809 if current_device_path != device_realpath:
2810 raise LinstorVolumeManagerError(
2811 'Invalid path, current={}, expected={} (realpath={})'
2812 .format(
2813 current_device_path,
2814 expected_device_path,
2815 device_realpath
2816 )
2817 )
2819 try:
2820 util.retry(
2821 lambda: util.pread2([DATABASE_MKFS, expected_device_path]),
2822 maxretry=5
2823 )
2824 except Exception as e:
2825 raise LinstorVolumeManagerError(
2826 'Failed to execute {} on database volume: {}'
2827 .format(DATABASE_MKFS, e)
2828 )
2830 return expected_device_path
2832 @classmethod
2833 def _destroy_database_volume(cls, lin, group_name):
2834 error_str = cls._get_error_str(
2835 lin.resource_dfn_delete(DATABASE_VOLUME_NAME)
2836 )
2837 if error_str:
2838 raise LinstorVolumeManagerError(
2839 'Could not destroy resource `{}` from SR `{}`: {}'
2840 .format(DATABASE_VOLUME_NAME, group_name, error_str)
2841 )
2843 @classmethod
2844 def _mount_database_volume(cls, volume_path, mount=True, force=False):
2845 try:
2846 # 1. Create a backup config folder.
2847 database_not_empty = bool(os.listdir(DATABASE_PATH))
2848 backup_path = cls._create_database_backup_path()
2850 # 2. Move the config in the mounted volume.
2851 if database_not_empty:
2852 cls._move_files(DATABASE_PATH, backup_path)
2854 cls._mount_volume(volume_path, DATABASE_PATH, mount)
2856 if database_not_empty:
2857 cls._move_files(backup_path, DATABASE_PATH, force)
2859 # 3. Remove useless backup directory.
2860 try:
2861 os.rmdir(backup_path)
2862 except Exception as e:
2863 raise LinstorVolumeManagerError(
2864 'Failed to remove backup path {} of LINSTOR config: {}'
2865 .format(backup_path, e)
2866 )
2867 except Exception as e:
2868 def force_exec(fn):
2869 try:
2870 fn()
2871 except Exception:
2872 pass
2874 if mount == cls._is_mounted(DATABASE_PATH):
2875 force_exec(lambda: cls._move_files(
2876 DATABASE_PATH, backup_path
2877 ))
2878 force_exec(lambda: cls._mount_volume(
2879 volume_path, DATABASE_PATH, not mount
2880 ))
2882 if mount != cls._is_mounted(DATABASE_PATH):
2883 force_exec(lambda: cls._move_files(
2884 backup_path, DATABASE_PATH
2885 ))
2887 force_exec(lambda: os.rmdir(backup_path))
2888 raise e
2890 @classmethod
2891 def _force_destroy_database_volume(cls, lin, group_name):
2892 try:
2893 cls._destroy_database_volume(lin, group_name)
2894 except Exception:
2895 pass
2897 @classmethod
2898 def _destroy_storage_pool(cls, lin, group_name, node_name):
2899 def destroy():
2900 result = lin.storage_pool_delete(node_name, group_name)
2901 errors = cls._filter_errors(result)
2902 if cls._check_errors(errors, [
2903 linstor.consts.FAIL_NOT_FOUND_STOR_POOL,
2904 linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN
2905 ]):
2906 return
2908 if errors:
2909 raise LinstorVolumeManagerError(
2910 'Failed to destroy SP `{}` on node `{}`: {}'.format(
2911 group_name,
2912 node_name,
2913 cls._get_error_str(errors)
2914 )
2915 )
2917 # We must retry to avoid errors like:
2918 # "can not be deleted as volumes / snapshot-volumes are still using it"
2919 # after LINSTOR database volume destruction.
2920 return util.retry(destroy, maxretry=10)
2922 @classmethod
2923 def _create_resource_group(
2924 cls,
2925 lin,
2926 group_name,
2927 storage_pool_name,
2928 redundancy,
2929 destroy_old_group
2930 ):
2931 rg_creation_attempt = 0
2932 while True:
2933 result = lin.resource_group_create(
2934 name=group_name,
2935 place_count=redundancy,
2936 storage_pool=storage_pool_name,
2937 diskless_on_remaining=False
2938 )
2939 error_str = cls._get_error_str(result)
2940 if not error_str:
2941 break
2943 errors = cls._filter_errors(result)
2944 if destroy_old_group and cls._check_errors(errors, [
2945 linstor.consts.FAIL_EXISTS_RSC_GRP
2946 ]):
2947 rg_creation_attempt += 1
2948 if rg_creation_attempt < 2:
2949 try:
2950 cls._destroy_resource_group(lin, group_name)
2951 except Exception as e:
2952 error_str = 'Failed to destroy old and empty RG: {}'.format(e)
2953 else:
2954 continue
2956 raise LinstorVolumeManagerError(
2957 'Could not create RG `{}`: {}'.format(
2958 group_name, error_str
2959 )
2960 )
2962 result = lin.volume_group_create(group_name)
2963 error_str = cls._get_error_str(result)
2964 if error_str:
2965 raise LinstorVolumeManagerError(
2966 'Could not create VG `{}`: {}'.format(
2967 group_name, error_str
2968 )
2969 )
2971 @classmethod
2972 def _destroy_resource_group(cls, lin, group_name):
2973 def destroy():
2974 result = lin.resource_group_delete(group_name)
2975 errors = cls._filter_errors(result)
2976 if cls._check_errors(errors, [
2977 linstor.consts.FAIL_NOT_FOUND_RSC_GRP
2978 ]):
2979 return
2981 if errors:
2982 raise LinstorVolumeManagerError(
2983 'Failed to destroy RG `{}`: {}'
2984 .format(group_name, cls._get_error_str(errors))
2985 )
2987 return util.retry(destroy, maxretry=10)
2989 @classmethod
2990 def _build_group_name(cls, base_name):
2991 # If thin provisioning is used we have a path like this:
2992 # `VG/LV`. "/" is not accepted by LINSTOR.
2993 return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_'))
2995 # Used to store important data in a HA context,
2996 # i.e. a replication count of 3.
2997 @classmethod
2998 def _build_ha_group_name(cls, base_name):
2999 return '{}{}'.format(cls.PREFIX_HA, base_name.replace('/', '_'))
    @classmethod
    def _check_volume_creation_errors(cls, result, volume_uuid, group_name):
        """
        Inspect a creation API result and raise a typed error:
        ERR_VOLUME_EXISTS, ERR_GROUP_NOT_EXISTS, or a generic error.
        No error responses means success (no exception).
        """
        errors = cls._filter_errors(result)
        if cls._check_errors(errors, [
            linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN
        ]):
            raise LinstorVolumeManagerError(
                'Failed to create volume `{}` from SR `{}`, it already exists'
                .format(volume_uuid, group_name),
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        if cls._check_errors(errors, [linstor.consts.FAIL_NOT_FOUND_RSC_GRP]):
            raise LinstorVolumeManagerError(
                'Failed to create volume `{}` from SR `{}`, resource group doesn\'t exist'
                .format(volume_uuid, group_name),
                LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
            )

        if errors:
            raise LinstorVolumeManagerError(
                'Failed to create volume `{}` from SR `{}`: {}'.format(
                    volume_uuid,
                    group_name,
                    cls._get_error_str(errors)
                )
            )
3029 @classmethod
3030 def _move_files(cls, src_dir, dest_dir, force=False):
3031 def listdir(dir):
3032 ignored = ['lost+found']
3033 return [file for file in os.listdir(dir) if file not in ignored]
3035 try:
3036 if not force:
3037 files = listdir(dest_dir)
3038 if files:
3039 raise LinstorVolumeManagerError(
3040 'Cannot move files from {} to {} because destination '
3041 'contains: {}'.format(src_dir, dest_dir, files)
3042 )
3043 except LinstorVolumeManagerError:
3044 raise
3045 except Exception as e:
3046 raise LinstorVolumeManagerError(
3047 'Cannot list dir {}: {}'.format(dest_dir, e)
3048 )
3050 try:
3051 for file in listdir(src_dir):
3052 try:
3053 dest_file = os.path.join(dest_dir, file)
3054 if not force and os.path.exists(dest_file):
3055 raise LinstorVolumeManagerError(
3056 'Cannot move {} because it already exists in the '
3057 'destination'.format(file)
3058 )
3059 shutil.move(os.path.join(src_dir, file), dest_file)
3060 except LinstorVolumeManagerError:
3061 raise
3062 except Exception as e:
3063 raise LinstorVolumeManagerError(
3064 'Cannot move {}: {}'.format(file, e)
3065 )
3066 except Exception as e:
3067 if not force:
3068 try:
3069 cls._move_files(dest_dir, src_dir, force=True)
3070 except Exception:
3071 pass
3073 raise LinstorVolumeManagerError(
3074 'Failed to move files from {} to {}: {}'.format(
3075 src_dir, dest_dir, e
3076 )
3077 )
3079 @staticmethod
3080 def _create_database_backup_path():
3081 path = DATABASE_PATH + '-' + str(uuid.uuid4())
3082 try:
3083 os.mkdir(path)
3084 return path
3085 except Exception as e:
3086 raise LinstorVolumeManagerError(
3087 'Failed to create backup path {} of LINSTOR config: {}'
3088 .format(path, e)
3089 )
3091 @staticmethod
3092 def _get_filtered_properties(properties):
3093 return dict(properties.items())
3095 @staticmethod
3096 def _filter_errors(result):
3097 return [
3098 err for err in result
3099 if hasattr(err, 'is_error') and err.is_error()
3100 ]
3102 @staticmethod
3103 def _check_errors(result, codes):
3104 for err in result:
3105 for code in codes:
3106 if err.is_error(code):
3107 return True
3108 return False
3110 @classmethod
3111 def _controller_is_running(cls):
3112 return cls._service_is_running('linstor-controller')
3114 @classmethod
3115 def _start_controller(cls, start=True):
3116 return cls._start_service('linstor-controller', start)
3118 @staticmethod
3119 def _start_service(name, start=True):
3120 action = 'start' if start else 'stop'
3121 (ret, out, err) = util.doexec([
3122 'systemctl', action, name
3123 ])
3124 if ret != 0:
3125 raise LinstorVolumeManagerError(
3126 'Failed to {} {}: {} {}'
3127 .format(action, name, out, err)
3128 )
3130 @staticmethod
3131 def _service_is_running(name):
3132 (ret, out, err) = util.doexec([
3133 'systemctl', 'is-active', '--quiet', name
3134 ])
3135 return not ret
3137 @staticmethod
3138 def _is_mounted(mountpoint):
3139 (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint])
3140 return ret == 0
3142 @classmethod
3143 def _mount_volume(cls, volume_path, mountpoint, mount=True):
3144 if mount:
3145 try:
3146 util.pread(['mount', volume_path, mountpoint])
3147 except Exception as e:
3148 raise LinstorVolumeManagerError(
3149 'Failed to mount volume {} on {}: {}'
3150 .format(volume_path, mountpoint, e)
3151 )
3152 else:
3153 try:
3154 if cls._is_mounted(mountpoint):
3155 util.pread(['umount', mountpoint])
3156 except Exception as e:
3157 raise LinstorVolumeManagerError(
3158 'Failed to umount volume {} on {}: {}'
3159 .format(volume_path, mountpoint, e)
3160 )
3163# ==============================================================================
3165# Check if a path is a DRBD resource and log the process name/pid
3166# that opened it.
# Check if a path is a DRBD resource and log the process name/pid
# that opened it.
def log_drbd_openers(path):
    """Log which processes hold the DRBD resource behind `path` open.

    Does nothing when `path` is not under the DRBD by-res directory or does
    not resolve to a DRBD block device. Never raises: failures are logged.

    :param str path: Candidate /dev/drbd/by-res/<resource>/<volume> path.
    """
    # Only symlinks living under the DRBD by-res directory are of interest.
    if not path.startswith(DRBD_BY_RES_PATH):
        return

    # Resource name is the first path component after the by-res prefix.
    prefix_len = len(DRBD_BY_RES_PATH)
    res_name_end = path.find('/', prefix_len)
    if res_name_end == -1:
        return
    res_name = path[prefix_len:res_name_end]

    # Volume number is the last path component.
    volume_end = path.rfind('/')
    if volume_end == res_name_end:
        return
    volume = path[volume_end + 1:]

    try:
        # Ensure path is a DRBD: a block device with DRBD's major (147).
        stats = os.stat(os.path.realpath(path))
        if not stat.S_ISBLK(stats.st_mode) or os.major(stats.st_rdev) != 147:
            return

        # Find where the device is open.
        ret, stdout, stderr = util.doexec(['drbdadm', 'status', res_name])
        if ret != 0:
            util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format(
                res_name, stderr
            ))
            return

        if stdout.startswith('{} role:Primary'.format(res_name)):
            # Open on the local host.
            util.SMlog(
                'DRBD resource `{}` is open on local host: {}'
                .format(path, get_local_volume_openers(res_name, volume))
            )
        else:
            # Open on one or more remote hosts.
            util.SMlog(
                'DRBD resource `{}` is open on hosts: {}'
                .format(path, get_all_volume_openers(res_name, volume))
            )
    except Exception as e:
        util.SMlog(
            'Got exception while trying to determine where DRBD resource '
            '`{}` is open: {}'.format(path, e)
        )