Coverage for drivers/linstorvolumemanager.py : 10%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python3
2#
3# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 3 of the License, or
8# (at your option) any later version.
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16#
18from sm_typing import override
20import errno
21import json
22import linstor
23import os.path
24import re
25import shutil
26import socket
27import stat
28import time
29import util
30import uuid
# Persistent prefix to add to RAW persistent volumes.
PERSISTENT_PREFIX = 'xcp-persistent-'

# Contains the data of the "/var/lib/linstor" directory.
DATABASE_VOLUME_NAME = PERSISTENT_PREFIX + 'database'
DATABASE_SIZE = 1 << 30  # 1GB.
DATABASE_PATH = '/var/lib/linstor'
DATABASE_MKFS = 'mkfs.ext4'

# Matches "<resource> role:Primary" pairs in `drbdadm status` output.
REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary")
# Extracts the address part of a "<name> <address>:<port>" line
# taken from `drbdsetup show` output.
REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$')

# Directory where DRBD exposes one symlink per resource/volume.
DRBD_BY_RES_PATH = '/dev/drbd/by-res/'

# Name of the XAPI plugin used to run LINSTOR helpers on other hosts.
PLUGIN = 'linstor-manager'
49# ==============================================================================
def get_local_volume_openers(resource_name, volume):
    """
    Read the DRBD debugfs "openers" file of a local volume and return
    its content as a JSON object mapping PID -> opener info.
    :param str resource_name: DRBD resource name.
    :param int volume: Volume number inside the resource.
    :return: JSON string: {pid: {'process-name': ..., 'open-duration': ...}}.
    :rtype: str
    :raise Exception: If resource_name or volume is missing.
    """
    if not resource_name or volume is None:
        raise Exception('Cannot get DRBD openers without resource name and/or volume.')

    path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format(
        resource_name, volume
    )

    # Each line looks like: "<process name> <pid> <open duration in ms>".
    opener_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)')

    openers = {}
    with open(path, 'r') as openers_file:
        for line in openers_file:
            match = opener_re.match(line)
            assert match
            process_name, pid, open_duration_ms = match.groups()
            openers[pid] = {
                'process-name': process_name,
                'open-duration': open_duration_ms
            }

    return json.dumps(openers)
def get_all_volume_openers(resource_name, volume):
    """
    Collect the DRBD openers of a volume from every live host of the pool
    through the linstor-manager XAPI plugin.
    :param str resource_name: DRBD resource name.
    :param volume: Volume number (converted to str for the plugin call).
    :return: Dict mapping hostname -> parsed openers JSON.
    :rtype: dict
    """
    PLUGIN_CMD = 'getDrbdOpeners'

    volume = str(volume)
    openers = {}

    # Make sure this call never stucks because this function can be called
    # during HA init and in this case we can wait forever.
    session = util.timeout_call(10, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            if not metrics['live']:
                # Ensure we call plugin on online hosts only.
                continue

            response = session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {
                    'resourceName': resource_name,
                    'volume': volume
                }
            )
            openers[node_name] = json.loads(response)
        except Exception as e:
            util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))

    return openers
115# ==============================================================================
def round_up(value, divisor):
    """Round value up to the nearest multiple of divisor."""
    assert divisor
    divisor = int(divisor)
    value = int(value)
    return (value + divisor - 1) // divisor * divisor
def round_down(value, divisor):
    """Round value down to the nearest multiple of divisor."""
    assert divisor
    value = int(value)
    return value - value % int(divisor)
129# ==============================================================================
def get_remote_host_ip(node_name):
    """
    Resolve the IP of a remote node using the DRBD connection paths of
    the database volume. Returns None when it cannot be determined.
    :param str node_name: Peer node name to look up.
    :rtype: str or None
    """
    (ret, stdout, stderr) = util.doexec([
        'drbdsetup', 'show', DATABASE_VOLUME_NAME, '--json'
    ])
    if ret != 0:
        return

    try:
        conf = json.loads(stdout)
        if not conf:
            return

        for connection in conf[0]['connections']:
            if connection['net']['_name'] != node_name:
                continue
            # Found the peer: extract the address from "<name> <ip>:<port>".
            res = REG_DRBDSETUP_IP.match(connection['path']['_remote_host'])
            if res:
                return res.groups()[0]
            break
    except Exception:
        # Best effort: any parse error simply yields None.
        pass
def _get_controller_uri():
    """
    Find the URI of the running LINSTOR controller.

    Strategy: first check whether the database volume is Primary locally,
    then follow the DRBD connections, and finally fall back to asking
    every host of the pool through the XAPI plugin.
    :return: A "linstor://<host>" URI, or None if no controller was found.
    :rtype: str or None
    """
    PLUGIN_CMD = 'hasControllerRunning'

    # Try to find controller using drbdadm.
    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])
    if ret == 0:
        # If we are here, the database device exists locally.

        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            # Nice case, we have the controller running on this local host.
            return 'linstor://localhost'

        # Try to find the host using DRBD connections.
        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            node_name = res.groups()[0]
            ip = get_remote_host_ip(node_name)
            if ip:
                return 'linstor://' + ip

    # Worst case: we use many hosts in the pool (>= 4), so we can't find the
    # primary using drbdadm because we don't have all connections to the
    # replicated volume. `drbdadm status xcp-persistent-database` returns
    # 3 connections by default.
    try:
        session = util.timeout_call(10, util.get_localAPI_session)

        for host_ref, host_record in session.xenapi.host.get_all_records().items():
            node_name = host_record['hostname']
            try:
                if util.strtobool(
                    session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {})
                ):
                    return 'linstor://' + host_record['address']
            except Exception as e:
                # Can throw an exception if a host is offline. So catch it.
                util.SMlog('Unable to search controller on `{}`: {}'.format(
                    node_name, e
                ))
    except Exception:
        # Fix: was a bare `except:` which also swallowed SystemExit and
        # KeyboardInterrupt. Keep the best-effort behavior for real errors.
        # Not found, maybe we are trying to create the SR...
        pass
def get_controller_uri():
    """
    Find the LINSTOR controller URI, retrying up to 10 times with a one
    second pause between attempts. Returns None when every attempt fails.
    :rtype: str or None
    """
    for attempt in range(10):
        if attempt:
            # Pause only between attempts, never after the last one.
            time.sleep(1)
        uri = _get_controller_uri()
        if uri:
            return uri
def get_controller_node_name():
    """
    Find the name of the node that runs the LINSTOR controller.
    :return: 'localhost' when local, a node name otherwise, or None.
    :rtype: str or None
    """
    PLUGIN_CMD = 'hasControllerRunning'

    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])

    if ret == 0:
        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            return 'localhost'

        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            return res.groups()[0]

    # Fall back to asking every live host through the XAPI plugin.
    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            if not metrics['live']:
                continue

            has_controller = util.strtobool(session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {}
            ))
            if has_controller:
                return node_name
        except Exception as e:
            util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format(
                node_name, e
            ))
def demote_drbd_resource(node_name, resource_name):
    """
    Ask a specific node to demote a DRBD resource via the XAPI plugin.
    :param str node_name: Node on which the resource must be demoted.
    :param str resource_name: Name of the DRBD resource.
    :raise Exception: If the node cannot be found or the demotion failed.
    """
    PLUGIN_CMD = 'demoteDrbdResource'

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        if host_record['hostname'] != node_name:
            continue

        try:
            session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name}
            )
            # Fix: return on success. Without it, control fell through to
            # the "unable to find node" exception below even when the
            # demotion actually worked.
            return
        except Exception as e:
            util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))
    raise Exception(
        'Can\'t demote resource `{}`, unable to find node `{}`'
        .format(resource_name, node_name)
    )
269# ==============================================================================
class LinstorVolumeManagerError(Exception):
    """
    Error raised by LinstorVolumeManager, carrying a numeric error code
    so callers can distinguish failure kinds.
    """

    # Error codes. Fix: the original declarations had trailing commas,
    # which turned every code except ERR_GROUP_NOT_EXISTS into a 1-tuple
    # (e.g. ERR_GENERIC == (0,)). All codes are now plain ints.
    ERR_GENERIC = 0
    ERR_VOLUME_EXISTS = 1
    ERR_VOLUME_NOT_EXISTS = 2
    ERR_VOLUME_DESTROY = 3
    ERR_GROUP_NOT_EXISTS = 4

    def __init__(self, message, code=ERR_GENERIC):
        """
        :param str message: Human readable error message.
        :param int code: One of the ERR_* codes above.
        """
        super(LinstorVolumeManagerError, self).__init__(message)
        self._code = code

    @property
    def code(self):
        """The ERR_* code associated with this error."""
        return self._code
287# ==============================================================================
289# Note:
290# If a storage pool is not accessible after a network change:
291# linstor node interface modify <NODE> default --ip <IP>
class LinstorVolumeManager(object):
    """
    API to manage LINSTOR volumes in XCP-ng.
    A volume in this context is a physical part of the storage layer.
    """

    # Restrict instance attributes (no per-instance __dict__).
    __slots__ = (
        '_linstor', '_logger', '_redundancy',
        '_base_group_name', '_group_name', '_ha_group_name',
        '_volumes', '_storage_pools', '_storage_pools_time',
        '_kv_cache', '_resource_cache', '_volume_info_cache',
        '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty'
    )

    # Root of the DRBD by-resource device symlinks.
    DEV_ROOT_PATH = DRBD_BY_RES_PATH

    # Default sector size.
    BLOCK_SIZE = 512

    # List of volume properties.
    PROP_METADATA = 'metadata'
    PROP_NOT_EXISTS = 'not-exists'
    PROP_VOLUME_NAME = 'volume-name'
    PROP_IS_READONLY_TIMESTAMP = 'readonly-timestamp'

    # A volume can only be locked for a limited duration.
    # The goal is to give enough time to slaves to execute some actions on
    # a device before an UUID update or a coalesce for example.
    # Expiration is expressed in seconds.
    LOCKED_EXPIRATION_DELAY = 1 * 60

    # Used when volume uuid is being updated.
    PROP_UPDATING_UUID_SRC = 'updating-uuid-src'

    # States of property PROP_NOT_EXISTS.
    STATE_EXISTS = '0'
    STATE_NOT_EXISTS = '1'
    STATE_CREATING = '2'

    # Property namespaces.
    NAMESPACE_SR = 'xcp/sr'
    NAMESPACE_VOLUME = 'xcp/volume'

    # Regex to match properties.
    REG_PROP = '^([^/]+)/{}$'

    REG_METADATA = re.compile(REG_PROP.format(PROP_METADATA))
    REG_NOT_EXISTS = re.compile(REG_PROP.format(PROP_NOT_EXISTS))
    REG_VOLUME_NAME = re.compile(REG_PROP.format(PROP_VOLUME_NAME))
    REG_UPDATING_UUID_SRC = re.compile(REG_PROP.format(PROP_UPDATING_UUID_SRC))

    # Prefixes of SR/VOLUME in the LINSTOR DB.
    # A LINSTOR (resource, group, ...) name cannot start with a number.
    # So we add a prefix behind our SR/VOLUME uuids.
    PREFIX_SR = 'xcp-sr-'
    PREFIX_HA = 'xcp-ha-'
    PREFIX_VOLUME = 'xcp-volume-'

    # Limit request number when storage pool info is asked, we fetch
    # the current pool status after N elapsed seconds.
    STORAGE_POOLS_FETCH_INTERVAL = 15

    @staticmethod
    def default_logger(*args):
        # Default logging backend used when no logger is supplied: print
        # the raw args tuple to stdout.
        print(args)

    # --------------------------------------------------------------------------
    # API.
    # --------------------------------------------------------------------------
    class VolumeInfo(object):
        """
        Lightweight record describing the size and placement of a volume.
        """

        __slots__ = (
            'name',
            'allocated_size',  # Allocated size, place count is not used.
            'virtual_size',    # Total virtual available size of this volume
                               # (i.e. the user size at creation).
            'diskful'          # Array of nodes that have a diskful volume.
        )

        def __init__(self, name):
            # Sizes start at 0 and are filled in later by the manager.
            self.name = name
            self.allocated_size = 0
            self.virtual_size = 0
            self.diskful = []

        @override
        def __repr__(self) -> str:
            return 'VolumeInfo("{}", {}, {}, {})'.format(
                self.name, self.allocated_size, self.virtual_size,
                self.diskful
            )
386 # --------------------------------------------------------------------------
    def __init__(
        self, uri, group_name, repair=False, logger=default_logger.__func__,
        attempt_count=30
    ):
        """
        Create a new LinstorVolumeManager object.
        :param str uri: URI to communicate with the LINSTOR controller.
        :param str group_name: The SR group name to use.
        :param bool repair: If true we try to remove bad volumes due to a crash
        or unexpected behavior.
        :param function logger: Function to log messages.
        :param int attempt_count: Number of attempts to join the controller.
        :raise LinstorVolumeManagerError: If the resource group is missing.
        """

        self._linstor = self._create_linstor_instance(
            uri, attempt_count=attempt_count
        )
        self._base_group_name = group_name

        # Ensure group exists.
        group_name = self._build_group_name(group_name)
        groups = self._linstor.resource_group_list_raise([group_name]).resource_groups
        if not groups:
            raise LinstorVolumeManagerError(
                'Unable to find `{}` Linstor SR'.format(group_name)
            )

        # Ok. ;)
        self._logger = logger
        # Replica count configured on the LINSTOR resource group.
        self._redundancy = groups[0].select_filter.place_count
        self._group_name = group_name
        self._ha_group_name = self._build_ha_group_name(self._base_group_name)
        self._volumes = set()
        self._storage_pools_time = 0

        # To increase performance and limit request count to LINSTOR services,
        # we use caches.
        self._kv_cache = self._create_kv_cache()
        self._resource_cache = None
        self._resource_cache_dirty = True
        self._volume_info_cache = None
        self._volume_info_cache_dirty = True
        self._build_volumes(repair=repair)
    @property
    def group_name(self):
        """
        Give the used group name.
        :return: The group name.
        :rtype: str
        """
        return self._base_group_name

    @property
    def redundancy(self):
        """
        Give the used redundancy.
        :return: The redundancy.
        :rtype: int
        """
        return self._redundancy

    @property
    def volumes(self):
        """
        Give the volumes uuid set.
        :return: The volumes uuid set.
        :rtype: set(str)
        """
        return self._volumes

    @property
    def max_volume_size_allowed(self):
        """
        Give the max volume size currently available in B.
        :return: The current size.
        :rtype: int
        :raise LinstorVolumeManagerError: If no candidate or invalid size.
        """

        candidates = self._find_best_size_candidates()
        if not candidates:
            raise LinstorVolumeManagerError(
                'Failed to get max volume size allowed'
            )

        size = candidates[0].max_volume_size
        if size < 0:
            raise LinstorVolumeManagerError(
                'Invalid max volume size allowed given: {}'.format(size)
            )
        # The size is multiplied by 1024 (LINSTOR presumably reports KiB,
        # consistent with the other size conversions in this file) and
        # rounded down to an aligned value.
        return self.round_down_volume_size(size * 1024)

    @property
    def physical_size(self):
        """
        Give the total physical size of the SR.
        :return: The physical size.
        :rtype: int
        """
        return self._compute_size('total_capacity')

    @property
    def physical_free_size(self):
        """
        Give the total free physical size of the SR.
        :return: The physical free size.
        :rtype: int
        """
        return self._compute_size('free_capacity')
498 @property
499 def allocated_volume_size(self):
500 """
501 Give the allocated size for all volumes. The place count is not
502 used here. When thick lvm is used, the size for one volume should
503 be equal to the virtual volume size. With thin lvm, the size is equal
504 or lower to the volume size.
505 :return: The allocated size of all volumes.
506 :rtype: int
507 """
509 # Paths: /res_name/vol_number/size
510 sizes = {}
512 for resource in self._get_resource_cache().resources:
513 if resource.name not in sizes:
514 current = sizes[resource.name] = {}
515 else:
516 current = sizes[resource.name]
518 for volume in resource.volumes:
519 # We ignore diskless pools of the form "DfltDisklessStorPool".
520 if volume.storage_pool_name != self._group_name:
521 continue
523 allocated_size = max(volume.allocated_size, 0)
524 current_allocated_size = current.get(volume.number) or -1
525 if allocated_size > current_allocated_size:
526 current[volume.number] = allocated_size
528 total_size = 0
529 for volumes in sizes.values():
530 for size in volumes.values():
531 total_size += size
533 return total_size * 1024
535 def get_min_physical_size(self):
536 """
537 Give the minimum physical size of the SR.
538 I.e. the size of the smallest disk + the number of pools.
539 :return: The physical min size.
540 :rtype: tuple(int, int)
541 """
542 size = None
543 pool_count = 0
544 for pool in self._get_storage_pools(force=True):
545 space = pool.free_space
546 if space:
547 pool_count += 1
548 current_size = space.total_capacity
549 if current_size < 0:
550 raise LinstorVolumeManagerError(
551 'Failed to get pool total_capacity attr of `{}`'
552 .format(pool.node_name)
553 )
554 if size is None or current_size < size:
555 size = current_size
556 return (pool_count, (size or 0) * 1024)
    @property
    def metadata(self):
        """
        Get the metadata of the SR.
        :return: Dictionary that contains metadata.
        :rtype: dict(str, dict)
        :raise LinstorVolumeManagerError: If stored metadata is not a dict.
        """

        sr_properties = self._get_sr_properties()
        metadata = sr_properties.get(self.PROP_METADATA)
        if metadata is not None:
            metadata = json.loads(metadata)
            if isinstance(metadata, dict):
                return metadata
            raise LinstorVolumeManagerError(
                'Expected dictionary in SR metadata: {}'.format(
                    self._group_name
                )
            )

        return {}

    @metadata.setter
    def metadata(self, metadata):
        """
        Set the metadata of the SR.
        :param dict metadata: Dictionary that contains metadata.
        """

        assert isinstance(metadata, dict)
        sr_properties = self._get_sr_properties()
        # The whole dict is serialized as JSON into a single SR property.
        sr_properties[self.PROP_METADATA] = json.dumps(metadata)
591 @property
592 def disconnected_hosts(self):
593 """
594 Get the list of disconnected hosts.
595 :return: Set that contains disconnected hosts.
596 :rtype: set(str)
597 """
599 disconnected_hosts = set()
600 for pool in self._get_storage_pools():
601 for report in pool.reports:
602 if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \
603 linstor.consts.WARN_NOT_CONNECTED:
604 disconnected_hosts.add(pool.node_name)
605 break
606 return disconnected_hosts
    def check_volume_exists(self, volume_uuid):
        """
        Check if a volume exists in the SR.
        :param str volume_uuid: The volume uuid to check.
        :return: True if volume exists.
        :rtype: bool
        """
        return volume_uuid in self._volumes
    def create_volume(
        self,
        volume_uuid,
        size,
        persistent=True,
        volume_name=None,
        high_availability=False
    ):
        """
        Create a new volume on the SR.
        :param str volume_uuid: The volume uuid to use.
        :param int size: volume size in B.
        :param bool persistent: If false the volume will be unavailable
        on the next constructor call LinstorSR(...).
        :param str volume_name: If set, this name is used in the LINSTOR
        database instead of a generated name.
        :param bool high_availability: If set, the volume is created in
        the HA group.
        :return: The current device path of the volume.
        :rtype: str
        """

        self._logger('Creating LINSTOR volume {}...'.format(volume_uuid))
        if not volume_name:
            volume_name = self.build_volume_name(util.gen_uuid())
        volume_properties = self._create_volume_with_properties(
            volume_uuid,
            volume_name,
            size,
            True,  # place_resources
            high_availability
        )

        # Volume created! Now try to find the device path.
        try:
            self._logger(
                'Find device path of LINSTOR volume {}...'.format(volume_uuid)
            )
            device_path = self._find_device_path(volume_uuid, volume_name)
            if persistent:
                # Mark as existing only once the device is usable, so an
                # interrupted creation remains detectable on reload.
                volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
            self._volumes.add(volume_uuid)
            self._logger(
                'LINSTOR volume {} created!'.format(volume_uuid)
            )
            return device_path
        except Exception:
            # There is an issue to find the path.
            # At this point the volume has just been created, so force flag can be used.
            self._destroy_volume(volume_uuid, force=True)
            raise
668 def mark_volume_as_persistent(self, volume_uuid):
669 """
670 Mark volume as persistent if created with persistent=False.
671 :param str volume_uuid: The volume uuid to mark.
672 """
674 self._ensure_volume_exists(volume_uuid)
676 # Mark volume as persistent.
677 volume_properties = self._get_volume_properties(volume_uuid)
678 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
    def destroy_volume(self, volume_uuid):
        """
        Destroy a volume.
        :param str volume_uuid: The volume uuid to destroy.
        :raise LinstorVolumeManagerError: With code ERR_VOLUME_DESTROY when
        the underlying deletion fails.
        """

        self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        # Mark volume as destroyed BEFORE deleting it: if the deletion
        # fails midway, the volume is ignored on the next SR load.
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS

        try:
            self._volumes.remove(volume_uuid)
            self._destroy_volume(volume_uuid)
        except Exception as e:
            raise LinstorVolumeManagerError(
                str(e),
                LinstorVolumeManagerError.ERR_VOLUME_DESTROY
            )
    def lock_volume(self, volume_uuid, locked=True):
        """
        Prevent modifications of the volume properties during
        "self.LOCKED_EXPIRATION_DELAY" seconds. The SR must be locked
        when used. This method is useful to attach/detach correctly a volume on
        a slave. Without it the GC can rename a volume, in this case the old
        volume path can be used by a slave...
        :param str volume_uuid: The volume uuid to protect/unprotect.
        :param bool locked: Lock/unlock the volume.
        """

        self._ensure_volume_exists(volume_uuid)

        self._logger(
            '{} volume {} as locked'.format(
                'Mark' if locked else 'Unmark',
                volume_uuid
            )
        )

        # The lock is just a timestamp property; readers consider it
        # expired after LOCKED_EXPIRATION_DELAY seconds.
        volume_properties = self._get_volume_properties(volume_uuid)
        if locked:
            volume_properties[
                self.PROP_IS_READONLY_TIMESTAMP
            ] = str(time.time())
        elif self.PROP_IS_READONLY_TIMESTAMP in volume_properties:
            volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
    def ensure_volume_is_not_locked(self, volume_uuid, timeout=None):
        """
        Ensure a volume is not locked. Wait if necessary.
        :param str volume_uuid: The volume uuid to check.
        :param int timeout: If the volume is always locked after the expiration
        of the timeout, an exception is thrown.
        """
        return self.ensure_volume_list_is_not_locked([volume_uuid], timeout)

    def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None):
        """
        Ensure none of the given volumes is locked. Wait if necessary.
        :param list volume_uuids: The volume uuids to check.
        :param int timeout: If at least one volume is still locked after the
        expiration of the timeout, an exception is thrown.
        """
        # Only consider uuids that are actual volumes of this SR.
        checked = set()
        for volume_uuid in volume_uuids:
            if volume_uuid in self._volumes:
                checked.add(volume_uuid)

        if not checked:
            return

        waiting = False

        volume_properties = self._get_kv_cache()

        start = time.time()
        while True:
            # Can't delete in for loop, use a copy of the list.
            remaining = checked.copy()
            for volume_uuid in checked:
                volume_properties.namespace = \
                    self._build_volume_namespace(volume_uuid)
                timestamp = volume_properties.get(
                    self.PROP_IS_READONLY_TIMESTAMP
                )
                if timestamp is None:
                    # Not locked (or lock released): done with this one.
                    remaining.remove(volume_uuid)
                    continue

                now = time.time()
                if now - float(timestamp) > self.LOCKED_EXPIRATION_DELAY:
                    # Stale lock: the holder exceeded the allowed duration.
                    self._logger(
                        'Remove readonly timestamp on {}'.format(volume_uuid)
                    )
                    volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
                    remaining.remove(volume_uuid)
                    continue

                # Found an active lock: stop scanning and wait.
                if not waiting:
                    self._logger(
                        'Volume {} is locked, waiting...'.format(volume_uuid)
                    )
                    waiting = True
                break

            if not remaining:
                break
            checked = remaining

            if timeout is not None and now - start > timeout:
                raise LinstorVolumeManagerError(
                    'volume `{}` is locked and timeout has been reached'
                    .format(volume_uuid),
                    LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
                )

            # We must wait to use the volume. After that we can modify it
            # ONLY if the SR is locked to avoid bad reads on the slaves.
            time.sleep(1)
            volume_properties = self._create_kv_cache()

        if waiting:
            self._logger('No volume locked now!')
    def remove_volume_if_diskless(self, volume_uuid):
        """
        Remove diskless path from local node.
        :param str volume_uuid: The volume uuid to remove.
        :raise LinstorVolumeManagerError: If the deletion fails.
        """

        self._ensure_volume_exists(volume_uuid)

        volume_properties = self._get_volume_properties(volume_uuid)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        node_name = socket.gethostname()

        # Never remove a local tie-breaker resource: keep it as-is.
        for resource in self._get_resource_cache().resources:
            if resource.name == volume_name and resource.node_name == node_name:
                if linstor.consts.FLAG_TIE_BREAKER in resource.flags:
                    return
                break

        # Deletion is a no-op on LINSTOR's side if the resource is diskful.
        result = self._linstor.resource_delete_if_diskless(
            node_name=node_name, rsc_name=volume_name
        )
        if not linstor.Linstor.all_api_responses_no_error(result):
            raise LinstorVolumeManagerError(
                'Unable to delete diskless path of `{}` on node `{}`: {}'
                .format(volume_name, node_name, ', '.join(
                    [str(x) for x in result]))
            )
    def introduce_volume(self, volume_uuid):
        """
        Placeholder — not implemented yet.
        :param str volume_uuid: The volume uuid to introduce.
        """
        pass  # TODO: Implement me.
    def resize_volume(self, volume_uuid, new_size):
        """
        Resize a volume.
        :param str volume_uuid: The volume uuid to resize.
        :param int new_size: New size in B.
        :raise LinstorVolumeManagerError: If the resize fails after retries.
        """

        volume_name = self.get_volume_name(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)
        # Round up to a valid volume size, then convert B -> KiB for LINSTOR.
        new_size = self.round_up_volume_size(new_size) // 1024

        retry_count = 30
        while True:
            result = self._linstor.volume_dfn_modify(
                rsc_name=volume_name,
                volume_nr=0,
                size=new_size
            )

            self._mark_resource_cache_as_dirty()

            error_str = self._get_error_str(result)
            if not error_str:
                break

            # After volume creation, DRBD volume can be unusable during many seconds.
            # So we must retry the definition change if the device is not up to date.
            # Often the case for thick provisioning.
            if retry_count and error_str.find('non-UpToDate DRBD device') >= 0:
                time.sleep(2)
                retry_count -= 1
                continue

            raise LinstorVolumeManagerError(
                'Could not resize volume `{}` from SR `{}`: {}'
                .format(volume_uuid, self._group_name, error_str)
            )
871 def get_volume_name(self, volume_uuid):
872 """
873 Get the name of a particular volume.
874 :param str volume_uuid: The volume uuid of the name to get.
875 :return: The volume name.
876 :rtype: str
877 """
879 self._ensure_volume_exists(volume_uuid)
880 volume_properties = self._get_volume_properties(volume_uuid)
881 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
882 if volume_name:
883 return volume_name
884 raise LinstorVolumeManagerError(
885 'Failed to get volume name of {}'.format(volume_uuid)
886 )
    def get_volume_size(self, volume_uuid):
        """
        Get the size of a particular volume.
        :param str volume_uuid: The volume uuid of the size to get.
        :return: The volume size.
        :rtype: int
        :raise LinstorVolumeManagerError: If LINSTOR returns a negative size.
        """

        volume_name = self.get_volume_name(volume_uuid)
        dfns = self._linstor.resource_dfn_list_raise(
            query_volume_definitions=True,
            filter_by_resource_definitions=[volume_name]
        ).resource_definitions

        # One resource definition with a single volume (number 0) is expected.
        size = dfns[0].volume_definitions[0].size
        if size < 0:
            raise LinstorVolumeManagerError(
                'Failed to get volume size of: {}'.format(volume_uuid)
            )
        # Convert the LINSTOR size (KiB) to bytes.
        return size * 1024
    def set_auto_promote_timeout(self, volume_uuid, timeout):
        """
        Define the blocking time of open calls when a DRBD
        is already open on another host.
        :param str volume_uuid: The volume uuid to modify.
        :param timeout: Value forwarded to the DRBD auto-promote-timeout
        option (unit defined by DRBD — presumably tenths of a second,
        TODO: confirm against the DRBD documentation).
        :raise LinstorVolumeManagerError: If the option change fails.
        """

        volume_name = self.get_volume_name(volume_uuid)
        result = self._linstor.resource_dfn_modify(volume_name, {
            'DrbdOptions/Resource/auto-promote-timeout': timeout
        })
        error_str = self._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not change the auto promote timeout of `{}`: {}'
                .format(volume_uuid, error_str)
            )
    def set_drbd_ha_properties(self, volume_name, enabled=True):
        """
        Set or not HA DRBD properties required by drbd-reactor and
        by specific volumes.
        :param str volume_name: The volume to modify.
        :param bool enabled: Enable or disable HA properties.
        :raise LinstorVolumeManagerError: If the property change fails.
        """

        properties = {
            'DrbdOptions/auto-quorum': 'disabled',
            'DrbdOptions/Resource/auto-promote': 'no',
            'DrbdOptions/Resource/on-no-data-accessible': 'io-error',
            'DrbdOptions/Resource/on-no-quorum': 'io-error',
            'DrbdOptions/Resource/on-suspended-primary-outdated': 'force-secondary',
            'DrbdOptions/Resource/quorum': 'majority'
        }
        if enabled:
            result = self._linstor.resource_dfn_modify(volume_name, properties)
        else:
            # Deleting the properties restores the defaults for this resource.
            result = self._linstor.resource_dfn_modify(volume_name, {}, delete_props=list(properties.keys()))

        error_str = self._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not modify HA DRBD properties on volume `{}`: {}'
                .format(volume_name, error_str)
            )
    def get_volume_info(self, volume_uuid):
        """
        Get the volume info of a particular volume.
        :param str volume_uuid: The volume uuid of the volume info to get.
        :return: The volume info.
        :rtype: VolumeInfo
        """

        volume_name = self.get_volume_name(volume_uuid)
        return self._get_volumes_info()[volume_name]

    def get_device_path(self, volume_uuid):
        """
        Get the dev path of a volume, create a diskless if necessary.
        :param str volume_uuid: The volume uuid to get the dev path.
        :return: The current device path of the volume.
        :rtype: str
        """

        volume_name = self.get_volume_name(volume_uuid)
        return self._find_device_path(volume_uuid, volume_name)
977 def get_volume_uuid_from_device_path(self, device_path):
978 """
979 Get the volume uuid of a device_path.
980 :param str device_path: The dev path to find the volume uuid.
981 :return: The volume uuid of the local device path.
982 :rtype: str
983 """
985 expected_volume_name = \
986 self.get_volume_name_from_device_path(device_path)
988 volume_names = self.get_volumes_with_name()
989 for volume_uuid, volume_name in volume_names.items():
990 if volume_name == expected_volume_name:
991 return volume_uuid
993 raise LinstorVolumeManagerError(
994 'Unable to find volume uuid from dev path `{}`'.format(device_path)
995 )
997 def get_volume_name_from_device_path(self, device_path):
998 """
999 Get the volume name of a device_path.
1000 :param str device_path: The dev path to find the volume name.
1001 :return: The volume name of the device path.
1002 :rtype: str
1003 """
1005 # Assume that we have a path like this:
1006 # - "/dev/drbd/by-res/xcp-volume-<UUID>/0"
1007 # - "../xcp-volume-<UUID>/0"
1008 if device_path.startswith(DRBD_BY_RES_PATH):
1009 prefix_len = len(DRBD_BY_RES_PATH)
1010 else:
1011 assert device_path.startswith('../')
1012 prefix_len = 3
1014 res_name_end = device_path.find('/', prefix_len)
1015 assert res_name_end != -1
1016 return device_path[prefix_len:res_name_end]
    def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
        """
        Change the uuid of a volume.

        The rename copies the KV properties from the old namespace to the
        new one in a deliberate order, so that a crash at any point can be
        recovered (the PROP_UPDATING_UUID_SRC marker identifies a partially
        renamed volume).

        :param str volume_uuid: The volume to modify.
        :param str new_volume_uuid: The new volume uuid to use.
        :param bool force: If true we don't check if volume_uuid is in the
        volume list. I.e. the volume can be marked as deleted but the volume
        can still be in the LINSTOR KV store if the deletion has failed.
        In specific cases like "undo" after a failed clone we must rename a bad
        deleted VDI.
        """

        self._logger(
            'Trying to update volume UUID {} to {}...'
            .format(volume_uuid, new_volume_uuid)
        )
        assert volume_uuid != new_volume_uuid, 'can\'t update volume UUID, same value'

        if not force:
            self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        if new_volume_uuid in self._volumes:
            raise LinstorVolumeManagerError(
                'Volume `{}` already exists'.format(new_volume_uuid),
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        volume_properties = self._get_volume_properties(volume_uuid)
        # A pending PROP_UPDATING_UUID_SRC marker means a previous rename
        # crashed midway; refuse to stack a second rename on top of it.
        if volume_properties.get(self.PROP_UPDATING_UUID_SRC):
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {}: invalid state'
                .format(volume_uuid)
            )

        # 1. Copy in temp variables metadata and volume_name.
        metadata = volume_properties.get(self.PROP_METADATA)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        # 2. Switch to new volume namespace.
        volume_properties.namespace = self._build_volume_namespace(
            new_volume_uuid
        )

        # The destination namespace must be empty, otherwise we would
        # overwrite the properties of another volume.
        if list(volume_properties.items()):
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {} to {}: '
                .format(volume_uuid, new_volume_uuid) +
                'this last one is not empty'
            )

        try:
            # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC.
            # If we crash after that, the new properties can be removed
            # properly.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
            volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid

            # 4. Copy the properties.
            # Note: On new volumes, during clone for example, the metadata
            # may be missing. So we must test it to avoid this error:
            # "None has to be a str/unicode, but is <type 'NoneType'>"
            if metadata:
                volume_properties[self.PROP_METADATA] = metadata
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            # 5. Ok!
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
        except Exception as err:
            try:
                # Clear the new volume properties in case of failure.
                assert volume_properties.namespace == \
                    self._build_volume_namespace(new_volume_uuid)
                volume_properties.clear()
            except Exception as e:
                # Best effort only: the stale namespace will be caught by
                # the PROP_UPDATING_UUID_SRC check of a later rename.
                self._logger(
                    'Failed to clear new volume properties: {} (ignoring...)'
                    .format(e)
                )
            raise LinstorVolumeManagerError(
                'Failed to copy volume properties: {}'.format(err)
            )

        try:
            # 6. After this point, it's ok we can remove the
            # PROP_UPDATING_UUID_SRC property and clear the src properties
            # without problems.

            # 7. Switch to old volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                volume_uuid
            )
            volume_properties.clear()

            # 8. Switch a last time to new volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                new_volume_uuid
            )
            volume_properties.pop(self.PROP_UPDATING_UUID_SRC)
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to clear volume properties '
                'after volume uuid update: {}'.format(e)
            )

        # Keep the in-memory volume set in sync with the KV store.
        self._volumes.remove(volume_uuid)
        self._volumes.add(new_volume_uuid)

        self._logger(
            'UUID update succeeded of {} to {}! (properties={})'
            .format(
                volume_uuid, new_volume_uuid,
                self._get_filtered_properties(volume_properties)
            )
        )
1134 def update_volume_name(self, volume_uuid, volume_name):
1135 """
1136 Change the volume name of a volume.
1137 :param str volume_uuid: The volume to modify.
1138 :param str volume_name: The volume_name to use.
1139 """
1141 self._ensure_volume_exists(volume_uuid)
1142 self.ensure_volume_is_not_locked(volume_uuid)
1143 if not volume_name.startswith(self.PREFIX_VOLUME):
1144 raise LinstorVolumeManagerError(
1145 'Volume name `{}` must be start with `{}`'
1146 .format(volume_name, self.PREFIX_VOLUME)
1147 )
1149 if volume_name not in self._fetch_resource_names():
1150 raise LinstorVolumeManagerError(
1151 'Volume `{}` doesn\'t exist'.format(volume_name)
1152 )
1154 volume_properties = self._get_volume_properties(volume_uuid)
1155 volume_properties[self.PROP_VOLUME_NAME] = volume_name
1157 def get_usage_states(self, volume_uuid):
1158 """
1159 Check if a volume is currently used.
1160 :param str volume_uuid: The volume uuid to check.
1161 :return: A dictionnary that contains states.
1162 :rtype: dict(str, bool or None)
1163 """
1165 states = {}
1167 volume_name = self.get_volume_name(volume_uuid)
1168 for resource_state in self._linstor.resource_list_raise(
1169 filter_by_resources=[volume_name]
1170 ).resource_states:
1171 states[resource_state.node_name] = resource_state.in_use
1173 return states
1175 def get_volume_openers(self, volume_uuid):
1176 """
1177 Get openers of a volume.
1178 :param str volume_uuid: The volume uuid to monitor.
1179 :return: A dictionnary that contains openers.
1180 :rtype: dict(str, obj)
1181 """
1182 return get_all_volume_openers(self.get_volume_name(volume_uuid), '0')
1184 def get_volumes_with_name(self):
1185 """
1186 Give a volume dictionnary that contains names actually owned.
1187 :return: A volume/name dict.
1188 :rtype: dict(str, str)
1189 """
1190 return self._get_volumes_by_property(self.REG_VOLUME_NAME)
1192 def get_volumes_with_info(self):
1193 """
1194 Give a volume dictionnary that contains VolumeInfos.
1195 :return: A volume/VolumeInfo dict.
1196 :rtype: dict(str, VolumeInfo)
1197 """
1199 volumes = {}
1201 all_volume_info = self._get_volumes_info()
1202 volume_names = self.get_volumes_with_name()
1203 for volume_uuid, volume_name in volume_names.items():
1204 if volume_name:
1205 volume_info = all_volume_info.get(volume_name)
1206 if volume_info:
1207 volumes[volume_uuid] = volume_info
1208 continue
1210 # Well I suppose if this volume is not available,
1211 # LINSTOR has been used directly without using this API.
1212 volumes[volume_uuid] = self.VolumeInfo('')
1214 return volumes
1216 def get_volumes_with_metadata(self):
1217 """
1218 Give a volume dictionnary that contains metadata.
1219 :return: A volume/metadata dict.
1220 :rtype: dict(str, dict)
1221 """
1223 volumes = {}
1225 metadata = self._get_volumes_by_property(self.REG_METADATA)
1226 for volume_uuid, volume_metadata in metadata.items():
1227 if volume_metadata:
1228 volume_metadata = json.loads(volume_metadata)
1229 if isinstance(volume_metadata, dict):
1230 volumes[volume_uuid] = volume_metadata
1231 continue
1232 raise LinstorVolumeManagerError(
1233 'Expected dictionary in volume metadata: {}'
1234 .format(volume_uuid)
1235 )
1237 volumes[volume_uuid] = {}
1239 return volumes
1241 def get_volume_metadata(self, volume_uuid):
1242 """
1243 Get the metadata of a volume.
1244 :return: Dictionary that contains metadata.
1245 :rtype: dict
1246 """
1248 self._ensure_volume_exists(volume_uuid)
1249 volume_properties = self._get_volume_properties(volume_uuid)
1250 metadata = volume_properties.get(self.PROP_METADATA)
1251 if metadata:
1252 metadata = json.loads(metadata)
1253 if isinstance(metadata, dict):
1254 return metadata
1255 raise LinstorVolumeManagerError(
1256 'Expected dictionary in volume metadata: {}'
1257 .format(volume_uuid)
1258 )
1259 return {}
1261 def set_volume_metadata(self, volume_uuid, metadata):
1262 """
1263 Set the metadata of a volume.
1264 :param dict metadata: Dictionary that contains metadata.
1265 """
1267 self._ensure_volume_exists(volume_uuid)
1268 self.ensure_volume_is_not_locked(volume_uuid)
1270 assert isinstance(metadata, dict)
1271 volume_properties = self._get_volume_properties(volume_uuid)
1272 volume_properties[self.PROP_METADATA] = json.dumps(metadata)
1274 def update_volume_metadata(self, volume_uuid, metadata):
1275 """
1276 Update the metadata of a volume. It modify only the given keys.
1277 It doesn't remove unreferenced key instead of set_volume_metadata.
1278 :param dict metadata: Dictionary that contains metadata.
1279 """
1281 self._ensure_volume_exists(volume_uuid)
1282 self.ensure_volume_is_not_locked(volume_uuid)
1284 assert isinstance(metadata, dict)
1285 volume_properties = self._get_volume_properties(volume_uuid)
1287 current_metadata = json.loads(
1288 volume_properties.get(self.PROP_METADATA, '{}')
1289 )
1290 if not isinstance(metadata, dict):
1291 raise LinstorVolumeManagerError(
1292 'Expected dictionary in volume metadata: {}'
1293 .format(volume_uuid)
1294 )
1296 for key, value in metadata.items():
1297 current_metadata[key] = value
1298 volume_properties[self.PROP_METADATA] = json.dumps(current_metadata)
1300 def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True):
1301 """
1302 Clone a volume. Do not copy the data, this method creates a new volume
1303 with the same size.
1304 :param str volume_uuid: The volume to clone.
1305 :param str clone_uuid: The cloned volume.
1306 :param bool persistent: If false the volume will be unavailable
1307 on the next constructor call LinstorSR(...).
1308 :return: The current device path of the cloned volume.
1309 :rtype: str
1310 """
1312 volume_name = self.get_volume_name(volume_uuid)
1313 self.ensure_volume_is_not_locked(volume_uuid)
1315 # 1. Find ideal nodes + size to use.
1316 ideal_node_names, size = self._get_volume_node_names_and_size(
1317 volume_name
1318 )
1319 if size <= 0:
1320 raise LinstorVolumeManagerError(
1321 'Invalid size of {} for volume `{}`'.format(size, volume_name)
1322 )
1324 # 2. Create clone!
1325 return self.create_volume(clone_uuid, size, persistent)
1327 def remove_resourceless_volumes(self):
1328 """
1329 Remove all volumes without valid or non-empty name
1330 (i.e. without LINSTOR resource). It's different than
1331 LinstorVolumeManager constructor that takes a `repair` param that
1332 removes volumes with `PROP_NOT_EXISTS` to 1.
1333 """
1335 resource_names = self._fetch_resource_names()
1336 for volume_uuid, volume_name in self.get_volumes_with_name().items():
1337 if not volume_name or volume_name not in resource_names:
1338 # Don't force, we can be sure of what's happening.
1339 self.destroy_volume(volume_uuid)
    def destroy(self):
        """
        Destroy this SR. Object should not be used after that.

        Refuses to run while any volume (tracked or external) remains.
        The controller is stopped/restarted around the steps because the
        LINSTOR database lives on a DRBD volume that must be unmounted
        before it can be deleted.
        """

        # 1. Ensure volume list is empty. No cost.
        if self._volumes:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes'
            )

        # 2. Fetch ALL resource names.
        # This list may therefore contain volumes created outside
        # the scope of the driver.
        resource_names = self._fetch_resource_names(ignore_deleted=False)
        try:
            resource_names.remove(DATABASE_VOLUME_NAME)
        except KeyError:
            # Really strange to reach that point.
            # Normally we always have the database volume in the list.
            pass

        # 3. Ensure the resource name list is entirely empty...
        if resource_names:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes (created externally or being deleted)'
            )

        # 4. Destroying...
        controller_is_running = self._controller_is_running()
        uri = 'linstor://localhost'
        try:
            if controller_is_running:
                self._start_controller(start=False)

            # 4.1. Umount LINSTOR database.
            self._mount_database_volume(
                self.build_device_path(DATABASE_VOLUME_NAME),
                mount=False,
                force=True
            )

            # 4.2. Refresh instance.
            self._start_controller(start=True)
            self._linstor = self._create_linstor_instance(
                uri, keep_uri_unmodified=True
            )

            # 4.3. Destroy database volume.
            self._destroy_resource(DATABASE_VOLUME_NAME)

            # 4.4. Refresh linstor connection.
            # Without we get this error:
            # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.."
            # Because the deletion of the database was not seen by Linstor for some reason.
            # It seems a simple refresh of the Linstor connection makes it aware of the deletion.
            self._linstor.disconnect()
            self._linstor.connect()

            # 4.5. Destroy remaining drbd nodes on hosts.
            # We check if there is a DRBD node on hosts that could mean blocking when destroying resource groups.
            # It needs to be done locally by each host so we go through the linstor-manager plugin.
            # If we don't do this sometimes, the destroy will fail when trying to destroy the resource groups with:
            # "linstor-manager:destroy error: Failed to destroy SP `xcp-sr-linstor_group_thin_device` on node `r620-s2`: The specified storage pool 'xcp-sr-linstor_group_thin_device' on node 'r620-s2' can not be deleted as volumes / snapshot-volumes are still using it."
            session = util.timeout_call(5, util.get_localAPI_session)
            for host_ref in session.xenapi.host.get_all():
                try:
                    # Best effort per host: a failure is logged, not fatal.
                    # NOTE(review): the plugin response is ignored on purpose.
                    response = session.xenapi.host.call_plugin(
                        host_ref, 'linstor-manager', 'destroyDrbdVolumes', {'volume_group': self._group_name}
                    )
                except Exception as e:
                    util.SMlog('Calling destroyDrbdVolumes on host {} failed with error {}'.format(host_ref, e))

            # 4.6. Destroy group and storage pools.
            self._destroy_resource_group(self._linstor, self._group_name)
            self._destroy_resource_group(self._linstor, self._ha_group_name)
            for pool in self._get_storage_pools(force=True):
                self._destroy_storage_pool(
                    self._linstor, pool.name, pool.node_name
                )
        except Exception as e:
            # Restore the controller to its initial state before failing.
            self._start_controller(start=controller_is_running)
            raise e

        # Final cleanup of the local database directory; failures here are
        # only logged because the SR itself is already gone.
        try:
            self._start_controller(start=False)
            for file in os.listdir(DATABASE_PATH):
                if file != 'lost+found':
                    os.remove(DATABASE_PATH + '/' + file)
        except Exception as e:
            util.SMlog(
                'Ignoring failure after LINSTOR SR destruction: {}'
                .format(e)
            )
1439 def find_up_to_date_diskful_nodes(self, volume_uuid):
1440 """
1441 Find all nodes that contain a specific volume using diskful disks.
1442 The disk must be up to data to be used.
1443 :param str volume_uuid: The volume to use.
1444 :return: The available nodes.
1445 :rtype: tuple(set(str), str)
1446 """
1448 volume_name = self.get_volume_name(volume_uuid)
1450 in_use_by = None
1451 node_names = set()
1453 resource_states = filter(
1454 lambda resource_state: resource_state.name == volume_name,
1455 self._get_resource_cache().resource_states
1456 )
1458 for resource_state in resource_states:
1459 volume_state = resource_state.volume_states[0]
1460 if volume_state.disk_state == 'UpToDate':
1461 node_names.add(resource_state.node_name)
1462 if resource_state.in_use:
1463 in_use_by = resource_state.node_name
1465 return (node_names, in_use_by)
1467 def invalidate_resource_cache(self):
1468 """
1469 If resources are impacted by external commands like vhdutil,
1470 it's necessary to call this function to invalidate current resource
1471 cache.
1472 """
1473 self._mark_resource_cache_as_dirty()
1475 def has_node(self, node_name):
1476 """
1477 Check if a node exists in the LINSTOR database.
1478 :rtype: bool
1479 """
1480 result = self._linstor.node_list()
1481 error_str = self._get_error_str(result)
1482 if error_str:
1483 raise LinstorVolumeManagerError(
1484 'Failed to list nodes using `{}`: {}'
1485 .format(node_name, error_str)
1486 )
1487 return bool(result[0].node(node_name))
1489 def create_node(self, node_name, ip):
1490 """
1491 Create a new node in the LINSTOR database.
1492 :param str node_name: Node name to use.
1493 :param str ip: Host IP to communicate.
1494 """
1495 result = self._linstor.node_create(
1496 node_name,
1497 linstor.consts.VAL_NODE_TYPE_CMBD,
1498 ip
1499 )
1500 errors = self._filter_errors(result)
1501 if errors:
1502 error_str = self._get_error_str(errors)
1503 raise LinstorVolumeManagerError(
1504 'Failed to create node `{}`: {}'.format(node_name, error_str)
1505 )
1507 def destroy_node(self, node_name):
1508 """
1509 Destroy a node in the LINSTOR database.
1510 :param str node_name: Node name to remove.
1511 """
1512 result = self._linstor.node_delete(node_name)
1513 errors = self._filter_errors(result)
1514 if errors:
1515 error_str = self._get_error_str(errors)
1516 raise LinstorVolumeManagerError(
1517 'Failed to destroy node `{}`: {}'.format(node_name, error_str)
1518 )
1520 def create_node_interface(self, node_name, name, ip):
1521 """
1522 Create a new node interface in the LINSTOR database.
1523 :param str node_name: Node name of the interface to use.
1524 :param str name: Interface to create.
1525 :param str ip: IP of the interface.
1526 """
1527 result = self._linstor.netinterface_create(node_name, name, ip)
1528 errors = self._filter_errors(result)
1529 if errors:
1530 error_str = self._get_error_str(errors)
1531 raise LinstorVolumeManagerError(
1532 'Failed to create node interface on `{}`: {}'.format(node_name, error_str)
1533 )
1535 def destroy_node_interface(self, node_name, name):
1536 """
1537 Destroy a node interface in the LINSTOR database.
1538 :param str node_name: Node name of the interface to remove.
1539 :param str name: Interface to remove.
1540 """
1542 if name == 'default':
1543 raise LinstorVolumeManagerError(
1544 'Unable to delete the default interface of a node!'
1545 )
1547 result = self._linstor.netinterface_delete(node_name, name)
1548 errors = self._filter_errors(result)
1549 if errors:
1550 error_str = self._get_error_str(errors)
1551 raise LinstorVolumeManagerError(
1552 'Failed to destroy node interface on `{}`: {}'.format(node_name, error_str)
1553 )
1555 def modify_node_interface(self, node_name, name, ip):
1556 """
1557 Modify a node interface in the LINSTOR database. Create it if necessary.
1558 :param str node_name: Node name of the interface to use.
1559 :param str name: Interface to modify or create.
1560 :param str ip: IP of the interface.
1561 """
1562 result = self._linstor.netinterface_create(node_name, name, ip)
1563 errors = self._filter_errors(result)
1564 if not errors:
1565 return
1567 if self._check_errors(errors, [linstor.consts.FAIL_EXISTS_NET_IF]):
1568 result = self._linstor.netinterface_modify(node_name, name, ip)
1569 errors = self._filter_errors(result)
1570 if not errors:
1571 return
1573 error_str = self._get_error_str(errors)
1574 raise LinstorVolumeManagerError(
1575 'Unable to modify interface on `{}`: {}'.format(node_name, error_str)
1576 )
1578 def list_node_interfaces(self, node_name):
1579 """
1580 List all node interfaces.
1581 :param str node_name: Node name to use to list interfaces.
1582 :rtype: list
1583 :
1584 """
1585 result = self._linstor.net_interface_list(node_name)
1586 if not result:
1587 raise LinstorVolumeManagerError(
1588 'Unable to list interfaces on `{}`: no list received'.format(node_name)
1589 )
1591 interfaces = {}
1592 for interface in result:
1593 interface = interface._rest_data
1594 interfaces[interface['name']] = {
1595 'address': interface['address'],
1596 'active': interface['is_active']
1597 }
1598 return interfaces
1600 def get_node_preferred_interface(self, node_name):
1601 """
1602 Get the preferred interface used by a node.
1603 :param str node_name: Node name of the interface to get.
1604 :rtype: str
1605 """
1606 try:
1607 nodes = self._linstor.node_list_raise([node_name]).nodes
1608 if nodes:
1609 properties = nodes[0].props
1610 return properties.get('PrefNic', 'default')
1611 return nodes
1612 except Exception as e:
1613 raise LinstorVolumeManagerError(
1614 'Failed to get preferred interface: `{}`'.format(e)
1615 )
1617 def set_node_preferred_interface(self, node_name, name):
1618 """
1619 Set the preferred interface to use on a node.
1620 :param str node_name: Node name of the interface.
1621 :param str name: Preferred interface to use.
1622 """
1623 result = self._linstor.node_modify(node_name, property_dict={'PrefNic': name})
1624 errors = self._filter_errors(result)
1625 if errors:
1626 error_str = self._get_error_str(errors)
1627 raise LinstorVolumeManagerError(
1628 'Failed to set preferred node interface on `{}`: {}'.format(node_name, error_str)
1629 )
1631 def get_nodes_info(self):
1632 """
1633 Get all nodes + statuses, used or not by the pool.
1634 :rtype: dict(str, dict)
1635 """
1636 try:
1637 nodes = {}
1638 for node in self._linstor.node_list_raise().nodes:
1639 nodes[node.name] = node.connection_status
1640 return nodes
1641 except Exception as e:
1642 raise LinstorVolumeManagerError(
1643 'Failed to get all nodes: `{}`'.format(e)
1644 )
1646 def get_storage_pools_info(self):
1647 """
1648 Give all storage pools of current group name.
1649 :rtype: dict(str, list)
1650 """
1651 storage_pools = {}
1652 for pool in self._get_storage_pools(force=True):
1653 if pool.node_name not in storage_pools:
1654 storage_pools[pool.node_name] = []
1656 size = -1
1657 capacity = -1
1659 space = pool.free_space
1660 if space:
1661 size = space.free_capacity
1662 if size < 0:
1663 size = -1
1664 else:
1665 size *= 1024
1666 capacity = space.total_capacity
1667 if capacity <= 0:
1668 capacity = -1
1669 else:
1670 capacity *= 1024
1672 storage_pools[pool.node_name].append({
1673 'name': pool.name,
1674 'linstor-uuid': pool.uuid,
1675 'free-size': size,
1676 'capacity': capacity
1677 })
1679 return storage_pools
    def get_resources_info(self):
        """
        Give all resources of current group name, keyed by resource name.
        Each entry maps node names to replica details (volumes, diskful
        and tie-breaker flags, in-use state) plus the owning volume uuid.
        :rtype: dict(str, dict)
        """
        resources = {}
        resource_list = self._get_resource_cache()
        volume_names = self.get_volumes_with_name()
        # Pass 1: one entry per resource, one sub-entry per node replica.
        for resource in resource_list.resources:
            if resource.name not in resources:
                resources[resource.name] = { 'nodes': {}, 'uuid': '' }
            resource_nodes = resources[resource.name]['nodes']

            resource_nodes[resource.node_name] = {
                'volumes': [],
                'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags,
                'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags
            }
            resource_volumes = resource_nodes[resource.node_name]['volumes']

            for volume in resource.volumes:
                # We ignore diskless pools of the form "DfltDisklessStorPool".
                if volume.storage_pool_name != self._group_name:
                    continue

                # Sizes are reported in KiB by LINSTOR; -1 means unknown.
                usable_size = volume.usable_size
                if usable_size < 0:
                    usable_size = -1
                else:
                    usable_size *= 1024

                allocated_size = volume.allocated_size
                if allocated_size < 0:
                    allocated_size = -1
                else:
                    allocated_size *= 1024

                resource_volumes.append({
                    'storage-pool-name': volume.storage_pool_name,
                    'linstor-uuid': volume.uuid,
                    'number': volume.number,
                    'device-path': volume.device_path,
                    'usable-size': usable_size,
                    'allocated-size': allocated_size
                })

        # Pass 2: merge the live states (in-use, disk state) into pass 1.
        for resource_state in resource_list.resource_states:
            resource = resources[resource_state.rsc_name]['nodes'][resource_state.node_name]
            resource['in-use'] = resource_state.in_use

            volumes = resource['volumes']
            for volume_state in resource_state.volume_states:
                volume = next((x for x in volumes if x['number'] == volume_state.number), None)
                if volume:
                    volume['disk-state'] = volume_state.disk_state

        # Pass 3: attach the owning volume uuid to each known resource.
        for volume_uuid, volume_name in volume_names.items():
            resource = resources.get(volume_name)
            if resource:
                resource['uuid'] = volume_uuid

        return resources
1744 def get_database_path(self):
1745 """
1746 Get the database path.
1747 :return: The current database path.
1748 :rtype: str
1749 """
1750 return self._request_database_path(self._linstor)
1752 @classmethod
1753 def get_all_group_names(cls, base_name):
1754 """
1755 Get all group names. I.e. list of current group + HA.
1756 :param str base_name: The SR group_name to use.
1757 :return: List of group names.
1758 :rtype: list
1759 """
1760 return [cls._build_group_name(base_name), cls._build_ha_group_name(base_name)]
1762 @classmethod
1763 def create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__):
1764 """
1765 Create a new SR on the given nodes.
1766 :param str group_name: The SR group_name to use.
1767 :param set(str) ips: Node ips.
1768 :param int redundancy: How many copy of volumes should we store?
1769 :param bool thin_provisioning: Use thin or thick provisioning.
1770 :param function logger: Function to log messages.
1771 :return: A new LinstorSr instance.
1772 :rtype: LinstorSr
1773 """
1775 try:
1776 cls._start_controller(start=True)
1777 sr = cls._create_sr(group_name, ips, redundancy, thin_provisioning, logger)
1778 finally:
1779 # Controller must be stopped and volume unmounted because
1780 # it is the role of the drbd-reactor daemon to do the right
1781 # actions.
1782 cls._start_controller(start=False)
1783 cls._mount_volume(
1784 cls.build_device_path(DATABASE_VOLUME_NAME),
1785 DATABASE_PATH,
1786 mount=False
1787 )
1788 return sr
    @classmethod
    def _create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__):
        """
        Bootstrap a new LINSTOR SR: register the nodes, create the storage
        pools and resource groups, then create and mount the database
        volume. On failure, created objects are destroyed best-effort.
        """
        # 1. Check if SR already exists.
        uri = 'linstor://localhost'

        lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)

        node_names = list(ips.keys())
        for node_name, ip in ips.items():
            while True:
                # Try to create node.
                result = lin.node_create(
                    node_name,
                    linstor.consts.VAL_NODE_TYPE_CMBD,
                    ip
                )

                errors = cls._filter_errors(result)
                if cls._check_errors(
                    errors, [linstor.consts.FAIL_EXISTS_NODE]
                ):
                    # If it already exists, remove, then recreate.
                    result = lin.node_delete(node_name)
                    error_str = cls._get_error_str(result)
                    if error_str:
                        raise LinstorVolumeManagerError(
                            'Failed to remove old node `{}`: {}'
                            .format(node_name, error_str)
                        )
                elif not errors:
                    break  # Created!
                else:
                    raise LinstorVolumeManagerError(
                        'Failed to create node `{}` with ip `{}`: {}'.format(
                            node_name, ip, cls._get_error_str(errors)
                        )
                    )

        driver_pool_name = group_name
        base_group_name = group_name
        group_name = cls._build_group_name(group_name)
        storage_pool_name = group_name
        pools = lin.storage_pool_list_raise(filter_by_stor_pools=[storage_pool_name]).storage_pools
        if pools:
            existing_node_names = [pool.node_name for pool in pools]
            raise LinstorVolumeManagerError(
                'Unable to create SR `{}`. It already exists on node(s): {}'
                .format(group_name, existing_node_names)
            )

        # A leftover resource group without any resource definition is
        # treated as stale: its config files are moved aside instead of
        # aborting the creation.
        if lin.resource_group_list_raise(
            cls.get_all_group_names(base_group_name)
        ).resource_groups:
            if not lin.resource_dfn_list_raise().resource_definitions:
                backup_path = cls._create_database_backup_path()
                logger(
                    'Group name already exists `{}` without LVs. '
                    'Ignoring and moving the config files in {}'.format(group_name, backup_path)
                )
                cls._move_files(DATABASE_PATH, backup_path)
            else:
                raise LinstorVolumeManagerError(
                    'Unable to create SR `{}`: The group name already exists'
                    .format(group_name)
                )

        if thin_provisioning:
            driver_pool_parts = driver_pool_name.split('/')
            if not len(driver_pool_parts) == 2:
                raise LinstorVolumeManagerError(
                    'Invalid group name using thin provisioning. '
                    'Expected format: \'VG/LV`\''
                )

        # 2. Create storage pool on each node + resource group.
        reg_volume_group_not_found = re.compile(
            ".*Volume group '.*' not found$"
        )

        i = 0
        try:
            # 2.a. Create storage pools.
            storage_pool_count = 0
            while i < len(node_names):
                node_name = node_names[i]

                result = lin.storage_pool_create(
                    node_name=node_name,
                    storage_pool_name=storage_pool_name,
                    storage_driver='LVM_THIN' if thin_provisioning else 'LVM',
                    driver_pool_name=driver_pool_name
                )

                errors = linstor.Linstor.filter_api_call_response_errors(
                    result
                )
                if errors:
                    # A node without the expected volume group is skipped,
                    # any other failure aborts the whole creation.
                    if len(errors) == 1 and errors[0].is_error(
                        linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR
                    ) and reg_volume_group_not_found.match(errors[0].message):
                        logger(
                            'Volume group `{}` not found on `{}`. Ignoring...'
                            .format(group_name, node_name)
                        )
                        cls._destroy_storage_pool(lin, storage_pool_name, node_name)
                    else:
                        error_str = cls._get_error_str(result)
                        raise LinstorVolumeManagerError(
                            'Could not create SP `{}` on node `{}`: {}'
                            .format(group_name, node_name, error_str)
                        )
                else:
                    storage_pool_count += 1
                i += 1

            if not storage_pool_count:
                raise LinstorVolumeManagerError(
                    'Unable to create SR `{}`: No VG group found'.format(
                        group_name,
                    )
                )

            # 2.b. Create resource groups.
            # NOTE(review): if an exception is raised before this line,
            # `ha_group_name` is unbound and the cleanup below would raise
            # a NameError — TODO confirm and guard if needed.
            ha_group_name = cls._build_ha_group_name(base_group_name)
            cls._create_resource_group(
                lin,
                group_name,
                storage_pool_name,
                redundancy,
                True
            )
            # The HA group always uses a replica count of 3.
            cls._create_resource_group(
                lin,
                ha_group_name,
                storage_pool_name,
                3,
                True
            )

            # 3. Create the LINSTOR database volume and mount it.
            try:
                logger('Creating database volume...')
                volume_path = cls._create_database_volume(
                    lin, ha_group_name, storage_pool_name, node_names, redundancy
                )
            except LinstorVolumeManagerError as e:
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    logger('Destroying database volume after creation fail...')
                    cls._force_destroy_database_volume(lin, group_name)
                raise

            try:
                logger('Mounting database volume...')

                # First we must disable the controller to move safely the
                # LINSTOR config.
                cls._start_controller(start=False)

                cls._mount_database_volume(volume_path)
            except Exception as e:
                # Ensure we are connected because controller has been
                # restarted during mount call.
                logger('Destroying database volume after mount fail...')

                try:
                    cls._start_controller(start=True)
                except Exception:
                    pass

                lin = cls._create_linstor_instance(
                    uri, keep_uri_unmodified=True
                )
                cls._force_destroy_database_volume(lin, group_name)
                raise e

            # Reconnect against the freshly mounted database.
            cls._start_controller(start=True)
            lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)

        # 4. Remove storage pools/resource/volume group in the case of errors.
        except Exception as e:
            logger('Destroying resource group and storage pools after fail...')
            try:
                cls._destroy_resource_group(lin, group_name)
                cls._destroy_resource_group(lin, ha_group_name)
            except Exception as e2:
                logger('Failed to destroy resource group: {}'.format(e2))
                pass
            j = 0
            i = min(i, len(node_names) - 1)
            while j <= i:
                try:
                    cls._destroy_storage_pool(lin, storage_pool_name, node_names[j])
                except Exception as e2:
                    logger('Failed to destroy resource group: {}'.format(e2))
                    pass
                j += 1
            raise e

        # 5. Return new instance.
        # __new__ is used on purpose: the regular constructor expects a
        # fully operational SR.
        # NOTE(review): `_ha_group_name` is not set here although other
        # methods (e.g. destroy) read `self._ha_group_name` — presumably
        # the regular constructor sets it; verify before relying on the
        # returned instance for destruction.
        instance = cls.__new__(cls)
        instance._linstor = lin
        instance._logger = logger
        instance._redundancy = redundancy
        instance._base_group_name = base_group_name
        instance._group_name = group_name
        instance._volumes = set()
        instance._storage_pools_time = 0
        instance._kv_cache = instance._create_kv_cache()
        instance._resource_cache = None
        instance._resource_cache_dirty = True
        instance._volume_info_cache = None
        instance._volume_info_cache_dirty = True
        return instance
2004 @classmethod
2005 def build_device_path(cls, volume_name):
2006 """
2007 Build a device path given a volume name.
2008 :param str volume_name: The volume name to use.
2009 :return: A valid or not device path.
2010 :rtype: str
2011 """
2013 return '{}{}/0'.format(cls.DEV_ROOT_PATH, volume_name)
2015 @classmethod
2016 def build_volume_name(cls, base_name):
2017 """
2018 Build a volume name given a base name (i.e. a UUID).
2019 :param str base_name: The volume name to use.
2020 :return: A valid or not device path.
2021 :rtype: str
2022 """
2023 return '{}{}'.format(cls.PREFIX_VOLUME, base_name)
2025 @classmethod
2026 def round_up_volume_size(cls, volume_size):
2027 """
2028 Align volume size on higher multiple of BLOCK_SIZE.
2029 :param int volume_size: The volume size to align.
2030 :return: An aligned volume size.
2031 :rtype: int
2032 """
2033 return round_up(volume_size, cls.BLOCK_SIZE)
2035 @classmethod
2036 def round_down_volume_size(cls, volume_size):
2037 """
2038 Align volume size on lower multiple of BLOCK_SIZE.
2039 :param int volume_size: The volume size to align.
2040 :return: An aligned volume size.
2041 :rtype: int
2042 """
2043 return round_down(volume_size, cls.BLOCK_SIZE)
2045 # --------------------------------------------------------------------------
2046 # Private helpers.
2047 # --------------------------------------------------------------------------
2049 def _create_kv_cache(self):
2050 self._kv_cache = self._create_linstor_kv('/')
2051 self._kv_cache_dirty = False
2052 return self._kv_cache
2054 def _get_kv_cache(self):
2055 if self._kv_cache_dirty:
2056 self._kv_cache = self._create_kv_cache()
2057 return self._kv_cache
2059 def _create_resource_cache(self):
2060 self._resource_cache = self._linstor.resource_list_raise()
2061 self._resource_cache_dirty = False
2062 return self._resource_cache
2064 def _get_resource_cache(self):
2065 if self._resource_cache_dirty:
2066 self._resource_cache = self._create_resource_cache()
2067 return self._resource_cache
2069 def _mark_resource_cache_as_dirty(self):
2070 self._resource_cache_dirty = True
2071 self._volume_info_cache_dirty = True
2073 # --------------------------------------------------------------------------
2075 def _ensure_volume_exists(self, volume_uuid):
2076 if volume_uuid not in self._volumes:
2077 raise LinstorVolumeManagerError(
2078 'volume `{}` doesn\'t exist'.format(volume_uuid),
2079 LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
2080 )
2082 def _find_best_size_candidates(self):
2083 result = self._linstor.resource_group_qmvs(self._group_name)
2084 error_str = self._get_error_str(result)
2085 if error_str:
2086 raise LinstorVolumeManagerError(
2087 'Failed to get max volume size allowed of SR `{}`: {}'.format(
2088 self._group_name,
2089 error_str
2090 )
2091 )
2092 return result[0].candidates
2094 def _fetch_resource_names(self, ignore_deleted=True):
2095 resource_names = set()
2096 dfns = self._linstor.resource_dfn_list_raise().resource_definitions
2097 for dfn in dfns:
2098 if dfn.resource_group_name in self.get_all_group_names(self._base_group_name) and (
2099 ignore_deleted or
2100 linstor.consts.FLAG_DELETE not in dfn.flags
2101 ):
2102 resource_names.add(dfn.name)
2103 return resource_names
2105 def _get_volumes_info(self, volume_name=None):
2106 all_volume_info = {}
2108 if not self._volume_info_cache_dirty:
2109 return self._volume_info_cache
2111 def process_resource(resource):
2112 if resource.name not in all_volume_info:
2113 current = all_volume_info[resource.name] = self.VolumeInfo(
2114 resource.name
2115 )
2116 else:
2117 current = all_volume_info[resource.name]
2119 if linstor.consts.FLAG_DISKLESS not in resource.flags:
2120 current.diskful.append(resource.node_name)
2122 for volume in resource.volumes:
2123 # We ignore diskless pools of the form "DfltDisklessStorPool".
2124 if volume.storage_pool_name != self._group_name:
2125 continue
2126 # Only fetch first volume.
2127 if volume.number != 0:
2128 continue
2130 allocated_size = volume.allocated_size
2131 if allocated_size > current.allocated_size:
2132 current.allocated_size = allocated_size
2134 usable_size = volume.usable_size
2135 if usable_size > 0 and (
2136 usable_size < current.virtual_size or
2137 not current.virtual_size
2138 ):
2139 current.virtual_size = usable_size
2141 try:
2142 for resource in self._get_resource_cache().resources:
2143 process_resource(resource)
2144 for volume in all_volume_info.values():
2145 if volume.allocated_size <= 0:
2146 raise LinstorVolumeManagerError('Failed to get allocated size of `{}`'.format(resource.name))
2148 if volume.virtual_size <= 0:
2149 raise LinstorVolumeManagerError('Failed to get usable size of `{}`'.format(volume.name))
2151 volume.allocated_size *= 1024
2152 volume.virtual_size *= 1024
2153 except LinstorVolumeManagerError:
2154 self._mark_resource_cache_as_dirty()
2155 raise
2157 self._volume_info_cache_dirty = False
2158 self._volume_info_cache = all_volume_info
2160 return all_volume_info
2162 def _get_volume_node_names_and_size(self, volume_name):
2163 node_names = set()
2164 size = -1
2165 for resource in self._linstor.resource_list_raise(
2166 filter_by_resources=[volume_name]
2167 ).resources:
2168 for volume in resource.volumes:
2169 # We ignore diskless pools of the form "DfltDisklessStorPool".
2170 if volume.storage_pool_name != self._group_name:
2171 continue
2173 node_names.add(resource.node_name)
2175 usable_size = volume.usable_size
2176 if usable_size <= 0:
2177 continue
2179 if size < 0:
2180 size = usable_size
2181 else:
2182 size = min(size, usable_size)
2184 if size <= 0:
2185 raise LinstorVolumeManagerError('Failed to get usable size of `{}`'.format(resource.name))
2187 return (node_names, size * 1024)
2189 def _compute_size(self, attr):
2190 capacity = 0
2191 for pool in self._get_storage_pools(force=True):
2192 space = pool.free_space
2193 if space:
2194 size = getattr(space, attr)
2195 if size < 0:
2196 raise LinstorVolumeManagerError(
2197 'Failed to get pool {} attr of `{}`'
2198 .format(attr, pool.node_name)
2199 )
2200 capacity += size
2201 return capacity * 1024
2203 def _get_node_names(self):
2204 node_names = set()
2205 for pool in self._get_storage_pools():
2206 node_names.add(pool.node_name)
2207 return node_names
2209 def _get_storage_pools(self, force=False):
2210 cur_time = time.time()
2211 elsaped_time = cur_time - self._storage_pools_time
2213 if force or elsaped_time >= self.STORAGE_POOLS_FETCH_INTERVAL:
2214 self._storage_pools = self._linstor.storage_pool_list_raise(
2215 filter_by_stor_pools=[self._group_name]
2216 ).storage_pools
2217 self._storage_pools_time = time.time()
2219 return self._storage_pools
    def _create_volume(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create the LINSTOR resource definition of a volume and optionally
        auto-place its resources. The whole creation is retried up to 5 times.
        :param str volume_uuid: UUID used in error reports and to clean up
            the volume on failure.
        :param str volume_name: LINSTOR resource definition name.
        :param int size: Requested size in bytes (rounded up to BLOCK_SIZE).
        :param bool place_resources: If True, auto-place diskful resources
            using the group redundancy.
        :param bool high_availability: If True, spawn the definition in the
            HA resource group instead of the default one.
        :raises LinstorVolumeManagerError: If creation fails.
        """
        size = self.round_up_volume_size(size)
        self._mark_resource_cache_as_dirty()

        group_name = self._ha_group_name if high_availability else self._group_name

        def create_definition():
            # Spawn the definition only (no placement yet). If the HA group
            # does not exist yet, create it once (replication count 3) and
            # retry the spawn.
            first_attempt = True
            while True:
                try:
                    self._check_volume_creation_errors(
                        self._linstor.resource_group_spawn(
                            rsc_grp_name=group_name,
                            rsc_dfn_name=volume_name,
                            vlm_sizes=['{}B'.format(size)],
                            definitions_only=True
                        ),
                        volume_uuid,
                        self._group_name
                    )
                    break
                except LinstorVolumeManagerError as e:
                    if (
                        not first_attempt or
                        not high_availability or
                        e.code != LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
                    ):
                        raise

                    first_attempt = False
                    self._create_resource_group(
                        self._linstor,
                        group_name,
                        self._group_name,
                        3,
                        True
                    )

            self._configure_volume_peer_slots(self._linstor, volume_name)

        def clean():
            # Best-effort rollback: destroy the resource but keep the KV
            # properties (the caller manages them).
            try:
                self._destroy_volume(volume_uuid, force=True, preserve_properties=True)
            except Exception as e:
                self._logger(
                    'Unable to destroy volume {} after creation fail: {}'
                    .format(volume_uuid, e)
                )

        def create():
            try:
                create_definition()
                if place_resources:
                    # Basic case when we use the default redundancy of the group.
                    self._check_volume_creation_errors(
                        self._linstor.resource_auto_place(
                            rsc_name=volume_name,
                            place_count=self._redundancy,
                            diskless_on_remaining=False
                        ),
                        volume_uuid,
                        self._group_name
                    )
            except LinstorVolumeManagerError as e:
                # Never destroy a resource that already existed before us.
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    clean()
                raise
            except Exception:
                clean()
                raise

        util.retry(create, maxretry=5)
    def _create_volume_with_properties(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create a volume and register it in the KV store.
        The properties are written (state = STATE_CREATING) before the
        LINSTOR resource is created so an interrupted creation can be
        detected later.
        :return: The KV properties positioned on the volume namespace.
        :raises LinstorVolumeManagerError: ERR_VOLUME_EXISTS if the UUID or
            the resource name is already used; any other creation error
            after cleaning up the volume.
        """
        if self.check_volume_exists(volume_uuid):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, it already exists'
                .format(volume_uuid, self._group_name) + ' in properties',
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        if volume_name in self._fetch_resource_names():
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    volume_uuid, self._group_name
                ) + 'resource of the same name already exists in LINSTOR'
            )

        # I am paranoid.
        volume_properties = self._get_volume_properties(volume_uuid)
        if (volume_properties.get(self.PROP_NOT_EXISTS) is not None):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}`, '.format(volume_uuid) +
                'properties already exist'
            )

        try:
            # Mark the volume as "being created" before touching LINSTOR.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_CREATING
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            self._create_volume(
                volume_uuid,
                volume_name,
                size,
                place_resources,
                high_availability
            )

            assert volume_properties.namespace == \
                self._build_volume_namespace(volume_uuid)
            return volume_properties
        except LinstorVolumeManagerError as e:
            # Do not destroy existing resource!
            # In theory we can't get this error because we check this event
            # before the `self._create_volume` case.
            # It can only happen if the same volume uuid is used in the same
            # call in another host.
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                self._destroy_volume(volume_uuid, force=True)
            raise
    def _find_device_path(self, volume_uuid, volume_name):
        """
        Activate (if needed) and validate the local device path of a volume.
        :param str volume_uuid: Volume UUID (used in error reports).
        :param str volume_name: LINSTOR resource name of the volume.
        :return: The /dev/drbd/by-res/... path built from `volume_name`.
        :raises LinstorVolumeManagerError: If the by-res symlink does not
            resolve to the device path reported by LINSTOR.
        """
        current_device_path = self._request_device_path(
            volume_uuid, volume_name, activate=True
        )

        # We use realpath here to get the /dev/drbd<id> path instead of
        # /dev/drbd/by-res/<resource_name>.
        expected_device_path = self.build_device_path(volume_name)
        util.wait_for_path(expected_device_path, 5)

        device_realpath = os.path.realpath(expected_device_path)
        if current_device_path != device_realpath:
            raise LinstorVolumeManagerError(
                'Invalid path, current={}, expected={} (realpath={})'
                .format(
                    current_device_path,
                    expected_device_path,
                    device_realpath
                )
            )
        return expected_device_path
    def _request_device_path(self, volume_uuid, volume_name, activate=False):
        """
        Fetch the /dev/drbd<id> device path of a volume on the local node.
        :param bool activate: If True and no local resource exists, make the
            resource available (diskless) and retry once.
        :raises LinstorVolumeManagerError: If no local resource is found and
            activation is not requested (or was already attempted).
        """
        node_name = socket.gethostname()

        resource = next(filter(
            lambda resource: resource.node_name == node_name and
            resource.name == volume_name,
            self._get_resource_cache().resources
        ), None)

        if not resource:
            if activate:
                self._mark_resource_cache_as_dirty()
                self._activate_device_path(
                    self._linstor, node_name, volume_name
                )
                # Retry without `activate` to avoid an infinite loop.
                return self._request_device_path(volume_uuid, volume_name)
            raise LinstorVolumeManagerError(
                'Empty dev path for `{}`, but definition "seems" to exist'
                .format(volume_uuid)
            )
        # Contains a path of the /dev/drbd<id> form.
        return resource.volumes[0].device_path
2400 def _destroy_resource(self, resource_name, force=False):
2401 result = self._linstor.resource_dfn_delete(resource_name)
2402 error_str = self._get_error_str(result)
2403 if not error_str:
2404 self._mark_resource_cache_as_dirty()
2405 return
2407 if not force:
2408 self._mark_resource_cache_as_dirty()
2409 raise LinstorVolumeManagerError(
2410 'Could not destroy resource `{}` from SR `{}`: {}'
2411 .format(resource_name, self._group_name, error_str)
2412 )
2414 # If force is used, ensure there is no opener.
2415 all_openers = get_all_volume_openers(resource_name, '0')
2416 for openers in all_openers.values():
2417 if openers:
2418 self._mark_resource_cache_as_dirty()
2419 raise LinstorVolumeManagerError(
2420 'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)'
2421 .format(resource_name, self._group_name, error_str, all_openers)
2422 )
2424 # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue?
2425 resource_states = filter(
2426 lambda resource_state: resource_state.name == resource_name,
2427 self._get_resource_cache().resource_states
2428 )
2430 # Mark only after computation of states.
2431 self._mark_resource_cache_as_dirty()
2433 for resource_state in resource_states:
2434 volume_state = resource_state.volume_states[0]
2435 if resource_state.in_use:
2436 demote_drbd_resource(resource_state.node_name, resource_name)
2437 break
2438 self._destroy_resource(resource_name)
2440 def _destroy_volume(self, volume_uuid, force=False, preserve_properties=False):
2441 volume_properties = self._get_volume_properties(volume_uuid)
2442 try:
2443 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
2444 if volume_name in self._fetch_resource_names():
2445 self._destroy_resource(volume_name, force)
2447 # Assume this call is atomic.
2448 if not preserve_properties:
2449 volume_properties.clear()
2450 except Exception as e:
2451 raise LinstorVolumeManagerError(
2452 'Cannot destroy volume `{}`: {}'.format(volume_uuid, e)
2453 )
    def _build_volumes(self, repair):
        """
        Build `self._volumes` from the KV store.
        :param bool repair: When True, bad/partial volumes are destroyed (or
            renamed with a "DELETED_" prefix if destruction fails) and
            interrupted UUID renames are resolved; when False, such volumes
            are only ignored and pending renames raise.
        :raises LinstorVolumeManagerError: If repair is required but not
            requested.
        """
        properties = self._kv_cache
        resource_names = self._fetch_resource_names()

        self._volumes = set()

        # Volumes in the middle of a UUID rename (PROP_UPDATING_UUID_SRC set).
        updating_uuid_volumes = self._get_volumes_by_property(
            self.REG_UPDATING_UUID_SRC, ignore_inexisting_volumes=False
        )
        if updating_uuid_volumes and not repair:
            raise LinstorVolumeManagerError(
                'Cannot build LINSTOR volume list: '
                'It exists invalid "updating uuid volumes", repair is required'
            )

        existing_volumes = self._get_volumes_by_property(
            self.REG_NOT_EXISTS, ignore_inexisting_volumes=False
        )
        for volume_uuid, not_exists in existing_volumes.items():
            properties.namespace = self._build_volume_namespace(volume_uuid)

            src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC)
            if src_uuid:
                # Rename destinations are handled in the loop below.
                self._logger(
                    'Ignoring volume during manager initialization with prop '
                    ' PROP_UPDATING_UUID_SRC: {} (properties={})'
                    .format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Insert volume in list if the volume exists. Or if the volume
            # is being created and a slave wants to use it (repair = False).
            #
            # If we are on the master and if repair is True and state is
            # Creating, it's probably a bug or crash: the creation process has
            # been stopped.
            if not_exists == self.STATE_EXISTS or (
                not repair and not_exists == self.STATE_CREATING
            ):
                self._volumes.add(volume_uuid)
                continue

            if not repair:
                self._logger(
                    'Ignoring bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Remove bad volume.
            try:
                self._logger(
                    'Removing bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                volume_name = properties.get(self.PROP_VOLUME_NAME)

                # Little optimization, don't call `self._destroy_volume`,
                # we already have resource name list.
                if volume_name in resource_names:
                    self._destroy_resource(volume_name, force=True)

                # Assume this call is atomic.
                properties.clear()
            except Exception as e:
                # Do not raise, we don't want to block user action.
                self._logger(
                    'Cannot clean volume {}: {}'.format(volume_uuid, e)
                )

                # The volume can't be removed, maybe it's still in use,
                # in this case rename it with the "DELETED_" prefix.
                # This prefix is mandatory if it exists a snap transaction to
                # rollback because the original VDI UUID can try to be renamed
                # with the UUID we are trying to delete...
                if not volume_uuid.startswith('DELETED_'):
                    self.update_volume_uuid(
                        volume_uuid, 'DELETED_' + volume_uuid, force=True
                    )

        # Resolve interrupted UUID renames: drop the destination if it was
        # never fully created, otherwise adopt it and wipe the source.
        for dest_uuid, src_uuid in updating_uuid_volumes.items():
            dest_namespace = self._build_volume_namespace(dest_uuid)

            properties.namespace = dest_namespace
            if int(properties.get(self.PROP_NOT_EXISTS)):
                properties.clear()
                continue

            properties.namespace = self._build_volume_namespace(src_uuid)
            properties.clear()

            properties.namespace = dest_namespace
            properties.pop(self.PROP_UPDATING_UUID_SRC)

            if src_uuid in self._volumes:
                self._volumes.remove(src_uuid)
            self._volumes.add(dest_uuid)
2562 def _get_sr_properties(self):
2563 return self._create_linstor_kv(self._build_sr_namespace())
2565 def _get_volumes_by_property(
2566 self, reg_prop, ignore_inexisting_volumes=True
2567 ):
2568 base_properties = self._get_kv_cache()
2569 base_properties.namespace = self._build_volume_namespace()
2571 volume_properties = {}
2572 for volume_uuid in self._volumes:
2573 volume_properties[volume_uuid] = ''
2575 for key, value in base_properties.items():
2576 res = reg_prop.match(key)
2577 if res:
2578 volume_uuid = res.groups()[0]
2579 if not ignore_inexisting_volumes or \
2580 volume_uuid in self._volumes:
2581 volume_properties[volume_uuid] = value
2583 return volume_properties
2585 def _create_linstor_kv(self, namespace):
2586 return linstor.KV(
2587 self._group_name,
2588 uri=self._linstor.controller_host(),
2589 namespace=namespace
2590 )
2592 def _get_volume_properties(self, volume_uuid):
2593 properties = self._get_kv_cache()
2594 properties.namespace = self._build_volume_namespace(volume_uuid)
2595 return properties
2597 @classmethod
2598 def _build_sr_namespace(cls):
2599 return '/{}/'.format(cls.NAMESPACE_SR)
2601 @classmethod
2602 def _build_volume_namespace(cls, volume_uuid=None):
2603 # Return a path to all volumes if `volume_uuid` is not given.
2604 if volume_uuid is None:
2605 return '/{}/'.format(cls.NAMESPACE_VOLUME)
2606 return '/{}/{}/'.format(cls.NAMESPACE_VOLUME, volume_uuid)
2608 @classmethod
2609 def _get_error_str(cls, result):
2610 return ', '.join([
2611 err.message for err in cls._filter_errors(result)
2612 ])
2614 @classmethod
2615 def _create_linstor_instance(
2616 cls, uri, keep_uri_unmodified=False, attempt_count=30
2617 ):
2618 retry = False
2620 def connect(uri):
2621 if not uri:
2622 uri = get_controller_uri()
2623 if not uri:
2624 raise LinstorVolumeManagerError(
2625 'Unable to find controller uri...'
2626 )
2627 instance = linstor.Linstor(uri, keep_alive=True)
2628 instance.connect()
2629 return instance
2631 try:
2632 return connect(uri)
2633 except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError):
2634 pass
2636 if not keep_uri_unmodified:
2637 uri = None
2639 return util.retry(
2640 lambda: connect(uri),
2641 maxretry=attempt_count,
2642 period=1,
2643 exceptions=[
2644 linstor.errors.LinstorNetworkError,
2645 LinstorVolumeManagerError
2646 ]
2647 )
2649 @classmethod
2650 def _configure_volume_peer_slots(cls, lin, volume_name):
2651 result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3)
2652 error_str = cls._get_error_str(result)
2653 if error_str:
2654 raise LinstorVolumeManagerError(
2655 'Could not configure volume peer slots of {}: {}'
2656 .format(volume_name, error_str)
2657 )
2659 @classmethod
2660 def _activate_device_path(cls, lin, node_name, volume_name):
2661 result = lin.resource_make_available(node_name, volume_name, diskful=False)
2662 if linstor.Linstor.all_api_responses_no_error(result):
2663 return
2664 errors = linstor.Linstor.filter_api_call_response_errors(result)
2665 if len(errors) == 1 and errors[0].is_error(
2666 linstor.consts.FAIL_EXISTS_RSC
2667 ):
2668 return
2670 raise LinstorVolumeManagerError(
2671 'Unable to activate device path of `{}` on node `{}`: {}'
2672 .format(volume_name, node_name, ', '.join(
2673 [str(x) for x in result]))
2674 )
2676 @classmethod
2677 def _request_database_path(cls, lin, activate=False):
2678 node_name = socket.gethostname()
2680 try:
2681 resource = next(filter(
2682 lambda resource: resource.node_name == node_name and
2683 resource.name == DATABASE_VOLUME_NAME,
2684 lin.resource_list_raise().resources
2685 ), None)
2686 except Exception as e:
2687 raise LinstorVolumeManagerError(
2688 'Unable to fetch database resource: {}'
2689 .format(e)
2690 )
2692 if not resource:
2693 if activate:
2694 cls._activate_device_path(
2695 lin, node_name, DATABASE_VOLUME_NAME
2696 )
2697 return cls._request_database_path(
2698 DATABASE_VOLUME_NAME, DATABASE_VOLUME_NAME
2699 )
2700 raise LinstorVolumeManagerError(
2701 'Empty dev path for `{}`, but definition "seems" to exist'
2702 .format(DATABASE_PATH)
2703 )
2704 # Contains a path of the /dev/drbd<id> form.
2705 return resource.volumes[0].device_path
    @classmethod
    def _create_database_volume(
        cls, lin, group_name, storage_pool_name, node_names, redundancy
    ):
        """
        Create, place and format the LINSTOR database volume.
        :param lin: A connected linstor.Linstor instance.
        :param str group_name: Resource group used to spawn the definition.
        :param str storage_pool_name: Storage pool for diskful resources.
        :param node_names: All pool nodes; nodes without the storage pool
            get diskless resources.
        :param int redundancy: Number of diskful copies to create.
        :return: The /dev/drbd/by-res/... device path of the volume.
        :raises LinstorVolumeManagerError: On any creation/format failure,
            or if resource definitions already exist.
        """
        try:
            dfns = lin.resource_dfn_list_raise().resource_definitions
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Unable to get definitions during database creation: {}'
                .format(e)
            )

        if dfns:
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    DATABASE_VOLUME_NAME, group_name
                ) + 'LINSTOR volume list must be empty.'
            )

        # Workaround to use thin lvm. Without this line an error is returned:
        # "Not enough available nodes"
        # I don't understand why but this command protect against this bug.
        try:
            pools = lin.storage_pool_list_raise(
                filter_by_stor_pools=[storage_pool_name]
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to get storage pool list before database creation: {}'
                .format(e)
            )

        # Ensure we have a correct list of storage pools.
        assert pools.storage_pools  # We must have at least one storage pool!
        nodes_with_pool = list(map(lambda pool: pool.node_name, pools.storage_pools))
        for node_name in nodes_with_pool:
            assert node_name in node_names
        util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool))

        # Create the database definition.
        size = cls.round_up_volume_size(DATABASE_SIZE)
        cls._check_volume_creation_errors(lin.resource_group_spawn(
            rsc_grp_name=group_name,
            rsc_dfn_name=DATABASE_VOLUME_NAME,
            vlm_sizes=['{}B'.format(size)],
            definitions_only=True
        ), DATABASE_VOLUME_NAME, group_name)
        cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME)

        # Create real resources on the first nodes.
        resources = []

        # Split the nodes: the first `redundancy` nodes owning the storage
        # pool get diskful copies, everyone else gets a diskless resource.
        diskful_nodes = []
        diskless_nodes = []
        for node_name in node_names:
            if node_name in nodes_with_pool:
                diskful_nodes.append(node_name)
            else:
                diskless_nodes.append(node_name)

        assert diskful_nodes
        for node_name in diskful_nodes[:redundancy]:
            util.SMlog('Create database diskful on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                storage_pool=storage_pool_name
            ))
        # Create diskless resources on the remaining set.
        for node_name in diskful_nodes[redundancy:] + diskless_nodes:
            util.SMlog('Create database diskless on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                diskless=True
            ))

        result = lin.resource_create(resources)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create database volume from SR `{}`: {}'.format(
                    group_name, error_str
                )
            )

        # Create database and ensure path exists locally and
        # on replicated devices.
        current_device_path = cls._request_database_path(lin, activate=True)

        # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be
        # plugged.
        for node_name in node_names:
            cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME)

        # We use realpath here to get the /dev/drbd<id> path instead of
        # /dev/drbd/by-res/<resource_name>.
        expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME)
        util.wait_for_path(expected_device_path, 5)

        device_realpath = os.path.realpath(expected_device_path)
        if current_device_path != device_realpath:
            raise LinstorVolumeManagerError(
                'Invalid path, current={}, expected={} (realpath={})'
                .format(
                    current_device_path,
                    expected_device_path,
                    device_realpath
                )
            )

        # Format the new device with the database filesystem.
        try:
            util.retry(
                lambda: util.pread2([DATABASE_MKFS, expected_device_path]),
                maxretry=5
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to execute {} on database volume: {}'
                .format(DATABASE_MKFS, e)
            )

        return expected_device_path
2831 @classmethod
2832 def _destroy_database_volume(cls, lin, group_name):
2833 error_str = cls._get_error_str(
2834 lin.resource_dfn_delete(DATABASE_VOLUME_NAME)
2835 )
2836 if error_str:
2837 raise LinstorVolumeManagerError(
2838 'Could not destroy resource `{}` from SR `{}`: {}'
2839 .format(DATABASE_VOLUME_NAME, group_name, error_str)
2840 )
    @classmethod
    def _mount_database_volume(cls, volume_path, mount=True, force=False):
        """
        Mount (or unmount) the database volume on DATABASE_PATH, preserving
        the current content of that directory across the switch.
        :param str volume_path: Device path of the database volume.
        :param bool mount: Mount when True, unmount when False.
        :param bool force: Allow `_move_files` to overwrite existing files.
        On error, a best-effort rollback tries to restore the previous state
        before re-raising.
        """
        try:
            # 1. Create a backup config folder.
            database_not_empty = bool(os.listdir(DATABASE_PATH))
            backup_path = cls._create_database_backup_path()

            # 2. Move the config in the mounted volume.
            if database_not_empty:
                cls._move_files(DATABASE_PATH, backup_path)

            cls._mount_volume(volume_path, DATABASE_PATH, mount)

            if database_not_empty:
                cls._move_files(backup_path, DATABASE_PATH, force)

            # 3. Remove useless backup directory.
            try:
                os.rmdir(backup_path)
            except Exception as e:
                raise LinstorVolumeManagerError(
                    'Failed to remove backup path {} of LINSTOR config: {}'
                    .format(backup_path, e)
                )
        except Exception as e:
            # Rollback: every step is attempted even if earlier ones fail
            # (errors, including a possibly unbound `backup_path`, are
            # swallowed by `force_exec`).
            def force_exec(fn):
                try:
                    fn()
                except Exception:
                    pass

            # If the (un)mount already happened, move the files back to the
            # backup dir and undo the mount state change first.
            if mount == cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    DATABASE_PATH, backup_path
                ))
                force_exec(lambda: cls._mount_volume(
                    volume_path, DATABASE_PATH, not mount
                ))

            if mount != cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    backup_path, DATABASE_PATH
                ))

            force_exec(lambda: os.rmdir(backup_path))
            raise e
2889 @classmethod
2890 def _force_destroy_database_volume(cls, lin, group_name):
2891 try:
2892 cls._destroy_database_volume(lin, group_name)
2893 except Exception:
2894 pass
2896 @classmethod
2897 def _destroy_storage_pool(cls, lin, group_name, node_name):
2898 def destroy():
2899 result = lin.storage_pool_delete(node_name, group_name)
2900 errors = cls._filter_errors(result)
2901 if cls._check_errors(errors, [
2902 linstor.consts.FAIL_NOT_FOUND_STOR_POOL,
2903 linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN
2904 ]):
2905 return
2907 if errors:
2908 raise LinstorVolumeManagerError(
2909 'Failed to destroy SP `{}` on node `{}`: {}'.format(
2910 group_name,
2911 node_name,
2912 cls._get_error_str(errors)
2913 )
2914 )
2916 # We must retry to avoid errors like:
2917 # "can not be deleted as volumes / snapshot-volumes are still using it"
2918 # after LINSTOR database volume destruction.
2919 return util.retry(destroy, maxretry=10)
2921 @classmethod
2922 def _create_resource_group(
2923 cls,
2924 lin,
2925 group_name,
2926 storage_pool_name,
2927 redundancy,
2928 destroy_old_group
2929 ):
2930 rg_creation_attempt = 0
2931 while True:
2932 result = lin.resource_group_create(
2933 name=group_name,
2934 place_count=redundancy,
2935 storage_pool=storage_pool_name,
2936 diskless_on_remaining=False
2937 )
2938 error_str = cls._get_error_str(result)
2939 if not error_str:
2940 break
2942 errors = cls._filter_errors(result)
2943 if destroy_old_group and cls._check_errors(errors, [
2944 linstor.consts.FAIL_EXISTS_RSC_GRP
2945 ]):
2946 rg_creation_attempt += 1
2947 if rg_creation_attempt < 2:
2948 try:
2949 cls._destroy_resource_group(lin, group_name)
2950 except Exception as e:
2951 error_str = 'Failed to destroy old and empty RG: {}'.format(e)
2952 else:
2953 continue
2955 raise LinstorVolumeManagerError(
2956 'Could not create RG `{}`: {}'.format(
2957 group_name, error_str
2958 )
2959 )
2961 result = lin.volume_group_create(group_name)
2962 error_str = cls._get_error_str(result)
2963 if error_str:
2964 raise LinstorVolumeManagerError(
2965 'Could not create VG `{}`: {}'.format(
2966 group_name, error_str
2967 )
2968 )
2970 @classmethod
2971 def _destroy_resource_group(cls, lin, group_name):
2972 def destroy():
2973 result = lin.resource_group_delete(group_name)
2974 errors = cls._filter_errors(result)
2975 if cls._check_errors(errors, [
2976 linstor.consts.FAIL_NOT_FOUND_RSC_GRP
2977 ]):
2978 return
2980 if errors:
2981 raise LinstorVolumeManagerError(
2982 'Failed to destroy RG `{}`: {}'
2983 .format(group_name, cls._get_error_str(errors))
2984 )
2986 return util.retry(destroy, maxretry=10)
2988 @classmethod
2989 def _build_group_name(cls, base_name):
2990 # If thin provisioning is used we have a path like this:
2991 # `VG/LV`. "/" is not accepted by LINSTOR.
2992 return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_'))
2994 # Used to store important data in a HA context,
2995 # i.e. a replication count of 3.
2996 @classmethod
2997 def _build_ha_group_name(cls, base_name):
2998 return '{}{}'.format(cls.PREFIX_HA, base_name.replace('/', '_'))
    @classmethod
    def _check_volume_creation_errors(cls, result, volume_uuid, group_name):
        """
        Translate LINSTOR creation errors into LinstorVolumeManagerError
        with a specific code for "already exists" and "group missing".
        :param result: LINSTOR reply to a spawn/auto-place call.
        :param str volume_uuid: Volume UUID used in error messages.
        :param str group_name: SR group name used in error messages.
        :raises LinstorVolumeManagerError: On any error in `result`.
        """
        errors = cls._filter_errors(result)
        if cls._check_errors(errors, [
            linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN
        ]):
            raise LinstorVolumeManagerError(
                'Failed to create volume `{}` from SR `{}`, it already exists'
                .format(volume_uuid, group_name),
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        if cls._check_errors(errors, [linstor.consts.FAIL_NOT_FOUND_RSC_GRP]):
            raise LinstorVolumeManagerError(
                'Failed to create volume `{}` from SR `{}`, resource group doesn\'t exist'
                .format(volume_uuid, group_name),
                LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
            )

        if errors:
            raise LinstorVolumeManagerError(
                'Failed to create volume `{}` from SR `{}`: {}'.format(
                    volume_uuid,
                    group_name,
                    cls._get_error_str(errors)
                )
            )
    @classmethod
    def _move_files(cls, src_dir, dest_dir, force=False):
        """
        Move all files of src_dir into dest_dir ('lost+found' is skipped).
        :param bool force: When False, refuse to overwrite existing files
            and roll already-moved files back on failure.
        :raises LinstorVolumeManagerError: If the destination is not empty
            (non-force) or if any move fails.
        """
        def listdir(dir):
            ignored = ['lost+found']
            return [file for file in os.listdir(dir) if file not in ignored]

        try:
            if not force:
                files = listdir(dest_dir)
                if files:
                    raise LinstorVolumeManagerError(
                        'Cannot move files from {} to {} because destination '
                        'contains: {}'.format(src_dir, dest_dir, files)
                    )
        except LinstorVolumeManagerError:
            raise
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Cannot list dir {}: {}'.format(dest_dir, e)
            )

        try:
            for file in listdir(src_dir):
                try:
                    dest_file = os.path.join(dest_dir, file)
                    if not force and os.path.exists(dest_file):
                        raise LinstorVolumeManagerError(
                            'Cannot move {} because it already exists in the '
                            'destination'.format(file)
                        )
                    shutil.move(os.path.join(src_dir, file), dest_file)
                except LinstorVolumeManagerError:
                    raise
                except Exception as e:
                    raise LinstorVolumeManagerError(
                        'Cannot move {}: {}'.format(file, e)
                    )
        except Exception as e:
            # Best-effort rollback of the files already moved.
            if not force:
                try:
                    cls._move_files(dest_dir, src_dir, force=True)
                except Exception:
                    pass

            raise LinstorVolumeManagerError(
                'Failed to move files from {} to {}: {}'.format(
                    src_dir, dest_dir, e
                )
            )
3078 @staticmethod
3079 def _create_database_backup_path():
3080 path = DATABASE_PATH + '-' + str(uuid.uuid4())
3081 try:
3082 os.mkdir(path)
3083 return path
3084 except Exception as e:
3085 raise LinstorVolumeManagerError(
3086 'Failed to create backup path {} of LINSTOR config: {}'
3087 .format(path, e)
3088 )
3090 @staticmethod
3091 def _get_filtered_properties(properties):
3092 return dict(properties.items())
3094 @staticmethod
3095 def _filter_errors(result):
3096 return [
3097 err for err in result
3098 if hasattr(err, 'is_error') and err.is_error()
3099 ]
3101 @staticmethod
3102 def _check_errors(result, codes):
3103 for err in result:
3104 for code in codes:
3105 if err.is_error(code):
3106 return True
3107 return False
3109 @classmethod
3110 def _controller_is_running(cls):
3111 return cls._service_is_running('linstor-controller')
3113 @classmethod
3114 def _start_controller(cls, start=True):
3115 return cls._start_service('linstor-controller', start)
3117 @staticmethod
3118 def _start_service(name, start=True):
3119 action = 'start' if start else 'stop'
3120 (ret, out, err) = util.doexec([
3121 'systemctl', action, name
3122 ])
3123 if ret != 0:
3124 raise LinstorVolumeManagerError(
3125 'Failed to {} {}: {} {}'
3126 .format(action, name, out, err)
3127 )
3129 @staticmethod
3130 def _service_is_running(name):
3131 (ret, out, err) = util.doexec([
3132 'systemctl', 'is-active', '--quiet', name
3133 ])
3134 return not ret
3136 @staticmethod
3137 def _is_mounted(mountpoint):
3138 (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint])
3139 return ret == 0
3141 @classmethod
3142 def _mount_volume(cls, volume_path, mountpoint, mount=True):
3143 if mount:
3144 try:
3145 util.pread(['mount', volume_path, mountpoint])
3146 except Exception as e:
3147 raise LinstorVolumeManagerError(
3148 'Failed to mount volume {} on {}: {}'
3149 .format(volume_path, mountpoint, e)
3150 )
3151 else:
3152 try:
3153 if cls._is_mounted(mountpoint):
3154 util.pread(['umount', mountpoint])
3155 except Exception as e:
3156 raise LinstorVolumeManagerError(
3157 'Failed to umount volume {} on {}: {}'
3158 .format(volume_path, mountpoint, e)
3159 )
3162# ==============================================================================
# Check if a path is a DRBD resource and log the process name/pid
# that opened it.
def log_drbd_openers(path):
    """
    Best-effort diagnostic: if `path` is a DRBD by-res device link,
    log which processes/hosts have it open. Never raises; failures
    are only logged.

    :param str path: Device path, expected to look like
        `/dev/drbd/by-res/<resource>/<volume>`.
    """
    # Ignore if it's not a symlink to DRBD resource.
    if not path.startswith(DRBD_BY_RES_PATH):
        return

    # Compute resource name.
    res_name_end = path.find('/', len(DRBD_BY_RES_PATH))
    if res_name_end == -1:
        return
    res_name = path[len(DRBD_BY_RES_PATH):res_name_end]

    # Compute volume number. For the canonical layout
    # `/dev/drbd/by-res/<resource>/<volume>` the last '/' is the SAME
    # separator as `res_name_end`, so comparing the two offsets (as the
    # previous code did) rejected every valid path and made this
    # function a silent no-op. Only reject an empty volume component
    # (e.g. a trailing slash).
    volume = path[path.rfind('/') + 1:]
    if not volume:
        return

    try:
        # Ensure path is a DRBD.
        drbd_path = os.path.realpath(path)
        stats = os.stat(drbd_path)
        # 147 is the registered block-device major number of DRBD.
        if not stat.S_ISBLK(stats.st_mode) or os.major(stats.st_rdev) != 147:
            return

        # Find where the device is open.
        (ret, stdout, stderr) = util.doexec(['drbdadm', 'status', res_name])
        if ret != 0:
            util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format(
                res_name, stderr
            ))
            return

        # Is it a local device?
        if stdout.startswith('{} role:Primary'.format(res_name)):
            util.SMlog(
                'DRBD resource `{}` is open on local host: {}'
                .format(path, get_local_volume_openers(res_name, volume))
            )
            return

        # Is it a remote device?
        util.SMlog(
            'DRBD resource `{}` is open on hosts: {}'
            .format(path, get_all_volume_openers(res_name, volume))
        )
    except Exception as e:
        util.SMlog(
            'Got exception while trying to determine where DRBD resource ' +
            '`{}` is open: {}'.format(path, e)
        )