Coverage for drivers/linstorvolumemanager.py : 10%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python3
2#
3# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 3 of the License, or
8# (at your option) any later version.
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16#
18from sm_typing import override
20import errno
21import json
22import linstor
23import os.path
24import re
25import shutil
26import socket
27import stat
28import time
29import util
30import uuid
# Persistent prefix to add to RAW persistent volumes.
PERSISTENT_PREFIX = 'xcp-persistent-'
# Contains the data of the "/var/lib/linstor" directory.
DATABASE_VOLUME_NAME = PERSISTENT_PREFIX + 'database'
DATABASE_SIZE = 1 << 30 # 1GB.
# Mount point of the LINSTOR controller database volume.
DATABASE_PATH = '/var/lib/linstor'
# Command used to format the database volume.
DATABASE_MKFS = 'mkfs.ext4'
# Matches the peer name holding the Primary role in `drbdadm status` output.
REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary")
# Extracts the address part of a `drbdsetup show --json` "_remote_host" value.
REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$')
# Directory where DRBD exposes device paths by resource name.
DRBD_BY_RES_PATH = '/dev/drbd/by-res/'
# Name of the XAPI plugin used for cross-host calls.
PLUGIN = 'linstor-manager'
49# ==============================================================================
def get_local_volume_openers(resource_name, volume):
    """
    Read the DRBD openers of a volume on the local host through debugfs.
    :param str resource_name: Name of the DRBD resource to inspect.
    :param int volume: Volume number inside the resource.
    :return: JSON string mapping each PID to its process name and
    open duration (in ms).
    :rtype: str
    :raise Exception: If arguments are missing or a line cannot be parsed.
    """
    if not resource_name or volume is None:
        raise Exception('Cannot get DRBD openers without resource name and/or volume.')

    path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format(
        resource_name, volume
    )

    with open(path, 'r') as openers:
        # Not a big cost, so read all lines directly.
        lines = openers.readlines()

    result = {}

    # Each line: "<process name> <pid> <open duration in ms>".
    opener_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)')
    for line in lines:
        match = opener_re.match(line)
        if not match:
            # An `assert` here would be stripped under `python -O` and the
            # line silently ignored: fail loudly instead.
            raise Exception('Unexpected DRBD opener line: `{}`'.format(line))

        process_name, pid, open_duration_ms = match.groups()
        result[pid] = {
            'process-name': process_name,
            'open-duration': open_duration_ms
        }

    return json.dumps(result)
def get_all_volume_openers(resource_name, volume):
    """
    Collect the DRBD openers of a volume on every live host of the pool.
    :param str resource_name: Name of the DRBD resource to inspect.
    :param int volume: Volume number inside the resource.
    :return: Dict mapping each reachable node name to its decoded openers.
    :rtype: dict
    """
    PLUGIN_CMD = 'getDrbdOpeners'

    volume = str(volume)
    openers = {}

    # Make sure this call never stucks because this function can be called
    # during HA init and in this case we can wait forever.
    session = util.timeout_call(10, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            if not metrics['live']:
                # Ensure we call plugin on online hosts only.
                continue

            args = {'resourceName': resource_name, 'volume': volume}
            openers[node_name] = json.loads(
                session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, args)
            )
        except Exception as e:
            util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))

    return openers
115# ==============================================================================
def round_up(value, divisor):
    """
    Round `value` up to the nearest multiple of `divisor`.
    :param int value: Value to round.
    :param int divisor: Rounding step, must be non-zero.
    :return: Smallest multiple of `divisor` that is >= `value`.
    :rtype: int
    """
    assert divisor
    step = int(divisor)
    return (int(value) + step - 1) // step * step
def round_down(value, divisor):
    """
    Round `value` down to the nearest multiple of `divisor`.
    :param int value: Value to round.
    :param int divisor: Rounding step, must be non-zero.
    :return: Largest multiple of `divisor` that is <= `value`.
    :rtype: int
    """
    assert divisor
    quantity = int(value)
    return quantity - quantity % int(divisor)
129# ==============================================================================
def get_remote_host_ip(node_name):
    """
    Find the IP of a remote node using the DRBD connections of the
    database volume.
    :param str node_name: Node name to look for.
    :return: The IP address if found, None otherwise.
    :rtype: str
    """
    (ret, stdout, stderr) = util.doexec([
        'drbdsetup', 'show', DATABASE_VOLUME_NAME, '--json'
    ])
    if ret != 0:
        return

    try:
        conf = json.loads(stdout)
        if not conf:
            return

        for connection in conf[0]['connections']:
            if connection['net']['_name'] != node_name:
                continue
            # Extract the address part before the port from "_remote_host".
            res = REG_DRBDSETUP_IP.match(connection['path']['_remote_host'])
            if res:
                return res.groups()[0]
            break
    except Exception:
        # Any parsing issue is treated as "not found".
        pass
def _get_controller_uri():
    """
    Try to locate the LINSTOR controller of the pool.
    The controller runs on the Primary of the database volume, so we first
    ask DRBD locally, then fall back to probing every host through XAPI.
    :return: A 'linstor://<host>' URI, or None if no controller is found.
    :rtype: str
    """
    PLUGIN_CMD = 'hasControllerRunning'

    # Try to find controller using drbdadm.
    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])
    if ret == 0:
        # If we are here, the database device exists locally.

        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            # Nice case, we have the controller running on this local host.
            return 'linstor://localhost'

        # Try to find the host using DRBD connections.
        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            node_name = res.groups()[0]
            ip = get_remote_host_ip(node_name)
            if ip:
                return 'linstor://' + ip

    # Worst case: we use many hosts in the pool (>= 4), so we can't find the
    # primary using drbdadm because we don't have all connections to the
    # replicated volume. `drbdadm status xcp-persistent-database` returns
    # 3 connections by default.
    try:
        session = util.timeout_call(10, util.get_localAPI_session)

        for host_ref, host_record in session.xenapi.host.get_all_records().items():
            node_name = host_record['hostname']
            try:
                if util.strtobool(
                    session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {})
                ):
                    return 'linstor://' + host_record['address']
            except Exception as e:
                # Can throw an exception if a host is offline. So catch it.
                util.SMlog('Unable to search controller on `{}`: {}'.format(
                    node_name, e
                ))
    except Exception:
        # Not found, maybe we are trying to create the SR...
        # Note: a bare `except:` would also swallow KeyboardInterrupt and
        # SystemExit, hence the explicit `Exception` here.
        pass
def get_controller_uri():
    """
    Locate the LINSTOR controller URI, retrying for a while.
    :return: The controller URI, or None after 10 failed attempts.
    :rtype: str
    """
    # 10 attempts with a one-second pause between them (no pause after
    # the last one), exactly like the previous retry-counter loop.
    for attempt in range(10):
        uri = _get_controller_uri()
        if uri:
            return uri
        if attempt < 9:
            time.sleep(1)
def get_controller_node_name():
    """
    Find the name of the node hosting the LINSTOR controller.
    :return: 'localhost' if the controller runs here, the remote node name
    otherwise, or None when it cannot be found.
    :rtype: str
    """
    PLUGIN_CMD = 'hasControllerRunning'

    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])

    if ret == 0:
        # The database volume exists locally: DRBD may already know the Primary.
        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            return 'localhost'

        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            return res.groups()[0]

    # Otherwise probe every live host through the XAPI plugin.
    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            if not metrics['live']:
                # Only query online hosts.
                continue

            has_controller = util.strtobool(session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {}
            ))
            if has_controller:
                return node_name
        except Exception as e:
            util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format(
                node_name, e
            ))
def demote_drbd_resource(node_name, resource_name):
    """
    Ask a specific node to demote a DRBD resource (Primary -> Secondary).
    :param str node_name: Node that must demote the resource.
    :param str resource_name: The DRBD resource to demote.
    :raise Exception: If `node_name` cannot be found in the pool.
    """
    PLUGIN_CMD = 'demoteDrbdResource'

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        if host_record['hostname'] != node_name:
            continue

        try:
            session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name}
            )
        except Exception as e:
            # Best effort: log the failure and stop, the node was reached.
            util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))
        # The target node was found and the demote attempted: return here so
        # we don't fall through to the "unable to find node" error below.
        return
    raise Exception(
        'Can\'t demote resource `{}`, unable to find node `{}`'
        .format(resource_name, node_name)
    )
269# ==============================================================================
class LinstorVolumeManagerError(Exception):
    """
    Error raised by LinstorVolumeManager, carrying a machine-readable
    error code that callers can match against the ERR_* constants.
    """

    # Note: no trailing commas here — they previously turned these codes
    # into 1-element tuples (ERR_GROUP_NOT_EXISTS already had none, which
    # made the constants inconsistent).
    ERR_GENERIC = 0
    ERR_VOLUME_EXISTS = 1
    ERR_VOLUME_NOT_EXISTS = 2
    ERR_VOLUME_DESTROY = 3
    ERR_GROUP_NOT_EXISTS = 4

    def __init__(self, message, code=ERR_GENERIC):
        super(LinstorVolumeManagerError, self).__init__(message)
        self._code = code

    @property
    def code(self):
        # One of the ERR_* constants above.
        return self._code
287# ==============================================================================
289# Note:
290# If a storage pool is not accessible after a network change:
291# linstor node interface modify <NODE> default --ip <IP>
294class LinstorVolumeManager(object):
295 """
296 API to manager LINSTOR volumes in XCP-ng.
297 A volume in this context is a physical part of the storage layer.
298 """
    # All instance attributes (SR configuration + caches); __slots__ keeps
    # instances small and catches typos on attribute assignment.
    __slots__ = (
        '_linstor', '_logger', '_redundancy',
        '_base_group_name', '_group_name', '_ha_group_name',
        '_volumes', '_storage_pools', '_storage_pools_time',
        '_kv_cache', '_resource_cache', '_volume_info_cache',
        '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty'
    )

    # Root directory where DRBD devices are reachable by resource name.
    DEV_ROOT_PATH = DRBD_BY_RES_PATH

    # Default sector size.
    BLOCK_SIZE = 512

    # List of volume properties.
    PROP_METADATA = 'metadata'
    PROP_NOT_EXISTS = 'not-exists'
    PROP_VOLUME_NAME = 'volume-name'
    PROP_IS_READONLY_TIMESTAMP = 'readonly-timestamp'

    # A volume can only be locked for a limited duration.
    # The goal is to give enough time to slaves to execute some actions on
    # a device before an UUID update or a coalesce for example.
    # Expiration is expressed in seconds.
    LOCKED_EXPIRATION_DELAY = 1 * 60

    # Used when volume uuid is being updated.
    PROP_UPDATING_UUID_SRC = 'updating-uuid-src'

    # States of property PROP_NOT_EXISTS.
    STATE_EXISTS = '0'
    STATE_NOT_EXISTS = '1'
    STATE_CREATING = '2'

    # Property namespaces.
    NAMESPACE_SR = 'xcp/sr'
    NAMESPACE_VOLUME = 'xcp/volume'

    # Regex to match properties.
    REG_PROP = '^([^/]+)/{}$'

    REG_METADATA = re.compile(REG_PROP.format(PROP_METADATA))
    REG_NOT_EXISTS = re.compile(REG_PROP.format(PROP_NOT_EXISTS))
    REG_VOLUME_NAME = re.compile(REG_PROP.format(PROP_VOLUME_NAME))
    REG_UPDATING_UUID_SRC = re.compile(REG_PROP.format(PROP_UPDATING_UUID_SRC))

    # Prefixes of SR/VOLUME in the LINSTOR DB.
    # A LINSTOR (resource, group, ...) name cannot start with a number.
    # So we add a prefix behind our SR/VOLUME uuids.
    PREFIX_SR = 'xcp-sr-'
    PREFIX_HA = 'xcp-ha-'
    PREFIX_VOLUME = 'xcp-volume-'

    # Limit request number when storage pool info is asked, we fetch
    # the current pool status after N elapsed seconds.
    STORAGE_POOLS_FETCH_INTERVAL = 15

    @staticmethod
    def default_logger(*args):
        # Fallback logger: dumps the raw args tuple to stdout.
        print(args)
360 # --------------------------------------------------------------------------
361 # API.
362 # --------------------------------------------------------------------------
364 class VolumeInfo(object):
365 __slots__ = (
366 'name',
367 'allocated_size', # Allocated size, place count is not used.
368 'virtual_size', # Total virtual available size of this volume
369 # (i.e. the user size at creation).
370 'diskful' # Array of nodes that have a diskful volume.
371 )
373 def __init__(self, name):
374 self.name = name
375 self.allocated_size = 0
376 self.virtual_size = 0
377 self.diskful = []
379 @override
380 def __repr__(self) -> str:
381 return 'VolumeInfo("{}", {}, {}, {})'.format(
382 self.name, self.allocated_size, self.virtual_size,
383 self.diskful
384 )
386 # --------------------------------------------------------------------------
388 def __init__(
389 self, uri, group_name, repair=False, logger=default_logger.__func__,
390 attempt_count=30
391 ):
392 """
393 Create a new LinstorVolumeManager object.
394 :param str uri: URI to communicate with the LINSTOR controller.
395 :param str group_name: The SR group name to use.
396 :param bool repair: If true we try to remove bad volumes due to a crash
397 or unexpected behavior.
398 :param function logger: Function to log messages.
399 :param int attempt_count: Number of attempts to join the controller.
400 """
402 self._linstor = self._create_linstor_instance(
403 uri, attempt_count=attempt_count
404 )
407 mismatched_nodes = [
408 node for node in self._linstor.node_list().pop().nodes if node.connection_status == "VERSION_MISMATCH"
409 ]
411 if mismatched_nodes:
412 raise LinstorVolumeManagerError(
413 "Some linstor nodes are not using the same version. " +
414 f"Incriminated nodes are: {','.join([node.name for node in mismatched_nodes])}"
415 )
417 self._base_group_name = group_name
419 # Ensure group exists.
420 group_name = self._build_group_name(group_name)
421 groups = self._linstor.resource_group_list_raise([group_name]).resource_groups
422 if not groups:
423 raise LinstorVolumeManagerError(
424 'Unable to find `{}` Linstor SR'.format(group_name)
425 )
427 # Ok. ;)
428 self._logger = logger
429 self._redundancy = groups[0].select_filter.place_count
430 self._group_name = group_name
431 self._ha_group_name = self._build_ha_group_name(self._base_group_name)
432 self._volumes = set()
433 self._storage_pools_time = 0
435 # To increase performance and limit request count to LINSTOR services,
436 # we use caches.
437 self._kv_cache = self._create_kv_cache()
438 self._resource_cache = None
439 self._resource_cache_dirty = True
440 self._volume_info_cache = None
441 self._volume_info_cache_dirty = True
442 self._build_volumes(repair=repair)
444 @property
445 def group_name(self):
446 """
447 Give the used group name.
448 :return: The group name.
449 :rtype: str
450 """
451 return self._base_group_name
453 @property
454 def redundancy(self):
455 """
456 Give the used redundancy.
457 :return: The redundancy.
458 :rtype: int
459 """
460 return self._redundancy
462 @property
463 def volumes(self):
464 """
465 Give the volumes uuid set.
466 :return: The volumes uuid set.
467 :rtype: set(str)
468 """
469 return self._volumes
471 @property
472 def max_volume_size_allowed(self):
473 """
474 Give the max volume size currently available in B.
475 :return: The current size.
476 :rtype: int
477 """
479 candidates = self._find_best_size_candidates()
480 if not candidates:
481 raise LinstorVolumeManagerError(
482 'Failed to get max volume size allowed'
483 )
485 size = candidates[0].max_volume_size
486 if size < 0:
487 raise LinstorVolumeManagerError(
488 'Invalid max volume size allowed given: {}'.format(size)
489 )
490 return self.round_down_volume_size(size * 1024)
492 @property
493 def physical_size(self):
494 """
495 Give the total physical size of the SR.
496 :return: The physical size.
497 :rtype: int
498 """
499 return self._compute_size('total_capacity')
501 @property
502 def physical_free_size(self):
503 """
504 Give the total free physical size of the SR.
505 :return: The physical free size.
506 :rtype: int
507 """
508 return self._compute_size('free_capacity')
510 @property
511 def allocated_volume_size(self):
512 """
513 Give the allocated size for all volumes. The place count is not
514 used here. When thick lvm is used, the size for one volume should
515 be equal to the virtual volume size. With thin lvm, the size is equal
516 or lower to the volume size.
517 :return: The allocated size of all volumes.
518 :rtype: int
519 """
521 # Paths: /res_name/vol_number/size
522 sizes = {}
524 for resource in self._get_resource_cache().resources:
525 if resource.name not in sizes:
526 current = sizes[resource.name] = {}
527 else:
528 current = sizes[resource.name]
530 for volume in resource.volumes:
531 # We ignore diskless pools of the form "DfltDisklessStorPool".
532 if volume.storage_pool_name != self._group_name:
533 continue
535 allocated_size = max(volume.allocated_size, 0)
536 current_allocated_size = current.get(volume.number) or -1
537 if allocated_size > current_allocated_size:
538 current[volume.number] = allocated_size
540 total_size = 0
541 for volumes in sizes.values():
542 for size in volumes.values():
543 total_size += size
545 return total_size * 1024
547 def get_min_physical_size(self):
548 """
549 Give the minimum physical size of the SR.
550 I.e. the size of the smallest disk + the number of pools.
551 :return: The physical min size.
552 :rtype: tuple(int, int)
553 """
554 size = None
555 pool_count = 0
556 for pool in self._get_storage_pools(force=True):
557 space = pool.free_space
558 if space:
559 pool_count += 1
560 current_size = space.total_capacity
561 if current_size < 0:
562 raise LinstorVolumeManagerError(
563 'Failed to get pool total_capacity attr of `{}`'
564 .format(pool.node_name)
565 )
566 if size is None or current_size < size:
567 size = current_size
568 return (pool_count, (size or 0) * 1024)
570 @property
571 def metadata(self):
572 """
573 Get the metadata of the SR.
574 :return: Dictionary that contains metadata.
575 :rtype: dict(str, dict)
576 """
578 sr_properties = self._get_sr_properties()
579 metadata = sr_properties.get(self.PROP_METADATA)
580 if metadata is not None:
581 metadata = json.loads(metadata)
582 if isinstance(metadata, dict):
583 return metadata
584 raise LinstorVolumeManagerError(
585 'Expected dictionary in SR metadata: {}'.format(
586 self._group_name
587 )
588 )
590 return {}
592 @metadata.setter
593 def metadata(self, metadata):
594 """
595 Set the metadata of the SR.
596 :param dict metadata: Dictionary that contains metadata.
597 """
599 assert isinstance(metadata, dict)
600 sr_properties = self._get_sr_properties()
601 sr_properties[self.PROP_METADATA] = json.dumps(metadata)
603 @property
604 def disconnected_hosts(self):
605 """
606 Get the list of disconnected hosts.
607 :return: Set that contains disconnected hosts.
608 :rtype: set(str)
609 """
611 disconnected_hosts = set()
612 for pool in self._get_storage_pools():
613 for report in pool.reports:
614 if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \
615 linstor.consts.WARN_NOT_CONNECTED:
616 disconnected_hosts.add(pool.node_name)
617 break
618 return disconnected_hosts
620 def check_volume_exists(self, volume_uuid):
621 """
622 Check if a volume exists in the SR.
623 :return: True if volume exists.
624 :rtype: bool
625 """
626 return volume_uuid in self._volumes
628 def create_volume(
629 self,
630 volume_uuid,
631 size,
632 persistent=True,
633 volume_name=None,
634 high_availability=False
635 ):
636 """
637 Create a new volume on the SR.
638 :param str volume_uuid: The volume uuid to use.
639 :param int size: volume size in B.
640 :param bool persistent: If false the volume will be unavailable
641 on the next constructor call LinstorSR(...).
642 :param str volume_name: If set, this name is used in the LINSTOR
643 database instead of a generated name.
644 :param bool high_availability: If set, the volume is created in
645 the HA group.
646 :return: The current device path of the volume.
647 :rtype: str
648 """
650 self._logger('Creating LINSTOR volume {}...'.format(volume_uuid))
651 if not volume_name:
652 volume_name = self.build_volume_name(util.gen_uuid())
653 volume_properties = self._create_volume_with_properties(
654 volume_uuid,
655 volume_name,
656 size,
657 True, # place_resources
658 high_availability
659 )
661 # Volume created! Now try to find the device path.
662 try:
663 self._logger(
664 'Find device path of LINSTOR volume {}...'.format(volume_uuid)
665 )
666 device_path = self._find_device_path(volume_uuid, volume_name)
667 if persistent:
668 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
669 self._volumes.add(volume_uuid)
670 self._logger(
671 'LINSTOR volume {} created!'.format(volume_uuid)
672 )
673 return device_path
674 except Exception:
675 # There is an issue to find the path.
676 # At this point the volume has just been created, so force flag can be used.
677 self._destroy_volume(volume_uuid, force=True)
678 raise
680 def mark_volume_as_persistent(self, volume_uuid):
681 """
682 Mark volume as persistent if created with persistent=False.
683 :param str volume_uuid: The volume uuid to mark.
684 """
686 self._ensure_volume_exists(volume_uuid)
688 # Mark volume as persistent.
689 volume_properties = self._get_volume_properties(volume_uuid)
690 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
692 def destroy_volume(self, volume_uuid):
693 """
694 Destroy a volume.
695 :param str volume_uuid: The volume uuid to destroy.
696 """
698 self._ensure_volume_exists(volume_uuid)
699 self.ensure_volume_is_not_locked(volume_uuid)
701 # Mark volume as destroyed.
702 volume_properties = self._get_volume_properties(volume_uuid)
703 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
705 try:
706 self._volumes.remove(volume_uuid)
707 self._destroy_volume(volume_uuid)
708 except Exception as e:
709 raise LinstorVolumeManagerError(
710 str(e),
711 LinstorVolumeManagerError.ERR_VOLUME_DESTROY
712 )
714 def lock_volume(self, volume_uuid, locked=True):
715 """
716 Prevent modifications of the volume properties during
717 "self.LOCKED_EXPIRATION_DELAY" seconds. The SR must be locked
718 when used. This method is useful to attach/detach correctly a volume on
719 a slave. Without it the GC can rename a volume, in this case the old
720 volume path can be used by a slave...
721 :param str volume_uuid: The volume uuid to protect/unprotect.
722 :param bool locked: Lock/unlock the volume.
723 """
725 self._ensure_volume_exists(volume_uuid)
727 self._logger(
728 '{} volume {} as locked'.format(
729 'Mark' if locked else 'Unmark',
730 volume_uuid
731 )
732 )
734 volume_properties = self._get_volume_properties(volume_uuid)
735 if locked:
736 volume_properties[
737 self.PROP_IS_READONLY_TIMESTAMP
738 ] = str(time.time())
739 elif self.PROP_IS_READONLY_TIMESTAMP in volume_properties:
740 volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
742 def ensure_volume_is_not_locked(self, volume_uuid, timeout=None):
743 """
744 Ensure a volume is not locked. Wait if necessary.
745 :param str volume_uuid: The volume uuid to check.
746 :param int timeout: If the volume is always locked after the expiration
747 of the timeout, an exception is thrown.
748 """
749 return self.ensure_volume_list_is_not_locked([volume_uuid], timeout)
    def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None):
        # Wait until none of the given volumes is locked (see lock_volume).
        # Only volumes currently known by this manager are considered.
        checked = set()
        for volume_uuid in volume_uuids:
            if volume_uuid in self._volumes:
                checked.add(volume_uuid)

        if not checked:
            return

        waiting = False

        volume_properties = self._get_kv_cache()

        start = time.time()
        while True:
            # Can't delete in for loop, use a copy of the list.
            remaining = checked.copy()
            for volume_uuid in checked:
                volume_properties.namespace = \
                    self._build_volume_namespace(volume_uuid)
                timestamp = volume_properties.get(
                    self.PROP_IS_READONLY_TIMESTAMP
                )
                if timestamp is None:
                    # Not locked (or unlocked meanwhile): drop it.
                    remaining.remove(volume_uuid)
                    continue

                now = time.time()
                if now - float(timestamp) > self.LOCKED_EXPIRATION_DELAY:
                    # Stale lock: the owner exceeded its allowed delay,
                    # remove the timestamp and treat the volume as unlocked.
                    self._logger(
                        'Remove readonly timestamp on {}'.format(volume_uuid)
                    )
                    volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
                    remaining.remove(volume_uuid)
                    continue

                # Still locked: stop scanning, we have to wait anyway.
                if not waiting:
                    self._logger(
                        'Volume {} is locked, waiting...'.format(volume_uuid)
                    )
                    waiting = True
                break

            if not remaining:
                break
            checked = remaining

            # Note: `now` is always bound here — `remaining` can only be
            # non-empty after a volume with a valid timestamp was examined.
            if timeout is not None and now - start > timeout:
                raise LinstorVolumeManagerError(
                    'volume `{}` is locked and timeout has been reached'
                    .format(volume_uuid),
                    LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
                )

            # We must wait to use the volume. After that we can modify it
            # ONLY if the SR is locked to avoid bad reads on the slaves.
            time.sleep(1)
            # Refresh the KV store before the next scan.
            volume_properties = self._create_kv_cache()

        if waiting:
            self._logger('No volume locked now!')
813 def remove_volume_if_diskless(self, volume_uuid):
814 """
815 Remove disless path from local node.
816 :param str volume_uuid: The volume uuid to remove.
817 """
819 self._ensure_volume_exists(volume_uuid)
821 volume_properties = self._get_volume_properties(volume_uuid)
822 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
824 node_name = socket.gethostname()
826 for resource in self._get_resource_cache().resources:
827 if resource.name == volume_name and resource.node_name == node_name:
828 if linstor.consts.FLAG_TIE_BREAKER in resource.flags:
829 return
830 break
832 result = self._linstor.resource_delete_if_diskless(
833 node_name=node_name, rsc_name=volume_name
834 )
835 if not linstor.Linstor.all_api_responses_no_error(result):
836 raise LinstorVolumeManagerError(
837 'Unable to delete diskless path of `{}` on node `{}`: {}'
838 .format(volume_name, node_name, ', '.join(
839 [str(x) for x in result]))
840 )
    def introduce_volume(self, volume_uuid):
        # Stub: importing an existing LINSTOR volume into the SR is not
        # supported yet.
        pass # TODO: Implement me.
845 def resize_volume(self, volume_uuid, new_size):
846 """
847 Resize a volume.
848 :param str volume_uuid: The volume uuid to resize.
849 :param int new_size: New size in B.
850 """
852 volume_name = self.get_volume_name(volume_uuid)
853 self.ensure_volume_is_not_locked(volume_uuid)
854 new_size = self.round_up_volume_size(new_size) // 1024
856 retry_count = 30
857 while True:
858 result = self._linstor.volume_dfn_modify(
859 rsc_name=volume_name,
860 volume_nr=0,
861 size=new_size
862 )
864 self._mark_resource_cache_as_dirty()
866 error_str = self._get_error_str(result)
867 if not error_str:
868 break
870 # After volume creation, DRBD volume can be unusable during many seconds.
871 # So we must retry the definition change if the device is not up to date.
872 # Often the case for thick provisioning.
873 if retry_count and error_str.find('non-UpToDate DRBD device') >= 0:
874 time.sleep(2)
875 retry_count -= 1
876 continue
878 raise LinstorVolumeManagerError(
879 'Could not resize volume `{}` from SR `{}`: {}'
880 .format(volume_uuid, self._group_name, error_str)
881 )
883 def get_volume_name(self, volume_uuid):
884 """
885 Get the name of a particular volume.
886 :param str volume_uuid: The volume uuid of the name to get.
887 :return: The volume name.
888 :rtype: str
889 """
891 self._ensure_volume_exists(volume_uuid)
892 volume_properties = self._get_volume_properties(volume_uuid)
893 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
894 if volume_name:
895 return volume_name
896 raise LinstorVolumeManagerError(
897 'Failed to get volume name of {}'.format(volume_uuid)
898 )
900 def get_volume_size(self, volume_uuid):
901 """
902 Get the size of a particular volume.
903 :param str volume_uuid: The volume uuid of the size to get.
904 :return: The volume size.
905 :rtype: int
906 """
908 volume_name = self.get_volume_name(volume_uuid)
909 dfns = self._linstor.resource_dfn_list_raise(
910 query_volume_definitions=True,
911 filter_by_resource_definitions=[volume_name]
912 ).resource_definitions
914 size = dfns[0].volume_definitions[0].size
915 if size < 0:
916 raise LinstorVolumeManagerError(
917 'Failed to get volume size of: {}'.format(volume_uuid)
918 )
919 return size * 1024
921 def set_auto_promote_timeout(self, volume_uuid, timeout):
922 """
923 Define the blocking time of open calls when a DRBD
924 is already open on another host.
925 :param str volume_uuid: The volume uuid to modify.
926 """
928 volume_name = self.get_volume_name(volume_uuid)
929 result = self._linstor.resource_dfn_modify(volume_name, {
930 'DrbdOptions/Resource/auto-promote-timeout': timeout
931 })
932 error_str = self._get_error_str(result)
933 if error_str:
934 raise LinstorVolumeManagerError(
935 'Could not change the auto promote timeout of `{}`: {}'
936 .format(volume_uuid, error_str)
937 )
939 def set_drbd_ha_properties(self, volume_name, enabled=True):
940 """
941 Set or not HA DRBD properties required by drbd-reactor and
942 by specific volumes.
943 :param str volume_name: The volume to modify.
944 :param bool enabled: Enable or disable HA properties.
945 """
947 properties = {
948 'DrbdOptions/auto-quorum': 'disabled',
949 'DrbdOptions/Resource/auto-promote': 'no',
950 'DrbdOptions/Resource/on-no-data-accessible': 'io-error',
951 'DrbdOptions/Resource/on-no-quorum': 'io-error',
952 'DrbdOptions/Resource/on-suspended-primary-outdated': 'force-secondary',
953 'DrbdOptions/Resource/quorum': 'majority'
954 }
955 if enabled:
956 result = self._linstor.resource_dfn_modify(volume_name, properties)
957 else:
958 result = self._linstor.resource_dfn_modify(volume_name, {}, delete_props=list(properties.keys()))
960 error_str = self._get_error_str(result)
961 if error_str:
962 raise LinstorVolumeManagerError(
963 'Could not modify HA DRBD properties on volume `{}`: {}'
964 .format(volume_name, error_str)
965 )
967 def get_volume_info(self, volume_uuid):
968 """
969 Get the volume info of a particular volume.
970 :param str volume_uuid: The volume uuid of the volume info to get.
971 :return: The volume info.
972 :rtype: VolumeInfo
973 """
975 volume_name = self.get_volume_name(volume_uuid)
976 return self._get_volumes_info()[volume_name]
978 def get_device_path(self, volume_uuid):
979 """
980 Get the dev path of a volume, create a diskless if necessary.
981 :param str volume_uuid: The volume uuid to get the dev path.
982 :return: The current device path of the volume.
983 :rtype: str
984 """
986 volume_name = self.get_volume_name(volume_uuid)
987 return self._find_device_path(volume_uuid, volume_name)
989 def get_volume_uuid_from_device_path(self, device_path):
990 """
991 Get the volume uuid of a device_path.
992 :param str device_path: The dev path to find the volume uuid.
993 :return: The volume uuid of the local device path.
994 :rtype: str
995 """
997 expected_volume_name = \
998 self.get_volume_name_from_device_path(device_path)
1000 volume_names = self.get_volumes_with_name()
1001 for volume_uuid, volume_name in volume_names.items():
1002 if volume_name == expected_volume_name:
1003 return volume_uuid
1005 raise LinstorVolumeManagerError(
1006 'Unable to find volume uuid from dev path `{}`'.format(device_path)
1007 )
1009 def get_volume_name_from_device_path(self, device_path):
1010 """
1011 Get the volume name of a device_path.
1012 :param str device_path: The dev path to find the volume name.
1013 :return: The volume name of the device path.
1014 :rtype: str
1015 """
1017 # Assume that we have a path like this:
1018 # - "/dev/drbd/by-res/xcp-volume-<UUID>/0"
1019 # - "../xcp-volume-<UUID>/0"
1020 if device_path.startswith(DRBD_BY_RES_PATH):
1021 prefix_len = len(DRBD_BY_RES_PATH)
1022 elif device_path.startswith('../'):
1023 prefix_len = 3
1024 else:
1025 raise LinstorVolumeManagerError('Unexpected device path: `{}`'.format(device_path))
1027 res_name_end = device_path.find('/', prefix_len)
1028 assert res_name_end != -1
1029 return device_path[prefix_len:res_name_end]
    def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
        """
        Change the uuid of a volume.

        The rename is implemented as a small "transaction" on the LINSTOR
        KV store: the destination namespace is first tagged with
        PROP_UPDATING_UUID_SRC so an interrupted rename can be detected
        and cleaned up later.

        :param str volume_uuid: The volume to modify.
        :param str new_volume_uuid: The new volume uuid to use.
        :param bool force: If true we don't check if volume_uuid is in the
        volume list. I.e. the volume can be marked as deleted but the volume
        can still be in the LINSTOR KV store if the deletion has failed.
        In specific cases like "undo" after a failed clone we must rename a bad
        deleted VDI.
        :raises LinstorVolumeManagerError: If the target uuid already exists,
        the source is in an invalid state, or a KV store operation fails.
        """

        self._logger(
            'Trying to update volume UUID {} to {}...'
            .format(volume_uuid, new_volume_uuid)
        )
        assert volume_uuid != new_volume_uuid, 'can\'t update volume UUID, same value'

        if not force:
            self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        if new_volume_uuid in self._volumes:
            raise LinstorVolumeManagerError(
                'Volume `{}` already exists'.format(new_volume_uuid),
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        volume_properties = self._get_volume_properties(volume_uuid)
        if volume_properties.get(self.PROP_UPDATING_UUID_SRC):
            # Another (possibly interrupted) rename already involves this
            # volume: refuse to stack a second one on top of it.
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {}: invalid state'
                .format(volume_uuid)
            )

        # 1. Copy in temp variables metadata and volume_name.
        metadata = volume_properties.get(self.PROP_METADATA)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        # 2. Switch to new volume namespace.
        volume_properties.namespace = self._build_volume_namespace(
            new_volume_uuid
        )

        # The destination namespace must be empty, otherwise we would
        # silently overwrite another volume's properties.
        if list(volume_properties.items()):
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {} to {}: '
                .format(volume_uuid, new_volume_uuid) +
                'this last one is not empty'
            )

        try:
            # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC.
            # If we crash after that, the new properties can be removed
            # properly.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
            volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid

            # 4. Copy the properties.
            # Note: On new volumes, during clone for example, the metadata
            # may be missing. So we must test it to avoid this error:
            # "None has to be a str/unicode, but is <type 'NoneType'>"
            if metadata:
                volume_properties[self.PROP_METADATA] = metadata
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            # 5. Ok!
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
        except Exception as err:
            try:
                # Clear the new volume properties in case of failure.
                assert volume_properties.namespace == \
                    self._build_volume_namespace(new_volume_uuid)
                volume_properties.clear()
            except Exception as e:
                # Best-effort rollback: log and keep the original error.
                self._logger(
                    'Failed to clear new volume properties: {} (ignoring...)'
                    .format(e)
                )
            raise LinstorVolumeManagerError(
                'Failed to copy volume properties: {}'.format(err)
            )

        try:
            # 6. After this point, it's ok we can remove the
            # PROP_UPDATING_UUID_SRC property and clear the src properties
            # without problems.

            # 7. Switch to old volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                volume_uuid
            )
            volume_properties.clear()

            # 8. Switch a last time to new volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                new_volume_uuid
            )
            volume_properties.pop(self.PROP_UPDATING_UUID_SRC)
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to clear volume properties '
                'after volume uuid update: {}'.format(e)
            )

        try:
            self._volumes.remove(volume_uuid)
        except KeyError:
            # Can be missing if we are building the volume set attr AND
            # we are processing a deleted resource.
            assert force

        self._volumes.add(new_volume_uuid)

        self._logger(
            'UUID update succeeded of {} to {}! (properties={})'
            .format(
                volume_uuid, new_volume_uuid,
                self._get_filtered_properties(volume_properties)
            )
        )
1153 def update_volume_name(self, volume_uuid, volume_name):
1154 """
1155 Change the volume name of a volume.
1156 :param str volume_uuid: The volume to modify.
1157 :param str volume_name: The volume_name to use.
1158 """
1160 self._ensure_volume_exists(volume_uuid)
1161 self.ensure_volume_is_not_locked(volume_uuid)
1162 if not volume_name.startswith(self.PREFIX_VOLUME):
1163 raise LinstorVolumeManagerError(
1164 'Volume name `{}` must be start with `{}`'
1165 .format(volume_name, self.PREFIX_VOLUME)
1166 )
1168 if volume_name not in self._fetch_resource_names():
1169 raise LinstorVolumeManagerError(
1170 'Volume `{}` doesn\'t exist'.format(volume_name)
1171 )
1173 volume_properties = self._get_volume_properties(volume_uuid)
1174 volume_properties[self.PROP_VOLUME_NAME] = volume_name
1176 def get_usage_states(self, volume_uuid):
1177 """
1178 Check if a volume is currently used.
1179 :param str volume_uuid: The volume uuid to check.
1180 :return: A dictionnary that contains states.
1181 :rtype: dict(str, bool or None)
1182 """
1184 states = {}
1186 volume_name = self.get_volume_name(volume_uuid)
1187 for resource_state in self._linstor.resource_list_raise(
1188 filter_by_resources=[volume_name]
1189 ).resource_states:
1190 states[resource_state.node_name] = resource_state.in_use
1192 return states
1194 def get_volume_openers(self, volume_uuid):
1195 """
1196 Get openers of a volume.
1197 :param str volume_uuid: The volume uuid to monitor.
1198 :return: A dictionnary that contains openers.
1199 :rtype: dict(str, obj)
1200 """
1201 return get_all_volume_openers(self.get_volume_name(volume_uuid), '0')
1203 def get_volumes_with_name(self):
1204 """
1205 Give a volume dictionnary that contains names actually owned.
1206 :return: A volume/name dict.
1207 :rtype: dict(str, str)
1208 """
1209 return self._get_volumes_by_property(self.REG_VOLUME_NAME)
1211 def get_volumes_with_info(self):
1212 """
1213 Give a volume dictionnary that contains VolumeInfos.
1214 :return: A volume/VolumeInfo dict.
1215 :rtype: dict(str, VolumeInfo)
1216 """
1218 volumes = {}
1220 all_volume_info = self._get_volumes_info()
1221 volume_names = self.get_volumes_with_name()
1222 for volume_uuid, volume_name in volume_names.items():
1223 if volume_name:
1224 volume_info = all_volume_info.get(volume_name)
1225 if volume_info:
1226 volumes[volume_uuid] = volume_info
1227 continue
1229 # Well I suppose if this volume is not available,
1230 # LINSTOR has been used directly without using this API.
1231 volumes[volume_uuid] = self.VolumeInfo('')
1233 return volumes
1235 def get_volumes_with_metadata(self):
1236 """
1237 Give a volume dictionnary that contains metadata.
1238 :return: A volume/metadata dict.
1239 :rtype: dict(str, dict)
1240 """
1242 volumes = {}
1244 metadata = self._get_volumes_by_property(self.REG_METADATA)
1245 for volume_uuid, volume_metadata in metadata.items():
1246 if volume_metadata:
1247 volume_metadata = json.loads(volume_metadata)
1248 if isinstance(volume_metadata, dict):
1249 volumes[volume_uuid] = volume_metadata
1250 continue
1251 raise LinstorVolumeManagerError(
1252 'Expected dictionary in volume metadata: {}'
1253 .format(volume_uuid)
1254 )
1256 volumes[volume_uuid] = {}
1258 return volumes
1260 def get_volume_metadata(self, volume_uuid):
1261 """
1262 Get the metadata of a volume.
1263 :return: Dictionary that contains metadata.
1264 :rtype: dict
1265 """
1267 self._ensure_volume_exists(volume_uuid)
1268 volume_properties = self._get_volume_properties(volume_uuid)
1269 metadata = volume_properties.get(self.PROP_METADATA)
1270 if metadata:
1271 metadata = json.loads(metadata)
1272 if isinstance(metadata, dict):
1273 return metadata
1274 raise LinstorVolumeManagerError(
1275 'Expected dictionary in volume metadata: {}'
1276 .format(volume_uuid)
1277 )
1278 return {}
1280 def set_volume_metadata(self, volume_uuid, metadata):
1281 """
1282 Set the metadata of a volume.
1283 :param dict metadata: Dictionary that contains metadata.
1284 """
1286 self._ensure_volume_exists(volume_uuid)
1287 self.ensure_volume_is_not_locked(volume_uuid)
1289 assert isinstance(metadata, dict)
1290 volume_properties = self._get_volume_properties(volume_uuid)
1291 volume_properties[self.PROP_METADATA] = json.dumps(metadata)
1293 def update_volume_metadata(self, volume_uuid, metadata):
1294 """
1295 Update the metadata of a volume. It modify only the given keys.
1296 It doesn't remove unreferenced key instead of set_volume_metadata.
1297 :param dict metadata: Dictionary that contains metadata.
1298 """
1300 self._ensure_volume_exists(volume_uuid)
1301 self.ensure_volume_is_not_locked(volume_uuid)
1303 assert isinstance(metadata, dict)
1304 volume_properties = self._get_volume_properties(volume_uuid)
1306 current_metadata = json.loads(
1307 volume_properties.get(self.PROP_METADATA, '{}')
1308 )
1309 if not isinstance(metadata, dict):
1310 raise LinstorVolumeManagerError(
1311 'Expected dictionary in volume metadata: {}'
1312 .format(volume_uuid)
1313 )
1315 for key, value in metadata.items():
1316 current_metadata[key] = value
1317 volume_properties[self.PROP_METADATA] = json.dumps(current_metadata)
1319 def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True):
1320 """
1321 Clone a volume. Do not copy the data, this method creates a new volume
1322 with the same size.
1323 :param str volume_uuid: The volume to clone.
1324 :param str clone_uuid: The cloned volume.
1325 :param bool persistent: If false the volume will be unavailable
1326 on the next constructor call LinstorSR(...).
1327 :return: The current device path of the cloned volume.
1328 :rtype: str
1329 """
1331 volume_name = self.get_volume_name(volume_uuid)
1332 self.ensure_volume_is_not_locked(volume_uuid)
1334 # 1. Find ideal nodes + size to use.
1335 ideal_node_names, size = self._get_volume_node_names_and_size(
1336 volume_name
1337 )
1338 if size <= 0:
1339 raise LinstorVolumeManagerError(
1340 'Invalid size of {} for volume `{}`'.format(size, volume_name)
1341 )
1343 # 2. Create clone!
1344 return self.create_volume(clone_uuid, size, persistent)
1346 def remove_resourceless_volumes(self):
1347 """
1348 Remove all volumes without valid or non-empty name
1349 (i.e. without LINSTOR resource). It's different than
1350 LinstorVolumeManager constructor that takes a `repair` param that
1351 removes volumes with `PROP_NOT_EXISTS` to 1.
1352 """
1354 resource_names = self._fetch_resource_names()
1355 for volume_uuid, volume_name in self.get_volumes_with_name().items():
1356 if not volume_name or volume_name not in resource_names:
1357 # Don't force, we can be sure of what's happening.
1358 self.destroy_volume(volume_uuid)
    def destroy(self):
        """
        Destroy this SR. Object should not be used after that.

        All managed volumes must already be gone; the database volume and
        the LINSTOR groups/storage pools are then torn down. The controller
        is restored to its previous state on failure.

        :raises LinstorVolumeManagerError: If volumes (managed or external)
        still exist.
        """

        # 1. Ensure volume list is empty. No cost.
        if self._volumes:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes'
            )

        # 2. Fetch ALL resource names.
        # This list may therefore contain volumes created outside
        # the scope of the driver.
        resource_names = self._fetch_resource_names(ignore_deleted=False)
        try:
            resource_names.remove(DATABASE_VOLUME_NAME)
        except KeyError:
            # Really strange to reach that point.
            # Normally we always have the database volume in the list.
            pass

        # 3. Ensure the resource name list is entirely empty...
        if resource_names:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes (created externally or being deleted)'
            )

        # 4. Destroying...
        controller_is_running = self._controller_is_running()
        uri = 'linstor://localhost'
        try:
            if controller_is_running:
                self._start_controller(start=False)

            # 4.1. Umount LINSTOR database.
            self._mount_database_volume(
                self.build_device_path(DATABASE_VOLUME_NAME),
                mount=False,
                force=True
            )

            # 4.2. Refresh instance.
            self._start_controller(start=True)
            self._linstor = self._create_linstor_instance(
                uri, keep_uri_unmodified=True
            )

            # 4.3. Destroy database volume.
            self._destroy_resource(DATABASE_VOLUME_NAME)

            # 4.4. Refresh linstor connection.
            # Without we get this error:
            # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.."
            # Because the deletion of the databse was not seen by Linstor for some reason.
            # It seems a simple refresh of the Linstor connection make it aware of the deletion.
            self._linstor.disconnect()
            self._linstor.connect()

            # 4.5. Destroy remaining drbd nodes on hosts.
            # We check if there is a DRBD node on hosts that could mean blocking when destroying resource groups.
            # It needs to be done locally by each host so we go through the linstor-manager plugin.
            # If we don't do this sometimes, the destroy will fail when trying to destroy the resource groups with:
            # "linstor-manager:destroy error: Failed to destroy SP `xcp-sr-linstor_group_thin_device` on node `r620-s2`: The specified storage pool 'xcp-sr-linstor_group_thin_device' on node 'r620-s2' can not be deleted as volumes / snapshot-volumes are still using it."
            session = util.timeout_call(5, util.get_localAPI_session)
            for host_ref in session.xenapi.host.get_all():
                try:
                    # Best-effort per host: a failure on one host is logged
                    # and the loop continues with the others.
                    response = session.xenapi.host.call_plugin(
                        host_ref, 'linstor-manager', 'destroyDrbdVolumes', {'volume_group': self._group_name}
                    )
                except Exception as e:
                    util.SMlog('Calling destroyDrbdVolumes on host {} failed with error {}'.format(host_ref, e))

            # 4.6. Destroy group and storage pools.
            self._destroy_resource_group(self._linstor, self._group_name)
            self._destroy_resource_group(self._linstor, self._ha_group_name)
            for pool in self._get_storage_pools(force=True):
                self._destroy_storage_pool(
                    self._linstor, pool.name, pool.node_name
                )
        except Exception as e:
            # Restore the controller to its initial state before bubbling up.
            self._start_controller(start=controller_is_running)
            raise e

        # Final cleanup of the local database directory is best-effort:
        # the SR is already destroyed at this point.
        try:
            self._start_controller(start=False)
            for file in os.listdir(DATABASE_PATH):
                if file != 'lost+found':
                    os.remove(DATABASE_PATH + '/' + file)
        except Exception as e:
            util.SMlog(
                'Ignoring failure after LINSTOR SR destruction: {}'
                .format(e)
            )
1458 def find_up_to_date_diskful_nodes(self, volume_uuid):
1459 """
1460 Find all nodes that contain a specific volume using diskful disks.
1461 The disk must be up to data to be used.
1462 :param str volume_uuid: The volume to use.
1463 :return: The available nodes.
1464 :rtype: tuple(set(str), str)
1465 """
1467 volume_name = self.get_volume_name(volume_uuid)
1469 in_use_by = None
1470 node_names = set()
1472 resource_states = filter(
1473 lambda resource_state: resource_state.name == volume_name,
1474 self._get_resource_cache().resource_states
1475 )
1477 for resource_state in resource_states:
1478 volume_state = resource_state.volume_states[0]
1479 if volume_state.disk_state == 'UpToDate':
1480 node_names.add(resource_state.node_name)
1481 if resource_state.in_use:
1482 in_use_by = resource_state.node_name
1484 return (node_names, in_use_by)
1486 def invalidate_resource_cache(self):
1487 """
1488 If resources are impacted by external commands like vhdutil,
1489 it's necessary to call this function to invalidate current resource
1490 cache.
1491 """
1492 self._mark_resource_cache_as_dirty()
1494 def has_node(self, node_name):
1495 """
1496 Check if a node exists in the LINSTOR database.
1497 :rtype: bool
1498 """
1499 result = self._linstor.node_list()
1500 error_str = self._get_error_str(result)
1501 if error_str:
1502 raise LinstorVolumeManagerError(
1503 'Failed to list nodes using `{}`: {}'
1504 .format(node_name, error_str)
1505 )
1506 return bool(result[0].node(node_name))
1508 def create_node(self, node_name, ip):
1509 """
1510 Create a new node in the LINSTOR database.
1511 :param str node_name: Node name to use.
1512 :param str ip: Host IP to communicate.
1513 """
1514 result = self._linstor.node_create(
1515 node_name,
1516 linstor.consts.VAL_NODE_TYPE_CMBD,
1517 ip
1518 )
1519 errors = self._filter_errors(result)
1520 if errors:
1521 error_str = self._get_error_str(errors)
1522 raise LinstorVolumeManagerError(
1523 'Failed to create node `{}`: {}'.format(node_name, error_str)
1524 )
1526 def destroy_node(self, node_name):
1527 """
1528 Destroy a node in the LINSTOR database.
1529 :param str node_name: Node name to remove.
1530 """
1531 result = self._linstor.node_delete(node_name)
1532 errors = self._filter_errors(result)
1533 if errors:
1534 error_str = self._get_error_str(errors)
1535 raise LinstorVolumeManagerError(
1536 'Failed to destroy node `{}`: {}'.format(node_name, error_str)
1537 )
1539 def create_node_interface(self, node_name, name, ip):
1540 """
1541 Create a new node interface in the LINSTOR database.
1542 :param str node_name: Node name of the interface to use.
1543 :param str name: Interface to create.
1544 :param str ip: IP of the interface.
1545 """
1546 result = self._linstor.netinterface_create(node_name, name, ip)
1547 errors = self._filter_errors(result)
1548 if errors:
1549 error_str = self._get_error_str(errors)
1550 raise LinstorVolumeManagerError(
1551 'Failed to create node interface on `{}`: {}'.format(node_name, error_str)
1552 )
1554 def destroy_node_interface(self, node_name, name):
1555 """
1556 Destroy a node interface in the LINSTOR database.
1557 :param str node_name: Node name of the interface to remove.
1558 :param str name: Interface to remove.
1559 """
1561 if name == 'default':
1562 raise LinstorVolumeManagerError(
1563 'Unable to delete the default interface of a node!'
1564 )
1566 result = self._linstor.netinterface_delete(node_name, name)
1567 errors = self._filter_errors(result)
1568 if errors:
1569 error_str = self._get_error_str(errors)
1570 raise LinstorVolumeManagerError(
1571 'Failed to destroy node interface on `{}`: {}'.format(node_name, error_str)
1572 )
1574 def modify_node_interface(self, node_name, name, ip):
1575 """
1576 Modify a node interface in the LINSTOR database. Create it if necessary.
1577 :param str node_name: Node name of the interface to use.
1578 :param str name: Interface to modify or create.
1579 :param str ip: IP of the interface.
1580 """
1581 result = self._linstor.netinterface_create(node_name, name, ip)
1582 errors = self._filter_errors(result)
1583 if not errors:
1584 return
1586 if self._check_errors(errors, [linstor.consts.FAIL_EXISTS_NET_IF]):
1587 result = self._linstor.netinterface_modify(node_name, name, ip)
1588 errors = self._filter_errors(result)
1589 if not errors:
1590 return
1592 error_str = self._get_error_str(errors)
1593 raise LinstorVolumeManagerError(
1594 'Unable to modify interface on `{}`: {}'.format(node_name, error_str)
1595 )
    def list_node_interfaces(self, node_name):
        """
        List all node interfaces.
        :param str node_name: Node name to use to list interfaces.
        :return: Interface properties (address, active flag) indexed by
        interface name.
        :rtype: dict(str, dict)
        :raises LinstorVolumeManagerError: If no reply is received.
        """
        result = self._linstor.net_interface_list(node_name)
        if not result:
            raise LinstorVolumeManagerError(
                'Unable to list interfaces on `{}`: no list received'.format(node_name)
            )

        interfaces = {}
        for interface in result:
            # Use the raw REST payload of the API object directly.
            interface = interface._rest_data
            interfaces[interface['name']] = {
                'address': interface['address'],
                'active': interface['is_active']
            }
        return interfaces
1619 def get_node_preferred_interface(self, node_name):
1620 """
1621 Get the preferred interface used by a node.
1622 :param str node_name: Node name of the interface to get.
1623 :rtype: str
1624 """
1625 try:
1626 nodes = self._linstor.node_list_raise([node_name]).nodes
1627 if nodes:
1628 properties = nodes[0].props
1629 return properties.get('PrefNic', 'default')
1630 return nodes
1631 except Exception as e:
1632 raise LinstorVolumeManagerError(
1633 'Failed to get preferred interface: `{}`'.format(e)
1634 )
1636 def set_node_preferred_interface(self, node_name, name):
1637 """
1638 Set the preferred interface to use on a node.
1639 :param str node_name: Node name of the interface.
1640 :param str name: Preferred interface to use.
1641 """
1642 result = self._linstor.node_modify(node_name, property_dict={'PrefNic': name})
1643 errors = self._filter_errors(result)
1644 if errors:
1645 error_str = self._get_error_str(errors)
1646 raise LinstorVolumeManagerError(
1647 'Failed to set preferred node interface on `{}`: {}'.format(node_name, error_str)
1648 )
1650 def get_nodes_info(self):
1651 """
1652 Get all nodes + statuses, used or not by the pool.
1653 :rtype: dict(str, dict)
1654 """
1655 try:
1656 nodes = {}
1657 for node in self._linstor.node_list_raise().nodes:
1658 nodes[node.name] = node.connection_status
1659 return nodes
1660 except Exception as e:
1661 raise LinstorVolumeManagerError(
1662 'Failed to get all nodes: `{}`'.format(e)
1663 )
1665 def get_storage_pools_info(self):
1666 """
1667 Give all storage pools of current group name.
1668 :rtype: dict(str, list)
1669 """
1670 storage_pools = {}
1671 for pool in self._get_storage_pools(force=True):
1672 if pool.node_name not in storage_pools:
1673 storage_pools[pool.node_name] = []
1675 size = -1
1676 capacity = -1
1678 space = pool.free_space
1679 if space:
1680 size = space.free_capacity
1681 if size < 0:
1682 size = -1
1683 else:
1684 size *= 1024
1685 capacity = space.total_capacity
1686 if capacity <= 0:
1687 capacity = -1
1688 else:
1689 capacity *= 1024
1691 storage_pools[pool.node_name].append({
1692 'name': pool.name,
1693 'linstor-uuid': pool.uuid,
1694 'free-size': size,
1695 'capacity': capacity
1696 })
1698 return storage_pools
    def get_resources_info(self):
        """
        Give all resources of current group name.
        :return: Per-resource node/volume details, usage states and the
        owning volume UUID (empty string if unknown).
        :rtype: dict(str, list)
        """
        resources = {}
        resource_list = self._get_resource_cache()
        volume_names = self.get_volumes_with_name()
        # Build the per-node entry of each resource.
        for resource in resource_list.resources:
            if resource.name not in resources:
                resources[resource.name] = { 'nodes': {}, 'uuid': '' }
            resource_nodes = resources[resource.name]['nodes']

            resource_nodes[resource.node_name] = {
                'volumes': [],
                'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags,
                'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags
            }
            resource_volumes = resource_nodes[resource.node_name]['volumes']

            for volume in resource.volumes:
                # We ignore diskless pools of the form "DfltDisklessStorPool".
                if volume.storage_pool_name != self._group_name:
                    continue

                # Sizes are reported in KiB by LINSTOR and converted to
                # bytes here; negative values are normalized to -1 (unknown).
                usable_size = volume.usable_size
                if usable_size < 0:
                    usable_size = -1
                else:
                    usable_size *= 1024

                allocated_size = volume.allocated_size
                if allocated_size < 0:
                    allocated_size = -1
                else:
                    allocated_size *= 1024

                resource_volumes.append({
                    'storage-pool-name': volume.storage_pool_name,
                    'linstor-uuid': volume.uuid,
                    'number': volume.number,
                    'device-path': volume.device_path,
                    'usable-size': usable_size,
                    'allocated-size': allocated_size
                })

        # Attach usage and disk states reported by LINSTOR.
        # NOTE(review): assumes every resource_state refers to an entry
        # built above — otherwise this raises a KeyError. TODO confirm the
        # LINSTOR API guarantees this.
        for resource_state in resource_list.resource_states:
            resource = resources[resource_state.rsc_name]['nodes'][resource_state.node_name]
            resource['in-use'] = resource_state.in_use

            volumes = resource['volumes']
            for volume_state in resource_state.volume_states:
                volume = next((x for x in volumes if x['number'] == volume_state.number), None)
                if volume:
                    volume['disk-state'] = volume_state.disk_state

        # Map resource names back to the volume UUIDs owned by this SR.
        for volume_uuid, volume_name in volume_names.items():
            resource = resources.get(volume_name)
            if resource:
                resource['uuid'] = volume_uuid

        return resources
1763 def get_database_path(self):
1764 """
1765 Get the database path.
1766 :return: The current database path.
1767 :rtype: str
1768 """
1769 return self._request_database_path(self._linstor, activate=True)
1771 @classmethod
1772 def get_all_group_names(cls, base_name):
1773 """
1774 Get all group names. I.e. list of current group + HA.
1775 :param str base_name: The SR group_name to use.
1776 :return: List of group names.
1777 :rtype: list
1778 """
1779 return [cls._build_group_name(base_name), cls._build_ha_group_name(base_name)]
1781 @classmethod
1782 def create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__):
1783 """
1784 Create a new SR on the given nodes.
1785 :param str group_name: The SR group_name to use.
1786 :param set(str) ips: Node ips.
1787 :param int redundancy: How many copy of volumes should we store?
1788 :param bool thin_provisioning: Use thin or thick provisioning.
1789 :param function logger: Function to log messages.
1790 :return: A new LinstorSr instance.
1791 :rtype: LinstorSr
1792 """
1794 try:
1795 cls._start_controller(start=True)
1796 sr = cls._create_sr(group_name, ips, redundancy, thin_provisioning, logger)
1797 finally:
1798 # Controller must be stopped and volume unmounted because
1799 # it is the role of the drbd-reactor daemon to do the right
1800 # actions.
1801 cls._start_controller(start=False)
1802 cls._mount_volume(
1803 cls.build_device_path(DATABASE_VOLUME_NAME),
1804 DATABASE_PATH,
1805 mount=False
1806 )
1807 return sr
1809 @classmethod
1810 def _create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__):
1811 # 1. Check if SR already exists.
1812 uri = 'linstor://localhost'
1814 lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)
1816 node_names = list(ips.keys())
1817 for node_name, ip in ips.items():
1818 while True:
1819 # Try to create node.
1820 result = lin.node_create(
1821 node_name,
1822 linstor.consts.VAL_NODE_TYPE_CMBD,
1823 ip
1824 )
1826 errors = cls._filter_errors(result)
1827 if cls._check_errors(
1828 errors, [linstor.consts.FAIL_EXISTS_NODE]
1829 ):
1830 # If it already exists, remove, then recreate.
1831 result = lin.node_delete(node_name)
1832 error_str = cls._get_error_str(result)
1833 if error_str:
1834 raise LinstorVolumeManagerError(
1835 'Failed to remove old node `{}`: {}'
1836 .format(node_name, error_str)
1837 )
1838 elif not errors:
1839 break # Created!
1840 else:
1841 raise LinstorVolumeManagerError(
1842 'Failed to create node `{}` with ip `{}`: {}'.format(
1843 node_name, ip, cls._get_error_str(errors)
1844 )
1845 )
1847 driver_pool_name = group_name
1848 base_group_name = group_name
1849 group_name = cls._build_group_name(group_name)
1850 storage_pool_name = group_name
1851 pools = lin.storage_pool_list_raise(filter_by_stor_pools=[storage_pool_name]).storage_pools
1852 if pools:
1853 existing_node_names = [pool.node_name for pool in pools]
1854 raise LinstorVolumeManagerError(
1855 'Unable to create SR `{}`. It already exists on node(s): {}'
1856 .format(group_name, existing_node_names)
1857 )
1859 if lin.resource_group_list_raise(
1860 cls.get_all_group_names(base_group_name)
1861 ).resource_groups:
1862 if not lin.resource_dfn_list_raise().resource_definitions:
1863 backup_path = cls._create_database_backup_path()
1864 logger(
1865 'Group name already exists `{}` without LVs. '
1866 'Ignoring and moving the config files in {}'.format(group_name, backup_path)
1867 )
1868 cls._move_files(DATABASE_PATH, backup_path)
1869 else:
1870 raise LinstorVolumeManagerError(
1871 'Unable to create SR `{}`: The group name already exists'
1872 .format(group_name)
1873 )
1875 if thin_provisioning:
1876 driver_pool_parts = driver_pool_name.split('/')
1877 if not len(driver_pool_parts) == 2:
1878 raise LinstorVolumeManagerError(
1879 'Invalid group name using thin provisioning. '
1880 'Expected format: \'VG/LV`\''
1881 )
1883 # 2. Create storage pool on each node + resource group.
1884 reg_volume_group_not_found = re.compile(
1885 ".*Volume group '.*' not found$"
1886 )
1888 i = 0
1889 try:
1890 # 2.a. Create storage pools.
1891 storage_pool_count = 0
1892 while i < len(node_names):
1893 node_name = node_names[i]
1895 result = lin.storage_pool_create(
1896 node_name=node_name,
1897 storage_pool_name=storage_pool_name,
1898 storage_driver='LVM_THIN' if thin_provisioning else 'LVM',
1899 driver_pool_name=driver_pool_name
1900 )
1902 errors = linstor.Linstor.filter_api_call_response_errors(
1903 result
1904 )
1905 if errors:
1906 if len(errors) == 1 and errors[0].is_error(
1907 linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR
1908 ) and reg_volume_group_not_found.match(errors[0].message):
1909 logger(
1910 'Volume group `{}` not found on `{}`. Ignoring...'
1911 .format(group_name, node_name)
1912 )
1913 cls._destroy_storage_pool(lin, storage_pool_name, node_name)
1914 else:
1915 error_str = cls._get_error_str(result)
1916 raise LinstorVolumeManagerError(
1917 'Could not create SP `{}` on node `{}`: {}'
1918 .format(group_name, node_name, error_str)
1919 )
1920 else:
1921 storage_pool_count += 1
1922 i += 1
1924 if not storage_pool_count:
1925 raise LinstorVolumeManagerError(
1926 'Unable to create SR `{}`: No VG group found'.format(
1927 group_name,
1928 )
1929 )
1931 # 2.b. Create resource groups.
1932 ha_group_name = cls._build_ha_group_name(base_group_name)
1933 cls._create_resource_group(
1934 lin,
1935 group_name,
1936 storage_pool_name,
1937 redundancy,
1938 True
1939 )
1940 cls._create_resource_group(
1941 lin,
1942 ha_group_name,
1943 storage_pool_name,
1944 3,
1945 True
1946 )
1948 # 3. Create the LINSTOR database volume and mount it.
1949 try:
1950 logger('Creating database volume...')
1951 volume_path = cls._create_database_volume(
1952 lin, ha_group_name, storage_pool_name, node_names, redundancy
1953 )
1954 except LinstorVolumeManagerError as e:
1955 if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
1956 logger('Destroying database volume after creation fail...')
1957 cls._force_destroy_database_volume(lin, group_name)
1958 raise
1960 try:
1961 logger('Mounting database volume...')
1963 # First we must disable the controller to move safely the
1964 # LINSTOR config.
1965 cls._start_controller(start=False)
1967 cls._mount_database_volume(volume_path)
1968 except Exception as e:
1969 # Ensure we are connected because controller has been
1970 # restarted during mount call.
1971 logger('Destroying database volume after mount fail...')
1973 try:
1974 cls._start_controller(start=True)
1975 except Exception:
1976 pass
1978 lin = cls._create_linstor_instance(
1979 uri, keep_uri_unmodified=True
1980 )
1981 cls._force_destroy_database_volume(lin, group_name)
1982 raise e
1984 cls._start_controller(start=True)
1985 lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)
1987 # 4. Remove storage pools/resource/volume group in the case of errors.
1988 except Exception as e:
1989 logger('Destroying resource group and storage pools after fail...')
1990 try:
1991 cls._destroy_resource_group(lin, group_name)
1992 cls._destroy_resource_group(lin, ha_group_name)
1993 except Exception as e2:
1994 logger('Failed to destroy resource group: {}'.format(e2))
1995 pass
1996 j = 0
1997 i = min(i, len(node_names) - 1)
1998 while j <= i:
1999 try:
2000 cls._destroy_storage_pool(lin, storage_pool_name, node_names[j])
2001 except Exception as e2:
2002 logger('Failed to destroy resource group: {}'.format(e2))
2003 pass
2004 j += 1
2005 raise e
2007 # 5. Return new instance.
2008 instance = cls.__new__(cls)
2009 instance._linstor = lin
2010 instance._logger = logger
2011 instance._redundancy = redundancy
2012 instance._base_group_name = base_group_name
2013 instance._group_name = group_name
2014 instance._volumes = set()
2015 instance._storage_pools_time = 0
2016 instance._kv_cache = instance._create_kv_cache()
2017 instance._resource_cache = None
2018 instance._resource_cache_dirty = True
2019 instance._volume_info_cache = None
2020 instance._volume_info_cache_dirty = True
2021 return instance
2023 @classmethod
2024 def build_device_path(cls, volume_name):
2025 """
2026 Build a device path given a volume name.
2027 :param str volume_name: The volume name to use.
2028 :return: A valid or not device path.
2029 :rtype: str
2030 """
2032 return '{}{}/0'.format(cls.DEV_ROOT_PATH, volume_name)
2034 @classmethod
2035 def build_volume_name(cls, base_name):
2036 """
2037 Build a volume name given a base name (i.e. a UUID).
2038 :param str base_name: The volume name to use.
2039 :return: A valid or not device path.
2040 :rtype: str
2041 """
2042 return '{}{}'.format(cls.PREFIX_VOLUME, base_name)
    @classmethod
    def round_up_volume_size(cls, volume_size):
        """
        Align volume size on higher multiple of BLOCK_SIZE.
        :param int volume_size: The volume size to align (in bytes).
        :return: An aligned volume size.
        :rtype: int
        """
        # Delegates to the module-level round_up helper.
        return round_up(volume_size, cls.BLOCK_SIZE)
    @classmethod
    def round_down_volume_size(cls, volume_size):
        """
        Align volume size on lower multiple of BLOCK_SIZE.
        :param int volume_size: The volume size to align (in bytes).
        :return: An aligned volume size.
        :rtype: int
        """
        # Delegates to the module-level round_down helper.
        return round_down(volume_size, cls.BLOCK_SIZE)
2064 # --------------------------------------------------------------------------
2065 # Private helpers.
2066 # --------------------------------------------------------------------------
2068 def _create_kv_cache(self):
2069 self._kv_cache = self._create_linstor_kv('/')
2070 self._kv_cache_dirty = False
2071 return self._kv_cache
2073 def _get_kv_cache(self):
2074 if self._kv_cache_dirty:
2075 self._kv_cache = self._create_kv_cache()
2076 return self._kv_cache
2078 def _create_resource_cache(self):
2079 self._resource_cache = self._linstor.resource_list_raise()
2080 self._resource_cache_dirty = False
2081 return self._resource_cache
2083 def _get_resource_cache(self):
2084 if self._resource_cache_dirty:
2085 self._resource_cache = self._create_resource_cache()
2086 return self._resource_cache
2088 def _mark_resource_cache_as_dirty(self):
2089 self._resource_cache_dirty = True
2090 self._volume_info_cache_dirty = True
2092 # --------------------------------------------------------------------------
2094 def _ensure_volume_exists(self, volume_uuid):
2095 if volume_uuid not in self._volumes:
2096 raise LinstorVolumeManagerError(
2097 'volume `{}` doesn\'t exist'.format(volume_uuid),
2098 LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
2099 )
2101 def _find_best_size_candidates(self):
2102 result = self._linstor.resource_group_qmvs(self._group_name)
2103 error_str = self._get_error_str(result)
2104 if error_str:
2105 raise LinstorVolumeManagerError(
2106 'Failed to get max volume size allowed of SR `{}`: {}'.format(
2107 self._group_name,
2108 error_str
2109 )
2110 )
2111 return result[0].candidates
    def _fetch_resource_names(self, ignore_deleted=True):
        """
        Fetch the resource definition names attached to this SR's groups.
        :param bool ignore_deleted: When True the LINSTOR DELETE flag is
            ignored, i.e. resources being deleted are still returned; when
            False they are filtered out.
        :return: The matching resource definition names.
        :rtype: set(str)
        """
        resource_names = set()
        dfns = self._linstor.resource_dfn_list_raise().resource_definitions
        for dfn in dfns:
            # Keep only definitions belonging to one of our groups
            # (presumably base + HA group — see get_all_group_names).
            if dfn.resource_group_name in self.get_all_group_names(self._base_group_name) and (
                ignore_deleted or
                linstor.consts.FLAG_DELETE not in dfn.flags
            ):
                resource_names.add(dfn.name)
        return resource_names
2124 def _get_volumes_info(self, volume_name=None):
2125 all_volume_info = {}
2127 if not self._volume_info_cache_dirty:
2128 return self._volume_info_cache
2130 def process_resource(resource):
2131 if resource.name not in all_volume_info:
2132 current = all_volume_info[resource.name] = self.VolumeInfo(
2133 resource.name
2134 )
2135 else:
2136 current = all_volume_info[resource.name]
2138 if linstor.consts.FLAG_DISKLESS not in resource.flags:
2139 current.diskful.append(resource.node_name)
2141 for volume in resource.volumes:
2142 # We ignore diskless pools of the form "DfltDisklessStorPool".
2143 if volume.storage_pool_name != self._group_name:
2144 continue
2145 # Only fetch first volume.
2146 if volume.number != 0:
2147 continue
2149 allocated_size = volume.allocated_size
2150 if allocated_size > current.allocated_size:
2151 current.allocated_size = allocated_size
2153 usable_size = volume.usable_size
2154 if usable_size > 0 and (
2155 usable_size < current.virtual_size or
2156 not current.virtual_size
2157 ):
2158 current.virtual_size = usable_size
2160 try:
2161 for resource in self._get_resource_cache().resources:
2162 process_resource(resource)
2163 for volume in all_volume_info.values():
2164 if volume.allocated_size <= 0:
2165 raise LinstorVolumeManagerError('Failed to get allocated size of `{}`'.format(resource.name))
2167 if volume.virtual_size <= 0:
2168 raise LinstorVolumeManagerError('Failed to get usable size of `{}`'.format(volume.name))
2170 volume.allocated_size *= 1024
2171 volume.virtual_size *= 1024
2172 except LinstorVolumeManagerError:
2173 self._mark_resource_cache_as_dirty()
2174 raise
2176 self._volume_info_cache_dirty = False
2177 self._volume_info_cache = all_volume_info
2179 return all_volume_info
2181 def _get_volume_node_names_and_size(self, volume_name):
2182 node_names = set()
2183 size = -1
2184 for resource in self._linstor.resource_list_raise(
2185 filter_by_resources=[volume_name]
2186 ).resources:
2187 for volume in resource.volumes:
2188 # We ignore diskless pools of the form "DfltDisklessStorPool".
2189 if volume.storage_pool_name != self._group_name:
2190 continue
2192 node_names.add(resource.node_name)
2194 usable_size = volume.usable_size
2195 if usable_size <= 0:
2196 continue
2198 if size < 0:
2199 size = usable_size
2200 else:
2201 size = min(size, usable_size)
2203 if size <= 0:
2204 raise LinstorVolumeManagerError('Failed to get usable size of `{}`'.format(resource.name))
2206 return (node_names, size * 1024)
2208 def _compute_size(self, attr):
2209 capacity = 0
2210 for pool in self._get_storage_pools(force=True):
2211 space = pool.free_space
2212 if space:
2213 size = getattr(space, attr)
2214 if size < 0:
2215 raise LinstorVolumeManagerError(
2216 'Failed to get pool {} attr of `{}`'
2217 .format(attr, pool.node_name)
2218 )
2219 capacity += size
2220 return capacity * 1024
2222 def _get_node_names(self):
2223 node_names = set()
2224 for pool in self._get_storage_pools():
2225 node_names.add(pool.node_name)
2226 return node_names
2228 def _get_storage_pools(self, force=False):
2229 cur_time = time.time()
2230 elsaped_time = cur_time - self._storage_pools_time
2232 if force or elsaped_time >= self.STORAGE_POOLS_FETCH_INTERVAL:
2233 self._storage_pools = self._linstor.storage_pool_list_raise(
2234 filter_by_stor_pools=[self._group_name]
2235 ).storage_pools
2236 self._storage_pools_time = time.time()
2238 return self._storage_pools
    def _create_volume(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create the LINSTOR resource definition of a volume and optionally
        auto-place its diskful resources. Retried up to 5 times; on
        failure the partial volume is destroyed (properties preserved).
        :param str volume_uuid: UUID used in error reports and rollback.
        :param str volume_name: LINSTOR resource definition name.
        :param int size: Requested size in bytes, rounded up to BLOCK_SIZE.
        :param bool place_resources: Auto-place with the group redundancy.
        :param bool high_availability: Use the HA group instead of the
            default one.
        :raise LinstorVolumeManagerError: If the creation failed.
        """
        size = self.round_up_volume_size(size)
        self._mark_resource_cache_as_dirty()

        group_name = self._ha_group_name if high_availability else self._group_name
        def create_definition():
            # Spawn the definition only (no placement yet). In the HA case
            # the HA resource group may not exist yet: create it on the
            # first ERR_GROUP_NOT_EXISTS failure and retry once.
            first_attempt = True
            while True:
                try:
                    self._check_volume_creation_errors(
                        self._linstor.resource_group_spawn(
                            rsc_grp_name=group_name,
                            rsc_dfn_name=volume_name,
                            vlm_sizes=['{}B'.format(size)],
                            definitions_only=True
                        ),
                        volume_uuid,
                        self._group_name
                    )
                    break
                except LinstorVolumeManagerError as e:
                    if (
                        not first_attempt or
                        not high_availability or
                        e.code != LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
                    ):
                        raise

                    first_attempt = False
                    self._create_resource_group(
                        self._linstor,
                        group_name,
                        self._group_name,
                        3,
                        True
                    )

            self._configure_volume_peer_slots(self._linstor, volume_name)

        def clean():
            # Best-effort rollback: destroy the half-created volume but
            # keep its KV properties (still owned by the caller).
            try:
                self._destroy_volume(volume_uuid, force=True, preserve_properties=True)
            except Exception as e:
                self._logger(
                    'Unable to destroy volume {} after creation fail: {}'
                    .format(volume_uuid, e)
                )

        def create():
            try:
                create_definition()
                if place_resources:
                    # Basic case when we use the default redundancy of the group.
                    self._check_volume_creation_errors(
                        self._linstor.resource_auto_place(
                            rsc_name=volume_name,
                            place_count=self._redundancy,
                            diskless_on_remaining=False
                        ),
                        volume_uuid,
                        self._group_name
                    )
            except LinstorVolumeManagerError as e:
                # An already-existing volume must not be rolled back; any
                # other error triggers a cleanup before re-raising.
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    clean()
                raise
            except Exception:
                clean()
                raise

        util.retry(create, maxretry=5)
    def _create_volume_with_properties(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create a volume and its KV properties (creation state + resource
        name), returning the KV view scoped to the volume namespace.
        :raise LinstorVolumeManagerError: If the volume or its properties
            already exist, or if the creation failed.
        """
        if self.check_volume_exists(volume_uuid):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, it already exists'
                .format(volume_uuid, self._group_name) + ' in properties',
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        if volume_name in self._fetch_resource_names():
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    volume_uuid, self._group_name
                ) + 'resource of the same name already exists in LINSTOR'
            )

        # I am paranoid.
        volume_properties = self._get_volume_properties(volume_uuid)
        if (volume_properties.get(self.PROP_NOT_EXISTS) is not None):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}`, '.format(volume_uuid) +
                'properties already exist'
            )

        try:
            # Mark the volume as "creating" before touching LINSTOR so a
            # crash can be detected and repaired later (see _build_volumes).
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_CREATING
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            self._create_volume(
                volume_uuid,
                volume_name,
                size,
                place_resources,
                high_availability
            )

            assert volume_properties.namespace == \
                self._build_volume_namespace(volume_uuid)
            return volume_properties
        except LinstorVolumeManagerError as e:
            # Do not destroy existing resource!
            # In theory we can't get this error because we check this event
            # before the `self._create_volume` case.
            # It can only happen if the same volume uuid is used in the same
            # call in another host.
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                self._destroy_volume(volume_uuid, force=True)
            raise
2374 def _find_device_path(self, volume_uuid, volume_name):
2375 current_device_path = self._request_device_path(
2376 volume_uuid, volume_name, activate=True
2377 )
2379 # We use realpath here to get the /dev/drbd<id> path instead of
2380 # /dev/drbd/by-res/<resource_name>.
2381 expected_device_path = self.build_device_path(volume_name)
2382 util.wait_for_path(expected_device_path, 5)
2384 device_realpath = os.path.realpath(expected_device_path)
2385 if current_device_path != device_realpath:
2386 raise LinstorVolumeManagerError(
2387 'Invalid path, current={}, expected={} (realpath={})'
2388 .format(
2389 current_device_path,
2390 expected_device_path,
2391 device_realpath
2392 )
2393 )
2394 return expected_device_path
2396 def _request_device_path(self, volume_uuid, volume_name, activate=False):
2397 node_name = socket.gethostname()
2399 resource = next(filter(
2400 lambda resource: resource.node_name == node_name and
2401 resource.name == volume_name,
2402 self._get_resource_cache().resources
2403 ), None)
2405 if not resource:
2406 if activate:
2407 self._mark_resource_cache_as_dirty()
2408 self._activate_device_path(
2409 self._linstor, node_name, volume_name
2410 )
2411 return self._request_device_path(volume_uuid, volume_name)
2412 raise LinstorVolumeManagerError(
2413 'Unable to get dev path for `{}`, no resource found but definition "seems" to exist'
2414 .format(volume_uuid)
2415 )
2417 # Contains a path of the /dev/drbd<id> form.
2418 device_path = resource.volumes[0].device_path
2419 if not device_path:
2420 raise LinstorVolumeManagerError('Empty dev path for `{}`!'.format(volume_uuid))
2421 return device_path
2423 def _destroy_resource(self, resource_name, force=False):
2424 result = self._linstor.resource_dfn_delete(resource_name)
2425 error_str = self._get_error_str(result)
2426 if not error_str:
2427 self._mark_resource_cache_as_dirty()
2428 return
2430 if not force:
2431 self._mark_resource_cache_as_dirty()
2432 raise LinstorVolumeManagerError(
2433 'Could not destroy resource `{}` from SR `{}`: {}'
2434 .format(resource_name, self._group_name, error_str)
2435 )
2437 # If force is used, ensure there is no opener.
2438 all_openers = get_all_volume_openers(resource_name, '0')
2439 for openers in all_openers.values():
2440 if openers:
2441 self._mark_resource_cache_as_dirty()
2442 raise LinstorVolumeManagerError(
2443 'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)'
2444 .format(resource_name, self._group_name, error_str, all_openers)
2445 )
2447 # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue?
2448 resource_states = filter(
2449 lambda resource_state: resource_state.name == resource_name,
2450 self._get_resource_cache().resource_states
2451 )
2453 # Mark only after computation of states.
2454 self._mark_resource_cache_as_dirty()
2456 for resource_state in resource_states:
2457 volume_state = resource_state.volume_states[0]
2458 if resource_state.in_use:
2459 demote_drbd_resource(resource_state.node_name, resource_name)
2460 break
2461 self._destroy_resource(resource_name)
    def _destroy_volume(self, volume_uuid, force=False, preserve_properties=False):
        """
        Destroy a volume: delete its LINSTOR resource (if one exists) and,
        unless `preserve_properties` is set, clear its KV properties.
        :param str volume_uuid: The volume to destroy.
        :param bool force: Forwarded to `_destroy_resource`.
        :param bool preserve_properties: Keep the KV properties
            (used by the creation rollback path).
        :raise LinstorVolumeManagerError: If the destruction failed.
        """
        volume_properties = self._get_volume_properties(volume_uuid)
        try:
            volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
            if volume_name in self._fetch_resource_names():
                self._destroy_resource(volume_name, force)

            # Assume this call is atomic.
            if not preserve_properties:
                volume_properties.clear()
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Cannot destroy volume `{}`: {}'.format(volume_uuid, e)
            )
    def _build_volumes(self, repair):
        """
        Build `self._volumes` from the KV properties. When `repair` is
        True (master), half-created volumes are destroyed or renamed with
        a "DELETED_" prefix, and pending UUID renames are resolved.
        :param bool repair: True on the master to fix bad volumes,
            False on slaves (read-only view).
        :raise LinstorVolumeManagerError: If pending UUID updates exist
            and repair is not allowed.
        """
        properties = self._kv_cache
        resource_names = self._fetch_resource_names()

        self._volumes = set()

        # Volumes in the middle of a UUID rename (dest side markers).
        updating_uuid_volumes = self._get_volumes_by_property(
            self.REG_UPDATING_UUID_SRC, ignore_inexisting_volumes=False
        )
        if updating_uuid_volumes and not repair:
            raise LinstorVolumeManagerError(
                'Cannot build LINSTOR volume list: '
                'It exists invalid "updating uuid volumes", repair is required'
            )

        existing_volumes = self._get_volumes_by_property(
            self.REG_NOT_EXISTS, ignore_inexisting_volumes=False
        )
        for volume_uuid, not_exists in existing_volumes.items():
            properties.namespace = self._build_volume_namespace(volume_uuid)

            # Volumes being renamed are handled in the loop below.
            src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC)
            if src_uuid:
                self._logger(
                    'Ignoring volume during manager initialization with prop '
                    ' PROP_UPDATING_UUID_SRC: {} (properties={})'
                    .format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Insert volume in list if the volume exists. Or if the volume
            # is being created and a slave wants to use it (repair = False).
            #
            # If we are on the master and if repair is True and state is
            # Creating, it's probably a bug or crash: the creation process has
            # been stopped.
            if not_exists == self.STATE_EXISTS or (
                not repair and not_exists == self.STATE_CREATING
            ):
                self._volumes.add(volume_uuid)
                continue

            if not repair:
                self._logger(
                    'Ignoring bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Remove bad volume.
            try:
                self._logger(
                    'Removing bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                volume_name = properties.get(self.PROP_VOLUME_NAME)

                # Little optimization, don't call `self._destroy_volume`,
                # we already have resource name list.
                if volume_name in resource_names:
                    self._destroy_resource(volume_name, force=True)

                # Assume this call is atomic.
                properties.clear()
            except Exception as e:
                # Do not raise, we don't want to block user action.
                self._logger(
                    'Cannot clean volume {}: {}'.format(volume_uuid, e)
                )

                # The volume can't be removed, maybe it's still in use,
                # in this case rename it with the "DELETED_" prefix.
                # This prefix is mandatory if it exists a snap transaction to
                # rollback because the original VDI UUID can try to be renamed
                # with the UUID we are trying to delete...
                if not volume_uuid.startswith('DELETED_'):
                    self.update_volume_uuid(
                        volume_uuid, 'DELETED_' + volume_uuid, force=True
                    )

        for dest_uuid, src_uuid in updating_uuid_volumes.items():
            dest_namespace = self._build_volume_namespace(dest_uuid)

            # If the destination never reached the "exists" state, the
            # rename never completed: drop the destination properties.
            properties.namespace = dest_namespace
            if int(properties.get(self.PROP_NOT_EXISTS)):
                properties.clear()
                continue

            # Otherwise the rename took effect: clear the source side...
            properties.namespace = self._build_volume_namespace(src_uuid)
            properties.clear()

            # ...and remove the transient marker on the destination.
            properties.namespace = dest_namespace
            properties.pop(self.PROP_UPDATING_UUID_SRC)

            if src_uuid in self._volumes:
                self._volumes.remove(src_uuid)
            self._volumes.add(dest_uuid)
2585 def _get_sr_properties(self):
2586 return self._create_linstor_kv(self._build_sr_namespace())
2588 def _get_volumes_by_property(
2589 self, reg_prop, ignore_inexisting_volumes=True
2590 ):
2591 base_properties = self._get_kv_cache()
2592 base_properties.namespace = self._build_volume_namespace()
2594 volume_properties = {}
2595 for volume_uuid in self._volumes:
2596 volume_properties[volume_uuid] = ''
2598 for key, value in base_properties.items():
2599 res = reg_prop.match(key)
2600 if res:
2601 volume_uuid = res.groups()[0]
2602 if not ignore_inexisting_volumes or \
2603 volume_uuid in self._volumes:
2604 volume_properties[volume_uuid] = value
2606 return volume_properties
2608 def _create_linstor_kv(self, namespace):
2609 return linstor.KV(
2610 self._group_name,
2611 uri=self._linstor.controller_host(),
2612 namespace=namespace
2613 )
2615 def _get_volume_properties(self, volume_uuid):
2616 properties = self._get_kv_cache()
2617 properties.namespace = self._build_volume_namespace(volume_uuid)
2618 return properties
2620 @classmethod
2621 def _build_sr_namespace(cls):
2622 return '/{}/'.format(cls.NAMESPACE_SR)
2624 @classmethod
2625 def _build_volume_namespace(cls, volume_uuid=None):
2626 # Return a path to all volumes if `volume_uuid` is not given.
2627 if volume_uuid is None:
2628 return '/{}/'.format(cls.NAMESPACE_VOLUME)
2629 return '/{}/{}/'.format(cls.NAMESPACE_VOLUME, volume_uuid)
2631 @classmethod
2632 def _get_error_str(cls, result):
2633 return ', '.join([
2634 err.message for err in cls._filter_errors(result)
2635 ])
2637 @classmethod
2638 def _create_linstor_instance(
2639 cls, uri, keep_uri_unmodified=False, attempt_count=30
2640 ):
2641 retry = False
2643 def connect(uri):
2644 if not uri:
2645 uri = get_controller_uri()
2646 if not uri:
2647 raise LinstorVolumeManagerError(
2648 'Unable to find controller uri...'
2649 )
2650 instance = linstor.Linstor(uri, keep_alive=True)
2651 instance.connect()
2652 return instance
2654 try:
2655 return connect(uri)
2656 except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError):
2657 pass
2659 if not keep_uri_unmodified:
2660 uri = None
2662 return util.retry(
2663 lambda: connect(uri),
2664 maxretry=attempt_count,
2665 period=1,
2666 exceptions=[
2667 linstor.errors.LinstorNetworkError,
2668 LinstorVolumeManagerError
2669 ]
2670 )
2672 @classmethod
2673 def _configure_volume_peer_slots(cls, lin, volume_name):
2674 result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3)
2675 error_str = cls._get_error_str(result)
2676 if error_str:
2677 raise LinstorVolumeManagerError(
2678 'Could not configure volume peer slots of {}: {}'
2679 .format(volume_name, error_str)
2680 )
    @classmethod
    def _activate_device_path(cls, lin, node_name, volume_name):
        """
        Make a resource available (diskless) on a node so its DRBD device
        path exists there. An already-present resource is not an error.
        :raise LinstorVolumeManagerError: On any other API error.
        """
        result = lin.resource_make_available(node_name, volume_name, diskful=False)
        if linstor.Linstor.all_api_responses_no_error(result):
            return
        errors = linstor.Linstor.filter_api_call_response_errors(result)
        # A single FAIL_EXISTS_RSC error means the resource already exists
        # on this node — which is exactly the desired state.
        if len(errors) == 1 and errors[0].is_error(
            linstor.consts.FAIL_EXISTS_RSC
        ):
            return

        raise LinstorVolumeManagerError(
            'Unable to activate device path of `{}` on node `{}`: {}'
            .format(volume_name, node_name, ', '.join(
                [str(x) for x in result]))
        )
2699 @classmethod
2700 def _request_database_path(cls, lin, activate=False):
2701 node_name = socket.gethostname()
2703 try:
2704 resource = next(filter(
2705 lambda resource: resource.node_name == node_name and
2706 resource.name == DATABASE_VOLUME_NAME,
2707 lin.resource_list_raise().resources
2708 ), None)
2709 except Exception as e:
2710 raise LinstorVolumeManagerError(
2711 'Unable to fetch database resource: {}'
2712 .format(e)
2713 )
2715 if not resource:
2716 if activate:
2717 cls._activate_device_path(
2718 lin, node_name, DATABASE_VOLUME_NAME
2719 )
2720 return cls._request_database_path(
2721 DATABASE_VOLUME_NAME, DATABASE_VOLUME_NAME
2722 )
2723 raise LinstorVolumeManagerError(
2724 'Empty dev path for `{}`, but definition "seems" to exist'
2725 .format(DATABASE_PATH)
2726 )
2727 # Contains a path of the /dev/drbd<id> form.
2728 return resource.volumes[0].device_path
    @classmethod
    def _create_database_volume(
        cls, lin, group_name, storage_pool_name, node_names, redundancy
    ):
        """
        Create the LINSTOR database volume: spawn its definition (must be
        the first volume of the SR), place diskful replicas on `redundancy`
        nodes owning the storage pool and diskless resources everywhere
        else, then format the local device with DATABASE_MKFS.
        :param linstor.Linstor lin: LINSTOR API instance to use.
        :param str group_name: Resource group to spawn the definition in.
        :param str storage_pool_name: Storage pool for diskful resources.
        :param list node_names: All node names of the pool.
        :param int redundancy: Number of diskful replicas.
        :return: The local device path of the formatted database volume.
        :rtype: str
        :raise LinstorVolumeManagerError: If any step failed.
        """
        try:
            dfns = lin.resource_dfn_list_raise().resource_definitions
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Unable to get definitions during database creation: {}'
                .format(e)
            )

        # The database must be the very first volume of the SR.
        if dfns:
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    DATABASE_VOLUME_NAME, group_name
                ) + 'LINSTOR volume list must be empty.'
            )

        # Workaround to use thin lvm. Without this line an error is returned:
        # "Not enough available nodes"
        # I don't understand why but this command protect against this bug.
        try:
            pools = lin.storage_pool_list_raise(
                filter_by_stor_pools=[storage_pool_name]
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to get storage pool list before database creation: {}'
                .format(e)
            )

        # Ensure we have a correct list of storage pools.
        assert pools.storage_pools  # We must have at least one storage pool!
        nodes_with_pool = list(map(lambda pool: pool.node_name, pools.storage_pools))
        for node_name in nodes_with_pool:
            assert node_name in node_names
        util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool))

        # Create the database definition.
        size = cls.round_up_volume_size(DATABASE_SIZE)
        cls._check_volume_creation_errors(lin.resource_group_spawn(
            rsc_grp_name=group_name,
            rsc_dfn_name=DATABASE_VOLUME_NAME,
            vlm_sizes=['{}B'.format(size)],
            definitions_only=True
        ), DATABASE_VOLUME_NAME, group_name)
        cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME)

        # Create real resources on the first nodes.
        resources = []

        # Split nodes by storage pool availability.
        diskful_nodes = []
        diskless_nodes = []
        for node_name in node_names:
            if node_name in nodes_with_pool:
                diskful_nodes.append(node_name)
            else:
                diskless_nodes.append(node_name)

        assert diskful_nodes
        for node_name in diskful_nodes[:redundancy]:
            util.SMlog('Create database diskful on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                storage_pool=storage_pool_name
            ))
        # Create diskless resources on the remaining set.
        for node_name in diskful_nodes[redundancy:] + diskless_nodes:
            util.SMlog('Create database diskless on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                diskless=True
            ))

        result = lin.resource_create(resources)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create database volume from SR `{}`: {}'.format(
                    group_name, error_str
                )
            )

        # Create database and ensure path exists locally and
        # on replicated devices.
        current_device_path = cls._request_database_path(lin, activate=True)

        # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be
        # plugged.
        for node_name in node_names:
            cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME)

        # We use realpath here to get the /dev/drbd<id> path instead of
        # /dev/drbd/by-res/<resource_name>.
        expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME)
        util.wait_for_path(expected_device_path, 5)

        device_realpath = os.path.realpath(expected_device_path)
        if current_device_path != device_realpath:
            raise LinstorVolumeManagerError(
                'Invalid path, current={}, expected={} (realpath={})'
                .format(
                    current_device_path,
                    expected_device_path,
                    device_realpath
                )
            )

        try:
            util.retry(
                lambda: util.pread2([DATABASE_MKFS, expected_device_path]),
                maxretry=5
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to execute {} on database volume: {}'
                .format(DATABASE_MKFS, e)
            )

        return expected_device_path
2854 @classmethod
2855 def _destroy_database_volume(cls, lin, group_name):
2856 error_str = cls._get_error_str(
2857 lin.resource_dfn_delete(DATABASE_VOLUME_NAME)
2858 )
2859 if error_str:
2860 raise LinstorVolumeManagerError(
2861 'Could not destroy resource `{}` from SR `{}`: {}'
2862 .format(DATABASE_VOLUME_NAME, group_name, error_str)
2863 )
    @classmethod
    def _mount_database_volume(cls, volume_path, mount=True, force=False):
        """
        Mount (or umount) the database volume on DATABASE_PATH, moving the
        existing content through a temporary backup directory. On failure,
        a best-effort rollback restores the previous state.
        :param str volume_path: Device path of the database volume.
        :param bool mount: Mount when True, umount when False.
        :param bool force: Allow overwriting files while restoring.
        :raise LinstorVolumeManagerError: If a step failed.
        """
        try:
            # 1. Create a backup config folder.
            database_not_empty = bool(os.listdir(DATABASE_PATH))
            backup_path = cls._create_database_backup_path()

            # 2. Move the config in the mounted volume.
            if database_not_empty:
                cls._move_files(DATABASE_PATH, backup_path)

            cls._mount_volume(volume_path, DATABASE_PATH, mount)

            if database_not_empty:
                cls._move_files(backup_path, DATABASE_PATH, force)

            # 3. Remove useless backup directory.
            try:
                os.rmdir(backup_path)
            except Exception as e:
                raise LinstorVolumeManagerError(
                    'Failed to remove backup path {} of LINSTOR config: {}'
                    .format(backup_path, e)
                )
        except Exception as e:
            # NOTE(review): if `os.listdir` above raised, `backup_path` is
            # unbound here; the resulting NameError is swallowed by
            # force_exec — confirm this is acceptable.
            def force_exec(fn):
                try:
                    fn()
                except Exception:
                    pass

            # Rollback: undo the move + (un)mount if the mount state shows
            # the operation went through, then restore the backup.
            if mount == cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    DATABASE_PATH, backup_path
                ))
                force_exec(lambda: cls._mount_volume(
                    volume_path, DATABASE_PATH, not mount
                ))

            if mount != cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    backup_path, DATABASE_PATH
                ))

            force_exec(lambda: os.rmdir(backup_path))
            raise e
    @classmethod
    def _force_destroy_database_volume(cls, lin, group_name):
        """Best-effort variant of `_destroy_database_volume`: ignore errors."""
        try:
            cls._destroy_database_volume(lin, group_name)
        except Exception:
            pass
    @classmethod
    def _destroy_storage_pool(cls, lin, group_name, node_name):
        """
        Delete the storage pool of a node, retrying up to 10 times.
        A missing pool (or pool definition) is not an error.
        :raise LinstorVolumeManagerError: If the deletion failed.
        """
        def destroy():
            result = lin.storage_pool_delete(node_name, group_name)
            errors = cls._filter_errors(result)
            if cls._check_errors(errors, [
                linstor.consts.FAIL_NOT_FOUND_STOR_POOL,
                linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN
            ]):
                return

            if errors:
                raise LinstorVolumeManagerError(
                    'Failed to destroy SP `{}` on node `{}`: {}'.format(
                        group_name,
                        node_name,
                        cls._get_error_str(errors)
                    )
                )

        # We must retry to avoid errors like:
        # "can not be deleted as volumes / snapshot-volumes are still using it"
        # after LINSTOR database volume destruction.
        return util.retry(destroy, maxretry=10)
    @classmethod
    def _create_resource_group(
        cls,
        lin,
        group_name,
        storage_pool_name,
        redundancy,
        destroy_old_group
    ):
        """
        Create a resource group and its volume group on the controller.
        :param linstor.Linstor lin: LINSTOR API instance to use.
        :param str group_name: Name of the resource group to create.
        :param str storage_pool_name: Storage pool backing the group.
        :param int redundancy: Diskful place count of the group.
        :param bool destroy_old_group: When True, destroy a pre-existing
            group of the same name and retry once.
        :raise LinstorVolumeManagerError: If the RG or VG creation failed.
        """
        rg_creation_attempt = 0
        while True:
            result = lin.resource_group_create(
                name=group_name,
                place_count=redundancy,
                storage_pool=storage_pool_name,
                diskless_on_remaining=False
            )
            error_str = cls._get_error_str(result)
            if not error_str:
                break

            # If the group already exists and we are allowed to, destroy
            # the old one and retry (a single time).
            errors = cls._filter_errors(result)
            if destroy_old_group and cls._check_errors(errors, [
                linstor.consts.FAIL_EXISTS_RSC_GRP
            ]):
                rg_creation_attempt += 1
                if rg_creation_attempt < 2:
                    try:
                        cls._destroy_resource_group(lin, group_name)
                    except Exception as e:
                        error_str = 'Failed to destroy old and empty RG: {}'.format(e)
                    else:
                        continue

            raise LinstorVolumeManagerError(
                'Could not create RG `{}`: {}'.format(
                    group_name, error_str
                )
            )

        result = lin.volume_group_create(group_name)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create VG `{}`: {}'.format(
                    group_name, error_str
                )
            )
    @classmethod
    def _destroy_resource_group(cls, lin, group_name):
        """
        Delete a resource group, retrying up to 10 times. A missing
        group is not an error.
        :raise LinstorVolumeManagerError: If the deletion failed.
        """
        def destroy():
            result = lin.resource_group_delete(group_name)
            errors = cls._filter_errors(result)
            if cls._check_errors(errors, [
                linstor.consts.FAIL_NOT_FOUND_RSC_GRP
            ]):
                return

            if errors:
                raise LinstorVolumeManagerError(
                    'Failed to destroy RG `{}`: {}'
                    .format(group_name, cls._get_error_str(errors))
                )

        return util.retry(destroy, maxretry=10)
3011 @classmethod
3012 def _build_group_name(cls, base_name):
3013 # If thin provisioning is used we have a path like this:
3014 # `VG/LV`. "/" is not accepted by LINSTOR.
3015 return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_'))
3017 # Used to store important data in a HA context,
3018 # i.e. a replication count of 3.
3019 @classmethod
3020 def _build_ha_group_name(cls, base_name):
3021 return '{}{}'.format(cls.PREFIX_HA, base_name.replace('/', '_'))
    @classmethod
    def _check_volume_creation_errors(cls, result, volume_uuid, group_name):
        """
        Inspect an API result for volume creation failures, raising a
        LinstorVolumeManagerError with a specific error code when the
        volume already exists or the resource group is missing.
        :raise LinstorVolumeManagerError: On any error in `result`.
        """
        errors = cls._filter_errors(result)
        if cls._check_errors(errors, [
            linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN
        ]):
            raise LinstorVolumeManagerError(
                'Failed to create volume `{}` from SR `{}`, it already exists'
                .format(volume_uuid, group_name),
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        if cls._check_errors(errors, [linstor.consts.FAIL_NOT_FOUND_RSC_GRP]):
            raise LinstorVolumeManagerError(
                'Failed to create volume `{}` from SR `{}`, resource group doesn\'t exist'
                .format(volume_uuid, group_name),
                LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
            )

        if errors:
            raise LinstorVolumeManagerError(
                'Failed to create volume `{}` from SR `{}`: {}'.format(
                    volume_uuid,
                    group_name,
                    cls._get_error_str(errors)
                )
            )
    @classmethod
    def _move_files(cls, src_dir, dest_dir, force=False):
        """
        Move all files of `src_dir` into `dest_dir` ("lost+found" entries
        are ignored). Without `force` the destination must be empty and no
        file may be overwritten; on failure, already-moved files are moved
        back (best effort).
        :raise LinstorVolumeManagerError: If a file cannot be moved.
        """
        def listdir(dir):
            ignored = ['lost+found']
            return [file for file in os.listdir(dir) if file not in ignored]

        try:
            if not force:
                files = listdir(dest_dir)
                if files:
                    raise LinstorVolumeManagerError(
                        'Cannot move files from {} to {} because destination '
                        'contains: {}'.format(src_dir, dest_dir, files)
                    )
        except LinstorVolumeManagerError:
            raise
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Cannot list dir {}: {}'.format(dest_dir, e)
            )

        try:
            for file in listdir(src_dir):
                try:
                    dest_file = os.path.join(dest_dir, file)
                    if not force and os.path.exists(dest_file):
                        raise LinstorVolumeManagerError(
                            'Cannot move {} because it already exists in the '
                            'destination'.format(file)
                        )
                    shutil.move(os.path.join(src_dir, file), dest_file)
                except LinstorVolumeManagerError:
                    raise
                except Exception as e:
                    raise LinstorVolumeManagerError(
                        'Cannot move {}: {}'.format(file, e)
                    )
        except Exception as e:
            if not force:
                # Roll back the files already moved (best effort, errors
                # deliberately swallowed: the original error is re-raised).
                try:
                    cls._move_files(dest_dir, src_dir, force=True)
                except Exception:
                    pass

            raise LinstorVolumeManagerError(
                'Failed to move files from {} to {}: {}'.format(
                    src_dir, dest_dir, e
                )
            )
3101 @staticmethod
3102 def _create_database_backup_path():
3103 path = DATABASE_PATH + '-' + str(uuid.uuid4())
3104 try:
3105 os.mkdir(path)
3106 return path
3107 except Exception as e:
3108 raise LinstorVolumeManagerError(
3109 'Failed to create backup path {} of LINSTOR config: {}'
3110 .format(path, e)
3111 )
3113 @staticmethod
3114 def _get_filtered_properties(properties):
3115 return dict(properties.items())
3117 @staticmethod
3118 def _filter_errors(result):
3119 return [
3120 err for err in result
3121 if hasattr(err, 'is_error') and err.is_error()
3122 ]
3124 @staticmethod
3125 def _check_errors(result, codes):
3126 for err in result:
3127 for code in codes:
3128 if err.is_error(code):
3129 return True
3130 return False
3132 @classmethod
3133 def _controller_is_running(cls):
3134 return cls._service_is_running('linstor-controller')
3136 @classmethod
3137 def _start_controller(cls, start=True):
3138 return cls._start_service('linstor-controller', start)
3140 @staticmethod
3141 def _start_service(name, start=True):
3142 action = 'start' if start else 'stop'
3143 (ret, out, err) = util.doexec([
3144 'systemctl', action, name
3145 ])
3146 if ret != 0:
3147 raise LinstorVolumeManagerError(
3148 'Failed to {} {}: {} {}'
3149 .format(action, name, out, err)
3150 )
3152 @staticmethod
3153 def _service_is_running(name):
3154 (ret, out, err) = util.doexec([
3155 'systemctl', 'is-active', '--quiet', name
3156 ])
3157 return not ret
3159 @staticmethod
3160 def _is_mounted(mountpoint):
3161 (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint])
3162 return ret == 0
3164 @classmethod
3165 def _mount_volume(cls, volume_path, mountpoint, mount=True):
3166 if mount:
3167 try:
3168 util.pread(['mount', volume_path, mountpoint])
3169 except Exception as e:
3170 raise LinstorVolumeManagerError(
3171 'Failed to mount volume {} on {}: {}'
3172 .format(volume_path, mountpoint, e)
3173 )
3174 else:
3175 try:
3176 if cls._is_mounted(mountpoint):
3177 util.pread(['umount', mountpoint])
3178 except Exception as e:
3179 raise LinstorVolumeManagerError(
3180 'Failed to umount volume {} on {}: {}'
3181 .format(volume_path, mountpoint, e)
3182 )
3185# ==============================================================================
3187# Check if a path is a DRBD resource and log the process name/pid
3188# that opened it.
def log_drbd_openers(path):
    """Log which process (local) or hosts (remote) keep the DRBD resource
    behind `path` open.

    `path` is expected to be a '/dev/drbd/by-res/<resource>/<volume>'
    symlink; any other path is silently ignored. This helper never raises:
    failures are logged through util.SMlog.
    """
    # Ignore if it's not a symlink to DRBD resource.
    if not path.startswith(DRBD_BY_RES_PATH):
        return

    # Compute resource name: the first path component after the prefix.
    res_name_end = path.find('/', len(DRBD_BY_RES_PATH))
    if res_name_end == -1:
        return
    res_name = path[len(DRBD_BY_RES_PATH):res_name_end]

    # Compute volume number: the component after the last slash.
    # BUGFIX: the previous guard (`path.rfind('/') == res_name_end` ->
    # return) rejected exactly the canonical two-component
    # '<resource>/<volume>' layout, so this function never logged anything
    # for real by-res paths. Only bail out when the volume part is empty
    # (e.g. a trailing slash).
    volume = path[path.rfind('/') + 1:]
    if not volume:
        return

    try:
        # Ensure path is a DRBD (major number 147 is registered for DRBD).
        drbd_path = os.path.realpath(path)
        stats = os.stat(drbd_path)
        if not stat.S_ISBLK(stats.st_mode) or os.major(stats.st_rdev) != 147:
            return

        # Find where the device is open.
        (ret, stdout, stderr) = util.doexec(['drbdadm', 'status', res_name])
        if ret != 0:
            util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format(
                res_name, stderr
            ))
            return

        # Is it a local device?
        if stdout.startswith('{} role:Primary'.format(res_name)):
            util.SMlog(
                'DRBD resource `{}` is open on local host: {}'
                .format(path, get_local_volume_openers(res_name, volume))
            )
            return

        # Is it a remote device?
        util.SMlog(
            'DRBD resource `{}` is open on hosts: {}'
            .format(path, get_all_volume_openers(res_name, volume))
        )
    except Exception as e:
        util.SMlog(
            'Got exception while trying to determine where DRBD resource ' +
            '`{}` is open: {}'.format(path, e)
        )