Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python3 

2# 

3# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr 

4# 

5# This program is free software: you can redistribute it and/or modify 

6# it under the terms of the GNU General Public License as published by 

7# the Free Software Foundation, either version 3 of the License, or 

8# (at your option) any later version. 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU General Public License for more details. 

13# 

14# You should have received a copy of the GNU General Public License 

15# along with this program. If not, see <https://www.gnu.org/licenses/>. 

16# 

17 

18from sm_typing import override 

19 

20import errno 

21import json 

22import linstor 

23import os.path 

24import re 

25import shutil 

26import socket 

27import stat 

28import time 

29import util 

30import uuid 

31 

32# Persistent prefix to add to RAW persistent volumes. 

33PERSISTENT_PREFIX = 'xcp-persistent-' 

34 

35# Contains the data of the "/var/lib/linstor" directory. 

36DATABASE_VOLUME_NAME = PERSISTENT_PREFIX + 'database' 

37DATABASE_SIZE = 1 << 30 # 1GB. 

38DATABASE_PATH = '/var/lib/linstor' 

39DATABASE_MKFS = 'mkfs.ext4' 

40 

41REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary") 

42REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$') 

43 

44DRBD_BY_RES_PATH = '/dev/drbd/by-res/' 

45 

46PLUGIN = 'linstor-manager' 

47 

48 

49# ============================================================================== 

50 

def get_local_volume_openers(resource_name, volume):
    """
    Read the DRBD debugfs openers file of a local volume.

    :param str resource_name: The DRBD resource name.
    :param int volume: The volume number inside the resource.
    :return: JSON string mapping each PID to its process name and
        open duration (in ms).
    :rtype: str
    :raises Exception: If resource_name/volume are missing or the
        debugfs file cannot be read.
    """
    if not resource_name or volume is None:
        raise Exception('Cannot get DRBD openers without resource name and/or volume.')

    openers_path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format(
        resource_name, volume
    )

    # The file is small, so slurp it in one shot.
    with open(openers_path, 'r') as openers_file:
        entries = openers_file.readlines()

    # Each line: "<process name> <pid> <open duration ms>".
    entry_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)')

    openers = {}
    for entry in entries:
        parsed = entry_re.match(entry)
        assert parsed

        process_name, pid, open_duration_ms = parsed.groups()
        openers[pid] = {
            'process-name': process_name,
            'open-duration': open_duration_ms
        }

    return json.dumps(openers)

80 

def get_all_volume_openers(resource_name, volume):
    """
    Collect the DRBD openers of a volume on every live host of the pool.

    :param str resource_name: The DRBD resource name.
    :param volume: The volume number inside the resource (stringified
        before being passed to the plugin).
    :return: Dict mapping each reachable node name to the JSON-decoded
        output of the `getDrbdOpeners` plugin call; hosts that fail are
        logged and skipped.
    :rtype: dict
    """
    PLUGIN_CMD = 'getDrbdOpeners'

    # Plugin arguments must be strings.
    volume = str(volume)
    openers = {}

    # Make sure this call never stucks because this function can be called
    # during HA init and in this case we can wait forever.
    session = util.timeout_call(10, util.get_localAPI_session)

    hosts = session.xenapi.host.get_all_records()
    for host_ref, host_record in hosts.items():
        node_name = host_record['hostname']
        try:
            if not session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )['live']:
                # Ensure we call plugin on online hosts only.
                continue

            openers[node_name] = json.loads(
                session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {
                    'resourceName': resource_name,
                    'volume': volume
                })
            )
        except Exception as e:
            # Best effort: a failing host only loses its entry in the result.
            util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))

    return openers

113 

114 

115# ============================================================================== 

116 

def round_up(value, divisor):
    """Return *value* rounded up to the nearest multiple of *divisor*."""
    assert divisor
    divisor = int(divisor)
    # Ceiling division via negated floor division.
    return -(-int(value) // divisor) * divisor

121 

122 

def round_down(value, divisor):
    """Return *value* rounded down to the nearest multiple of *divisor*."""
    assert divisor
    divisor = int(divisor)
    # Floor to the multiple below (identical to subtracting the modulo).
    return (int(value) // divisor) * divisor

127 

128 

129# ============================================================================== 

130 

def get_remote_host_ip(node_name):
    """
    Find the IP of a remote node using the DRBD connections of the
    persistent database volume.

    :param str node_name: The DRBD/LINSTOR node name to look up.
    :return: The IP address, or None if it cannot be determined.
    :rtype: str
    """
    (ret, stdout, stderr) = util.doexec([
        'drbdsetup', 'show', DATABASE_VOLUME_NAME, '--json'
    ])
    if ret != 0:
        return

    try:
        conf = json.loads(stdout)
        if not conf:
            return

        for connection in conf[0]['connections']:
            if connection['net']['_name'] == node_name:
                # `_remote_host` presumably looks like "<family> <IP>:<PORT>";
                # the regex captures the text between the first whitespace
                # and the last ':'.
                value = connection['path']['_remote_host']
                res = REG_DRBDSETUP_IP.match(value)
                if res:
                    return res.groups()[0]
                break
    except Exception:
        # Best effort: any parsing issue means "address unknown".
        pass

152 

153 

def _get_controller_uri():
    """
    Best-effort search for the host running the LINSTOR controller.

    Tries, in order: the local DRBD status of the database volume, the
    DRBD connection list, and finally a plugin call on every pool host.
    :return: A LINSTOR URI ('linstor://<host>') or None if not found.
    :rtype: str
    """
    PLUGIN_CMD = 'hasControllerRunning'

    # Try to find controller using drbdadm.
    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])
    if ret == 0:
        # If we are here, the database device exists locally.

        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            # Nice case, we have the controller running on this local host.
            return 'linstor://localhost'

        # Try to find the host using DRBD connections.
        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            node_name = res.groups()[0]
            ip = get_remote_host_ip(node_name)
            if ip:
                return 'linstor://' + ip

    # Worst case: we use many hosts in the pool (>= 4), so we can't find the
    # primary using drbdadm because we don't have all connections to the
    # replicated volume. `drbdadm status xcp-persistent-database` returns
    # 3 connections by default.
    try:
        session = util.timeout_call(10, util.get_localAPI_session)

        for host_ref, host_record in session.xenapi.host.get_all_records().items():
            node_name = host_record['hostname']
            try:
                if util.strtobool(
                    session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {})
                ):
                    return 'linstor://' + host_record['address']
            except Exception as e:
                # Can throw an exception if a host is offline. So catch it.
                util.SMlog('Unable to search controller on `{}`: {}'.format(
                    node_name, e
                ))
    except:
        # Not found, maybe we are trying to create the SR...
        pass

198 

def get_controller_uri():
    """
    Locate the LINSTOR controller URI, retrying for up to 10 attempts
    with a one-second pause between attempts.
    :return: The controller URI, or None when every attempt fails.
    :rtype: str
    """
    for attempt in range(10):
        uri = _get_controller_uri()
        if uri:
            return uri
        # Sleep between attempts, but not after the last one.
        if attempt < 9:
            time.sleep(1)

210 

211 

def get_controller_node_name():
    """
    Find the node name of the host running the LINSTOR controller.

    First checks the local DRBD status of the database volume, then falls
    back on calling the `hasControllerRunning` plugin on every live host.
    :return: The node name, 'localhost' when this host is primary, or
        None if the controller cannot be located.
    :rtype: str
    """
    PLUGIN_CMD = 'hasControllerRunning'

    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])

    if ret == 0:
        # The database volume exists locally.
        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            return 'localhost'

        # Otherwise look for a primary in the DRBD connection list.
        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            return res.groups()[0]

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            if not session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )['live']:
                # Only query online hosts.
                continue

            if util.strtobool(session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {}
            )):
                return node_name
        except Exception as e:
            util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format(
                node_name, e
            ))

245 

246 

def demote_drbd_resource(node_name, resource_name):
    """
    Demote a DRBD resource on a specific node of the pool.

    :param str node_name: XAPI hostname of the node where the resource
        must be demoted.
    :param str resource_name: The DRBD resource name to demote.
    :raises Exception: If the node cannot be found in the pool, or if
        the plugin call on that node fails.
    """
    PLUGIN_CMD = 'demoteDrbdResource'

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        if host_record['hostname'] != node_name:
            continue

        try:
            session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name}
            )
            # Demotion request sent successfully: we are done.
            # Without this return the "unable to find node" error below
            # would be raised even on success.
            return
        except Exception as e:
            util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))
            raise

    # Loop completed without matching the hostname.
    raise Exception(
        'Can\'t demote resource `{}`, unable to find node `{}`'
        .format(resource_name, node_name)
    )

268 

269# ============================================================================== 

270 

class LinstorVolumeManagerError(Exception):
    """
    Error raised by LinstorVolumeManager, carrying a numeric code so
    callers can distinguish failure causes.
    """

    # Error codes. NOTE: these must be plain ints — the previous
    # definition had trailing commas that silently turned most of them
    # into 1-tuples ((0,), (1,), ...), inconsistent with
    # ERR_GROUP_NOT_EXISTS which was already an int.
    ERR_GENERIC = 0
    ERR_VOLUME_EXISTS = 1
    ERR_VOLUME_NOT_EXISTS = 2
    ERR_VOLUME_DESTROY = 3
    ERR_GROUP_NOT_EXISTS = 4

    def __init__(self, message, code=ERR_GENERIC):
        """
        :param str message: Human-readable error description.
        :param int code: One of the ERR_* codes above.
        """
        super(LinstorVolumeManagerError, self).__init__(message)
        self._code = code

    @property
    def code(self):
        """Return the numeric error code."""
        return self._code

285 

286 

287# ============================================================================== 

288 

289# Note: 

290# If a storage pool is not accessible after a network change: 

291# linstor node interface modify <NODE> default --ip <IP> 

292 

293 

294class LinstorVolumeManager(object): 

295 """ 

296 API to manager LINSTOR volumes in XCP-ng. 

297 A volume in this context is a physical part of the storage layer. 

298 """ 

299 

300 __slots__ = ( 

301 '_linstor', '_logger', '_redundancy', 

302 '_base_group_name', '_group_name', '_ha_group_name', 

303 '_volumes', '_storage_pools', '_storage_pools_time', 

304 '_kv_cache', '_resource_cache', '_volume_info_cache', 

305 '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty' 

306 ) 

307 

308 DEV_ROOT_PATH = DRBD_BY_RES_PATH 

309 

310 # Default sector size. 

311 BLOCK_SIZE = 512 

312 

313 # List of volume properties. 

314 PROP_METADATA = 'metadata' 

315 PROP_NOT_EXISTS = 'not-exists' 

316 PROP_VOLUME_NAME = 'volume-name' 

317 PROP_IS_READONLY_TIMESTAMP = 'readonly-timestamp' 

318 

319 # A volume can only be locked for a limited duration. 

320 # The goal is to give enough time to slaves to execute some actions on 

321 # a device before an UUID update or a coalesce for example. 

322 # Expiration is expressed in seconds. 

323 LOCKED_EXPIRATION_DELAY = 1 * 60 

324 

325 # Used when volume uuid is being updated. 

326 PROP_UPDATING_UUID_SRC = 'updating-uuid-src' 

327 

328 # States of property PROP_NOT_EXISTS. 

329 STATE_EXISTS = '0' 

330 STATE_NOT_EXISTS = '1' 

331 STATE_CREATING = '2' 

332 

333 # Property namespaces. 

334 NAMESPACE_SR = 'xcp/sr' 

335 NAMESPACE_VOLUME = 'xcp/volume' 

336 

337 # Regex to match properties. 

338 REG_PROP = '^([^/]+)/{}$' 

339 

340 REG_METADATA = re.compile(REG_PROP.format(PROP_METADATA)) 

341 REG_NOT_EXISTS = re.compile(REG_PROP.format(PROP_NOT_EXISTS)) 

342 REG_VOLUME_NAME = re.compile(REG_PROP.format(PROP_VOLUME_NAME)) 

343 REG_UPDATING_UUID_SRC = re.compile(REG_PROP.format(PROP_UPDATING_UUID_SRC)) 

344 

345 # Prefixes of SR/VOLUME in the LINSTOR DB. 

346 # A LINSTOR (resource, group, ...) name cannot start with a number. 

347 # So we add a prefix behind our SR/VOLUME uuids. 

348 PREFIX_SR = 'xcp-sr-' 

349 PREFIX_HA = 'xcp-ha-' 

350 PREFIX_VOLUME = 'xcp-volume-' 

351 

352 # Limit request number when storage pool info is asked, we fetch 

353 # the current pool status after N elapsed seconds. 

354 STORAGE_POOLS_FETCH_INTERVAL = 15 

355 

    @staticmethod
    def default_logger(*args):
        """Fallback logger used when none is supplied: print args to stdout."""
        print(args)

359 

360 # -------------------------------------------------------------------------- 

361 # API. 

362 # -------------------------------------------------------------------------- 

363 

    class VolumeInfo(object):
        """Aggregated size and placement information for a single volume."""

        __slots__ = (
            'name',
            'allocated_size',  # Allocated size, place count is not used.
            'virtual_size',    # Total virtual available size of this volume
                               # (i.e. the user size at creation).
            'diskful'          # Array of nodes that have a diskful volume.
        )

        def __init__(self, name):
            # Sizes start at zero and are filled in by the manager.
            self.name = name
            self.allocated_size = 0
            self.virtual_size = 0
            self.diskful = []

        @override
        def __repr__(self) -> str:
            return 'VolumeInfo("{}", {}, {}, {})'.format(
                self.name, self.allocated_size, self.virtual_size,
                self.diskful
            )

385 

386 # -------------------------------------------------------------------------- 

387 

    def __init__(
        self, uri, group_name, repair=False, logger=default_logger.__func__,
        attempt_count=30
    ):
        """
        Create a new LinstorVolumeManager object.
        :param str uri: URI to communicate with the LINSTOR controller.
        :param str group_name: The SR group name to use.
        :param bool repair: If true we try to remove bad volumes due to a crash
        or unexpected behavior.
        :param function logger: Function to log messages.
        :param int attempt_count: Number of attempts to join the controller.
        :raises LinstorVolumeManagerError: If the resource group cannot
        be found on the controller.
        """

        self._linstor = self._create_linstor_instance(
            uri, attempt_count=attempt_count
        )
        self._base_group_name = group_name

        # Ensure group exists.
        group_name = self._build_group_name(group_name)
        groups = self._linstor.resource_group_list_raise([group_name]).resource_groups
        if not groups:
            raise LinstorVolumeManagerError(
                'Unable to find `{}` Linstor SR'.format(group_name)
            )

        # Ok. ;)
        self._logger = logger
        # Redundancy mirrors the group's configured place count.
        self._redundancy = groups[0].select_filter.place_count
        self._group_name = group_name
        self._ha_group_name = self._build_ha_group_name(self._base_group_name)
        self._volumes = set()
        self._storage_pools_time = 0

        # To increase performance and limit request count to LINSTOR services,
        # we use caches.
        self._kv_cache = self._create_kv_cache()
        self._resource_cache = None
        self._resource_cache_dirty = True
        self._volume_info_cache = None
        self._volume_info_cache_dirty = True
        self._build_volumes(repair=repair)

431 

    @property
    def group_name(self):
        """
        Give the used group name (as passed to the constructor,
        i.e. without the LINSTOR prefix).
        :return: The group name.
        :rtype: str
        """
        return self._base_group_name

440 

    @property
    def redundancy(self):
        """
        Give the used redundancy (the resource group's place count).
        :return: The redundancy.
        :rtype: int
        """
        return self._redundancy

449 

    @property
    def volumes(self):
        """
        Give the volumes uuid set.
        :return: The volumes uuid set.
        :rtype: set(str)
        """
        return self._volumes

458 

459 @property 

460 def max_volume_size_allowed(self): 

461 """ 

462 Give the max volume size currently available in B. 

463 :return: The current size. 

464 :rtype: int 

465 """ 

466 

467 candidates = self._find_best_size_candidates() 

468 if not candidates: 

469 raise LinstorVolumeManagerError( 

470 'Failed to get max volume size allowed' 

471 ) 

472 

473 size = candidates[0].max_volume_size 

474 if size < 0: 

475 raise LinstorVolumeManagerError( 

476 'Invalid max volume size allowed given: {}'.format(size) 

477 ) 

478 return self.round_down_volume_size(size * 1024) 

479 

    @property
    def physical_size(self):
        """
        Give the total physical size of the SR.
        :return: The physical size.
        :rtype: int
        """
        return self._compute_size('total_capacity')

488 

    @property
    def physical_free_size(self):
        """
        Give the total free physical size of the SR.
        :return: The physical free size.
        :rtype: int
        """
        return self._compute_size('free_capacity')

497 

498 @property 

499 def allocated_volume_size(self): 

500 """ 

501 Give the allocated size for all volumes. The place count is not 

502 used here. When thick lvm is used, the size for one volume should 

503 be equal to the virtual volume size. With thin lvm, the size is equal 

504 or lower to the volume size. 

505 :return: The allocated size of all volumes. 

506 :rtype: int 

507 """ 

508 

509 # Paths: /res_name/vol_number/size 

510 sizes = {} 

511 

512 for resource in self._get_resource_cache().resources: 

513 if resource.name not in sizes: 

514 current = sizes[resource.name] = {} 

515 else: 

516 current = sizes[resource.name] 

517 

518 for volume in resource.volumes: 

519 # We ignore diskless pools of the form "DfltDisklessStorPool". 

520 if volume.storage_pool_name != self._group_name: 

521 continue 

522 

523 allocated_size = max(volume.allocated_size, 0) 

524 current_allocated_size = current.get(volume.number) or -1 

525 if allocated_size > current_allocated_size: 

526 current[volume.number] = allocated_size 

527 

528 total_size = 0 

529 for volumes in sizes.values(): 

530 for size in volumes.values(): 

531 total_size += size 

532 

533 return total_size * 1024 

534 

535 def get_min_physical_size(self): 

536 """ 

537 Give the minimum physical size of the SR. 

538 I.e. the size of the smallest disk + the number of pools. 

539 :return: The physical min size. 

540 :rtype: tuple(int, int) 

541 """ 

542 size = None 

543 pool_count = 0 

544 for pool in self._get_storage_pools(force=True): 

545 space = pool.free_space 

546 if space: 

547 pool_count += 1 

548 current_size = space.total_capacity 

549 if current_size < 0: 

550 raise LinstorVolumeManagerError( 

551 'Failed to get pool total_capacity attr of `{}`' 

552 .format(pool.node_name) 

553 ) 

554 if size is None or current_size < size: 

555 size = current_size 

556 return (pool_count, (size or 0) * 1024) 

557 

558 @property 

559 def metadata(self): 

560 """ 

561 Get the metadata of the SR. 

562 :return: Dictionary that contains metadata. 

563 :rtype: dict(str, dict) 

564 """ 

565 

566 sr_properties = self._get_sr_properties() 

567 metadata = sr_properties.get(self.PROP_METADATA) 

568 if metadata is not None: 

569 metadata = json.loads(metadata) 

570 if isinstance(metadata, dict): 

571 return metadata 

572 raise LinstorVolumeManagerError( 

573 'Expected dictionary in SR metadata: {}'.format( 

574 self._group_name 

575 ) 

576 ) 

577 

578 return {} 

579 

580 @metadata.setter 

581 def metadata(self, metadata): 

582 """ 

583 Set the metadata of the SR. 

584 :param dict metadata: Dictionary that contains metadata. 

585 """ 

586 

587 assert isinstance(metadata, dict) 

588 sr_properties = self._get_sr_properties() 

589 sr_properties[self.PROP_METADATA] = json.dumps(metadata) 

590 

591 @property 

592 def disconnected_hosts(self): 

593 """ 

594 Get the list of disconnected hosts. 

595 :return: Set that contains disconnected hosts. 

596 :rtype: set(str) 

597 """ 

598 

599 disconnected_hosts = set() 

600 for pool in self._get_storage_pools(): 

601 for report in pool.reports: 

602 if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \ 

603 linstor.consts.WARN_NOT_CONNECTED: 

604 disconnected_hosts.add(pool.node_name) 

605 break 

606 return disconnected_hosts 

607 

    def check_volume_exists(self, volume_uuid):
        """
        Check if a volume exists in the SR.
        :param str volume_uuid: The volume uuid to check.
        :return: True if volume exists.
        :rtype: bool
        """
        return volume_uuid in self._volumes

615 

    def create_volume(
        self,
        volume_uuid,
        size,
        persistent=True,
        volume_name=None,
        high_availability=False
    ):
        """
        Create a new volume on the SR.
        :param str volume_uuid: The volume uuid to use.
        :param int size: volume size in B.
        :param bool persistent: If false the volume will be unavailable
        on the next constructor call LinstorSR(...).
        :param str volume_name: If set, this name is used in the LINSTOR
        database instead of a generated name.
        :param bool high_availability: If set, the volume is created in
        the HA group.
        :return: The current device path of the volume.
        :rtype: str
        """

        self._logger('Creating LINSTOR volume {}...'.format(volume_uuid))
        if not volume_name:
            volume_name = self.build_volume_name(util.gen_uuid())
        volume_properties = self._create_volume_with_properties(
            volume_uuid,
            volume_name,
            size,
            True,  # place_resources
            high_availability
        )

        # Volume created! Now try to find the device path.
        try:
            self._logger(
                'Find device path of LINSTOR volume {}...'.format(volume_uuid)
            )
            device_path = self._find_device_path(volume_uuid, volume_name)
            if persistent:
                # Mark the volume as existing so it survives the next
                # LinstorSR constructor call.
                volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
            self._volumes.add(volume_uuid)
            self._logger(
                'LINSTOR volume {} created!'.format(volume_uuid)
            )
            return device_path
        except Exception:
            # There is an issue to find the path.
            # At this point the volume has just been created, so force flag can be used.
            self._destroy_volume(volume_uuid, force=True)
            raise

667 

668 def mark_volume_as_persistent(self, volume_uuid): 

669 """ 

670 Mark volume as persistent if created with persistent=False. 

671 :param str volume_uuid: The volume uuid to mark. 

672 """ 

673 

674 self._ensure_volume_exists(volume_uuid) 

675 

676 # Mark volume as persistent. 

677 volume_properties = self._get_volume_properties(volume_uuid) 

678 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS 

679 

680 def destroy_volume(self, volume_uuid): 

681 """ 

682 Destroy a volume. 

683 :param str volume_uuid: The volume uuid to destroy. 

684 """ 

685 

686 self._ensure_volume_exists(volume_uuid) 

687 self.ensure_volume_is_not_locked(volume_uuid) 

688 

689 # Mark volume as destroyed. 

690 volume_properties = self._get_volume_properties(volume_uuid) 

691 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS 

692 

693 try: 

694 self._volumes.remove(volume_uuid) 

695 self._destroy_volume(volume_uuid) 

696 except Exception as e: 

697 raise LinstorVolumeManagerError( 

698 str(e), 

699 LinstorVolumeManagerError.ERR_VOLUME_DESTROY 

700 ) 

701 

702 def lock_volume(self, volume_uuid, locked=True): 

703 """ 

704 Prevent modifications of the volume properties during 

705 "self.LOCKED_EXPIRATION_DELAY" seconds. The SR must be locked 

706 when used. This method is useful to attach/detach correctly a volume on 

707 a slave. Without it the GC can rename a volume, in this case the old 

708 volume path can be used by a slave... 

709 :param str volume_uuid: The volume uuid to protect/unprotect. 

710 :param bool locked: Lock/unlock the volume. 

711 """ 

712 

713 self._ensure_volume_exists(volume_uuid) 

714 

715 self._logger( 

716 '{} volume {} as locked'.format( 

717 'Mark' if locked else 'Unmark', 

718 volume_uuid 

719 ) 

720 ) 

721 

722 volume_properties = self._get_volume_properties(volume_uuid) 

723 if locked: 

724 volume_properties[ 

725 self.PROP_IS_READONLY_TIMESTAMP 

726 ] = str(time.time()) 

727 elif self.PROP_IS_READONLY_TIMESTAMP in volume_properties: 

728 volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP) 

729 

    def ensure_volume_is_not_locked(self, volume_uuid, timeout=None):
        """
        Ensure a volume is not locked. Wait if necessary.
        :param str volume_uuid: The volume uuid to check.
        :param int timeout: If the volume is always locked after the expiration
        of the timeout, an exception is thrown.
        """
        # Delegate to the list variant with a single-element list.
        return self.ensure_volume_list_is_not_locked([volume_uuid], timeout)

738 

    def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None):
        """
        Wait until none of the given volumes is locked (readonly timestamp
        set). Expired locks (older than LOCKED_EXPIRATION_DELAY) are removed
        on the fly. Unknown volume uuids are ignored.
        :param volume_uuids: Iterable of volume uuids to check.
        :param int timeout: If set and at least one volume is still locked
        after `timeout` seconds, an exception is raised.
        """
        # Only consider volumes this manager knows about.
        checked = set()
        for volume_uuid in volume_uuids:
            if volume_uuid in self._volumes:
                checked.add(volume_uuid)

        if not checked:
            return

        waiting = False

        volume_properties = self._get_kv_cache()

        start = time.time()
        while True:
            # Can't delete in for loop, use a copy of the list.
            remaining = checked.copy()
            for volume_uuid in checked:
                volume_properties.namespace = \
                    self._build_volume_namespace(volume_uuid)
                timestamp = volume_properties.get(
                    self.PROP_IS_READONLY_TIMESTAMP
                )
                if timestamp is None:
                    # Not locked (anymore).
                    remaining.remove(volume_uuid)
                    continue

                now = time.time()
                if now - float(timestamp) > self.LOCKED_EXPIRATION_DELAY:
                    # Stale lock: drop it instead of waiting forever.
                    self._logger(
                        'Remove readonly timestamp on {}'.format(volume_uuid)
                    )
                    volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
                    remaining.remove(volume_uuid)
                    continue

                if not waiting:
                    self._logger(
                        'Volume {} is locked, waiting...'.format(volume_uuid)
                    )
                    waiting = True
                break

            if not remaining:
                break
            checked = remaining

            if timeout is not None and now - start > timeout:
                raise LinstorVolumeManagerError(
                    'volume `{}` is locked and timeout has been reached'
                    .format(volume_uuid),
                    LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
                )

            # We must wait to use the volume. After that we can modify it
            # ONLY if the SR is locked to avoid bad reads on the slaves.
            time.sleep(1)
            volume_properties = self._create_kv_cache()

        if waiting:
            self._logger('No volume locked now!')

800 

    def remove_volume_if_diskless(self, volume_uuid):
        """
        Remove the diskless path of a volume from the local node, if any.
        Tie-breaker resources are preserved (required for quorum).
        :param str volume_uuid: The volume uuid to remove.
        """

        self._ensure_volume_exists(volume_uuid)

        volume_properties = self._get_volume_properties(volume_uuid)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        node_name = socket.gethostname()

        # Never delete a tie-breaker resource.
        for resource in self._get_resource_cache().resources:
            if resource.name == volume_name and resource.node_name == node_name:
                if linstor.consts.FLAG_TIE_BREAKER in resource.flags:
                    return
                break

        # No-op if the local resource is diskful.
        result = self._linstor.resource_delete_if_diskless(
            node_name=node_name, rsc_name=volume_name
        )
        if not linstor.Linstor.all_api_responses_no_error(result):
            raise LinstorVolumeManagerError(
                'Unable to delete diskless path of `{}` on node `{}`: {}'
                .format(volume_name, node_name, ', '.join(
                    [str(x) for x in result]))
            )

829 

    def introduce_volume(self, volume_uuid):
        """
        Introduce an existing volume into the SR (not implemented yet).
        :param str volume_uuid: The volume uuid to introduce.
        """
        pass  # TODO: Implement me.

832 

    def resize_volume(self, volume_uuid, new_size):
        """
        Resize a volume.
        :param str volume_uuid: The volume uuid to resize.
        :param int new_size: New size in B.
        :raises LinstorVolumeManagerError: If the definition change fails
        after all retries.
        """

        volume_name = self.get_volume_name(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)
        # LINSTOR expects KiB.
        new_size = self.round_up_volume_size(new_size) // 1024

        retry_count = 30
        while True:
            result = self._linstor.volume_dfn_modify(
                rsc_name=volume_name,
                volume_nr=0,
                size=new_size
            )

            self._mark_resource_cache_as_dirty()

            error_str = self._get_error_str(result)
            if not error_str:
                break

            # After volume creation, DRBD volume can be unusable during many seconds.
            # So we must retry the definition change if the device is not up to date.
            # Often the case for thick provisioning.
            if retry_count and error_str.find('non-UpToDate DRBD device') >= 0:
                time.sleep(2)
                retry_count -= 1
                continue

            raise LinstorVolumeManagerError(
                'Could not resize volume `{}` from SR `{}`: {}'
                .format(volume_uuid, self._group_name, error_str)
            )

870 

871 def get_volume_name(self, volume_uuid): 

872 """ 

873 Get the name of a particular volume. 

874 :param str volume_uuid: The volume uuid of the name to get. 

875 :return: The volume name. 

876 :rtype: str 

877 """ 

878 

879 self._ensure_volume_exists(volume_uuid) 

880 volume_properties = self._get_volume_properties(volume_uuid) 

881 volume_name = volume_properties.get(self.PROP_VOLUME_NAME) 

882 if volume_name: 

883 return volume_name 

884 raise LinstorVolumeManagerError( 

885 'Failed to get volume name of {}'.format(volume_uuid) 

886 ) 

887 

888 def get_volume_size(self, volume_uuid): 

889 """ 

890 Get the size of a particular volume. 

891 :param str volume_uuid: The volume uuid of the size to get. 

892 :return: The volume size. 

893 :rtype: int 

894 """ 

895 

896 volume_name = self.get_volume_name(volume_uuid) 

897 dfns = self._linstor.resource_dfn_list_raise( 

898 query_volume_definitions=True, 

899 filter_by_resource_definitions=[volume_name] 

900 ).resource_definitions 

901 

902 size = dfns[0].volume_definitions[0].size 

903 if size < 0: 

904 raise LinstorVolumeManagerError( 

905 'Failed to get volume size of: {}'.format(volume_uuid) 

906 ) 

907 return size * 1024 

908 

909 def set_auto_promote_timeout(self, volume_uuid, timeout): 

910 """ 

911 Define the blocking time of open calls when a DRBD 

912 is already open on another host. 

913 :param str volume_uuid: The volume uuid to modify. 

914 """ 

915 

916 volume_name = self.get_volume_name(volume_uuid) 

917 result = self._linstor.resource_dfn_modify(volume_name, { 

918 'DrbdOptions/Resource/auto-promote-timeout': timeout 

919 }) 

920 error_str = self._get_error_str(result) 

921 if error_str: 

922 raise LinstorVolumeManagerError( 

923 'Could not change the auto promote timeout of `{}`: {}' 

924 .format(volume_uuid, error_str) 

925 ) 

926 

927 def set_drbd_ha_properties(self, volume_name, enabled=True): 

928 """ 

929 Set or not HA DRBD properties required by drbd-reactor and 

930 by specific volumes. 

931 :param str volume_name: The volume to modify. 

932 :param bool enabled: Enable or disable HA properties. 

933 """ 

934 

935 properties = { 

936 'DrbdOptions/auto-quorum': 'disabled', 

937 'DrbdOptions/Resource/auto-promote': 'no', 

938 'DrbdOptions/Resource/on-no-data-accessible': 'io-error', 

939 'DrbdOptions/Resource/on-no-quorum': 'io-error', 

940 'DrbdOptions/Resource/on-suspended-primary-outdated': 'force-secondary', 

941 'DrbdOptions/Resource/quorum': 'majority' 

942 } 

943 if enabled: 

944 result = self._linstor.resource_dfn_modify(volume_name, properties) 

945 else: 

946 result = self._linstor.resource_dfn_modify(volume_name, {}, delete_props=list(properties.keys())) 

947 

948 error_str = self._get_error_str(result) 

949 if error_str: 

950 raise LinstorVolumeManagerError( 

951 'Could not modify HA DRBD properties on volume `{}`: {}' 

952 .format(volume_name, error_str) 

953 ) 

954 

    def get_volume_info(self, volume_uuid):
        """
        Get the volume info of a particular volume.
        :param str volume_uuid: The volume uuid of the volume info to get.
        :return: The volume info.
        :rtype: VolumeInfo
        """
        volume_name = self.get_volume_name(volume_uuid)
        return self._get_volumes_info()[volume_name]

965 

    def get_device_path(self, volume_uuid):
        """
        Get the dev path of a volume, create a diskless if necessary.
        :param str volume_uuid: The volume uuid to get the dev path.
        :return: The current device path of the volume.
        :rtype: str
        """
        volume_name = self.get_volume_name(volume_uuid)
        return self._find_device_path(volume_uuid, volume_name)

976 

977 def get_volume_uuid_from_device_path(self, device_path): 

978 """ 

979 Get the volume uuid of a device_path. 

980 :param str device_path: The dev path to find the volume uuid. 

981 :return: The volume uuid of the local device path. 

982 :rtype: str 

983 """ 

984 

985 expected_volume_name = \ 

986 self.get_volume_name_from_device_path(device_path) 

987 

988 volume_names = self.get_volumes_with_name() 

989 for volume_uuid, volume_name in volume_names.items(): 

990 if volume_name == expected_volume_name: 

991 return volume_uuid 

992 

993 raise LinstorVolumeManagerError( 

994 'Unable to find volume uuid from dev path `{}`'.format(device_path) 

995 ) 

996 

997 def get_volume_name_from_device_path(self, device_path): 

998 """ 

999 Get the volume name of a device_path. 

1000 :param str device_path: The dev path to find the volume name. 

1001 :return: The volume name of the device path. 

1002 :rtype: str 

1003 """ 

1004 

1005 # Assume that we have a path like this: 

1006 # - "/dev/drbd/by-res/xcp-volume-<UUID>/0" 

1007 # - "../xcp-volume-<UUID>/0" 

1008 if device_path.startswith(DRBD_BY_RES_PATH): 

1009 prefix_len = len(DRBD_BY_RES_PATH) 

1010 else: 

1011 assert device_path.startswith('../') 

1012 prefix_len = 3 

1013 

1014 res_name_end = device_path.find('/', prefix_len) 

1015 assert res_name_end != -1 

1016 return device_path[prefix_len:res_name_end] 

1017 

    def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
        """
        Change the uuid of a volume.

        The rename is a small crash-tolerant protocol over the LINSTOR KV
        store: the new namespace is first marked with PROP_UPDATING_UUID_SRC
        so a partially-copied destination can be detected and cleaned up
        after a crash, then the old namespace is cleared, and finally the
        marker is removed.

        :param str volume_uuid: The volume to modify.
        :param str new_volume_uuid: The new volume uuid to use.
        :param bool force: If true we don't check if volume_uuid is in the
        volume list. I.e. the volume can be marked as deleted but the volume
        can still be in the LINSTOR KV store if the deletion has failed.
        In specific cases like "undo" after a failed clone we must rename a bad
        deleted VDI.
        :raises LinstorVolumeManagerError: If the target uuid already exists,
        a previous rename is still in progress, the target namespace is not
        empty, or a KV-store operation fails.
        """

        self._logger(
            'Trying to update volume UUID {} to {}...'
            .format(volume_uuid, new_volume_uuid)
        )
        assert volume_uuid != new_volume_uuid, 'can\'t update volume UUID, same value'

        if not force:
            self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        if new_volume_uuid in self._volumes:
            raise LinstorVolumeManagerError(
                'Volume `{}` already exists'.format(new_volume_uuid),
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        volume_properties = self._get_volume_properties(volume_uuid)
        # A pending marker means an earlier rename of this volume never
        # completed; refuse to start a second one on top of it.
        if volume_properties.get(self.PROP_UPDATING_UUID_SRC):
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {}: invalid state'
                .format(volume_uuid)
            )

        # 1. Copy in temp variables metadata and volume_name.
        metadata = volume_properties.get(self.PROP_METADATA)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        # 2. Switch to new volume namespace.
        # NOTE: `volume_properties` is a namespaced view on the shared KV
        # store; reassigning `namespace` retargets all reads/writes below.
        volume_properties.namespace = self._build_volume_namespace(
            new_volume_uuid
        )

        # The destination namespace must be virgin, otherwise we would
        # silently merge two volumes' properties.
        if list(volume_properties.items()):
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {} to {}: '
                .format(volume_uuid, new_volume_uuid) +
                'this last one is not empty'
            )

        try:
            # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC.
            # If we crash after that, the new properties can be removed
            # properly.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
            volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid

            # 4. Copy the properties.
            # Note: On new volumes, during clone for example, the metadata
            # may be missing. So we must test it to avoid this error:
            # "None has to be a str/unicode, but is <type 'NoneType'>"
            if metadata:
                volume_properties[self.PROP_METADATA] = metadata
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            # 5. Ok!
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
        except Exception as err:
            try:
                # Clear the new volume properties in case of failure.
                assert volume_properties.namespace == \
                    self._build_volume_namespace(new_volume_uuid)
                volume_properties.clear()
            except Exception as e:
                # Best-effort cleanup: the stale namespace will be detected
                # later via the PROP_UPDATING_UUID_SRC marker.
                self._logger(
                    'Failed to clear new volume properties: {} (ignoring...)'
                    .format(e)
                )
            raise LinstorVolumeManagerError(
                'Failed to copy volume properties: {}'.format(err)
            )

        try:
            # 6. After this point, it's ok we can remove the
            # PROP_UPDATING_UUID_SRC property and clear the src properties
            # without problems.

            # 7. Switch to old volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                volume_uuid
            )
            volume_properties.clear()

            # 8. Switch a last time to new volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                new_volume_uuid
            )
            volume_properties.pop(self.PROP_UPDATING_UUID_SRC)
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to clear volume properties '
                'after volume uuid update: {}'.format(e)
            )

        # Keep the in-memory uuid set in sync with the KV store.
        self._volumes.remove(volume_uuid)
        self._volumes.add(new_volume_uuid)

        self._logger(
            'UUID update succeeded of {} to {}! (properties={})'
            .format(
                volume_uuid, new_volume_uuid,
                self._get_filtered_properties(volume_properties)
            )
        )

1133 

1134 def update_volume_name(self, volume_uuid, volume_name): 

1135 """ 

1136 Change the volume name of a volume. 

1137 :param str volume_uuid: The volume to modify. 

1138 :param str volume_name: The volume_name to use. 

1139 """ 

1140 

1141 self._ensure_volume_exists(volume_uuid) 

1142 self.ensure_volume_is_not_locked(volume_uuid) 

1143 if not volume_name.startswith(self.PREFIX_VOLUME): 

1144 raise LinstorVolumeManagerError( 

1145 'Volume name `{}` must be start with `{}`' 

1146 .format(volume_name, self.PREFIX_VOLUME) 

1147 ) 

1148 

1149 if volume_name not in self._fetch_resource_names(): 

1150 raise LinstorVolumeManagerError( 

1151 'Volume `{}` doesn\'t exist'.format(volume_name) 

1152 ) 

1153 

1154 volume_properties = self._get_volume_properties(volume_uuid) 

1155 volume_properties[self.PROP_VOLUME_NAME] = volume_name 

1156 

1157 def get_usage_states(self, volume_uuid): 

1158 """ 

1159 Check if a volume is currently used. 

1160 :param str volume_uuid: The volume uuid to check. 

1161 :return: A dictionnary that contains states. 

1162 :rtype: dict(str, bool or None) 

1163 """ 

1164 

1165 states = {} 

1166 

1167 volume_name = self.get_volume_name(volume_uuid) 

1168 for resource_state in self._linstor.resource_list_raise( 

1169 filter_by_resources=[volume_name] 

1170 ).resource_states: 

1171 states[resource_state.node_name] = resource_state.in_use 

1172 

1173 return states 

1174 

    def get_volume_openers(self, volume_uuid):
        """
        Get openers of a volume.
        :param str volume_uuid: The volume uuid to monitor.
        :return: A dictionnary that contains openers.
        :rtype: dict(str, obj)
        """
        # Delegates to the module-level helper; '0' is the DRBD volume
        # number inside the resource (this driver uses a single volume
        # per resource).
        return get_all_volume_openers(self.get_volume_name(volume_uuid), '0')

1183 

    def get_volumes_with_name(self):
        """
        Give a volume dictionnary that contains names actually owned.
        :return: A volume/name dict.
        :rtype: dict(str, str)
        """
        # REG_VOLUME_NAME selects the PROP_VOLUME_NAME property of every
        # volume namespace in the KV store.
        return self._get_volumes_by_property(self.REG_VOLUME_NAME)

1191 

1192 def get_volumes_with_info(self): 

1193 """ 

1194 Give a volume dictionnary that contains VolumeInfos. 

1195 :return: A volume/VolumeInfo dict. 

1196 :rtype: dict(str, VolumeInfo) 

1197 """ 

1198 

1199 volumes = {} 

1200 

1201 all_volume_info = self._get_volumes_info() 

1202 volume_names = self.get_volumes_with_name() 

1203 for volume_uuid, volume_name in volume_names.items(): 

1204 if volume_name: 

1205 volume_info = all_volume_info.get(volume_name) 

1206 if volume_info: 

1207 volumes[volume_uuid] = volume_info 

1208 continue 

1209 

1210 # Well I suppose if this volume is not available, 

1211 # LINSTOR has been used directly without using this API. 

1212 volumes[volume_uuid] = self.VolumeInfo('') 

1213 

1214 return volumes 

1215 

1216 def get_volumes_with_metadata(self): 

1217 """ 

1218 Give a volume dictionnary that contains metadata. 

1219 :return: A volume/metadata dict. 

1220 :rtype: dict(str, dict) 

1221 """ 

1222 

1223 volumes = {} 

1224 

1225 metadata = self._get_volumes_by_property(self.REG_METADATA) 

1226 for volume_uuid, volume_metadata in metadata.items(): 

1227 if volume_metadata: 

1228 volume_metadata = json.loads(volume_metadata) 

1229 if isinstance(volume_metadata, dict): 

1230 volumes[volume_uuid] = volume_metadata 

1231 continue 

1232 raise LinstorVolumeManagerError( 

1233 'Expected dictionary in volume metadata: {}' 

1234 .format(volume_uuid) 

1235 ) 

1236 

1237 volumes[volume_uuid] = {} 

1238 

1239 return volumes 

1240 

1241 def get_volume_metadata(self, volume_uuid): 

1242 """ 

1243 Get the metadata of a volume. 

1244 :return: Dictionary that contains metadata. 

1245 :rtype: dict 

1246 """ 

1247 

1248 self._ensure_volume_exists(volume_uuid) 

1249 volume_properties = self._get_volume_properties(volume_uuid) 

1250 metadata = volume_properties.get(self.PROP_METADATA) 

1251 if metadata: 

1252 metadata = json.loads(metadata) 

1253 if isinstance(metadata, dict): 

1254 return metadata 

1255 raise LinstorVolumeManagerError( 

1256 'Expected dictionary in volume metadata: {}' 

1257 .format(volume_uuid) 

1258 ) 

1259 return {} 

1260 

    def set_volume_metadata(self, volume_uuid, metadata):
        """
        Set the metadata of a volume.

        The whole metadata dict is replaced; see update_volume_metadata
        for a merge instead of an overwrite.

        :param dict metadata: Dictionary that contains metadata.
        """

        self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        assert isinstance(metadata, dict)
        volume_properties = self._get_volume_properties(volume_uuid)
        # Stored as a JSON string in the LINSTOR KV store.
        volume_properties[self.PROP_METADATA] = json.dumps(metadata)

1273 

1274 def update_volume_metadata(self, volume_uuid, metadata): 

1275 """ 

1276 Update the metadata of a volume. It modify only the given keys. 

1277 It doesn't remove unreferenced key instead of set_volume_metadata. 

1278 :param dict metadata: Dictionary that contains metadata. 

1279 """ 

1280 

1281 self._ensure_volume_exists(volume_uuid) 

1282 self.ensure_volume_is_not_locked(volume_uuid) 

1283 

1284 assert isinstance(metadata, dict) 

1285 volume_properties = self._get_volume_properties(volume_uuid) 

1286 

1287 current_metadata = json.loads( 

1288 volume_properties.get(self.PROP_METADATA, '{}') 

1289 ) 

1290 if not isinstance(metadata, dict): 

1291 raise LinstorVolumeManagerError( 

1292 'Expected dictionary in volume metadata: {}' 

1293 .format(volume_uuid) 

1294 ) 

1295 

1296 for key, value in metadata.items(): 

1297 current_metadata[key] = value 

1298 volume_properties[self.PROP_METADATA] = json.dumps(current_metadata) 

1299 

    def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True):
        """
        Clone a volume. Do not copy the data, this method creates a new volume
        with the same size.
        :param str volume_uuid: The volume to clone.
        :param str clone_uuid: The cloned volume.
        :param bool persistent: If false the volume will be unavailable
        on the next constructor call LinstorSR(...).
        :return: The current device path of the cloned volume.
        :rtype: str
        """

        volume_name = self.get_volume_name(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        # 1. Find ideal nodes + size to use.
        # NOTE(review): `ideal_node_names` is currently unused — the clone
        # placement is left to create_volume; confirm whether node pinning
        # was intended here.
        ideal_node_names, size = self._get_volume_node_names_and_size(
            volume_name
        )
        if size <= 0:
            raise LinstorVolumeManagerError(
                'Invalid size of {} for volume `{}`'.format(size, volume_name)
            )

        # 2. Create clone!
        return self.create_volume(clone_uuid, size, persistent)

1326 

1327 def remove_resourceless_volumes(self): 

1328 """ 

1329 Remove all volumes without valid or non-empty name 

1330 (i.e. without LINSTOR resource). It's different than 

1331 LinstorVolumeManager constructor that takes a `repair` param that 

1332 removes volumes with `PROP_NOT_EXISTS` to 1. 

1333 """ 

1334 

1335 resource_names = self._fetch_resource_names() 

1336 for volume_uuid, volume_name in self.get_volumes_with_name().items(): 

1337 if not volume_name or volume_name not in resource_names: 

1338 # Don't force, we can be sure of what's happening. 

1339 self.destroy_volume(volume_uuid) 

1340 

    def destroy(self):
        """
        Destroy this SR. Object should not be used after that.

        Preconditions: no driver-managed volumes and no external resources
        besides the LINSTOR database volume may remain. The method stops
        the local controller, unmounts and destroys the database volume,
        then removes resource groups and storage pools; on failure the
        controller is restored to its previous running state.
        :raises LinstorVolumeManagerError: If volumes remain.
        """

        # 1. Ensure volume list is empty. No cost.
        if self._volumes:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes'
            )

        # 2. Fetch ALL resource names.
        # This list may therefore contain volumes created outside
        # the scope of the driver.
        resource_names = self._fetch_resource_names(ignore_deleted=False)
        try:
            resource_names.remove(DATABASE_VOLUME_NAME)
        except KeyError:
            # Really strange to reach that point.
            # Normally we always have the database volume in the list.
            pass

        # 3. Ensure the resource name list is entirely empty...
        if resource_names:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes (created externally or being deleted)'
            )

        # 4. Destroying...
        controller_is_running = self._controller_is_running()
        uri = 'linstor://localhost'
        try:
            if controller_is_running:
                self._start_controller(start=False)

            # 4.1. Umount LINSTOR database.
            self._mount_database_volume(
                self.build_device_path(DATABASE_VOLUME_NAME),
                mount=False,
                force=True
            )

            # 4.2. Refresh instance.
            # The controller must be up again to execute the destroy calls
            # below; the old client connection is stale after the restart.
            self._start_controller(start=True)
            self._linstor = self._create_linstor_instance(
                uri, keep_uri_unmodified=True
            )

            # 4.3. Destroy database volume.
            self._destroy_resource(DATABASE_VOLUME_NAME)

            # 4.4. Refresh linstor connection.
            # Without we get this error:
            # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.."
            # Because the deletion of the databse was not seen by Linstor for some reason.
            # It seems a simple refresh of the Linstor connection make it aware of the deletion.
            self._linstor.disconnect()
            self._linstor.connect()

            # 4.5. Destroy remaining drbd nodes on hosts.
            # We check if there is a DRBD node on hosts that could mean blocking when destroying resource groups.
            # It needs to be done locally by each host so we go through the linstor-manager plugin.
            # If we don't do this sometimes, the destroy will fail when trying to destroy the resource groups with:
            # "linstor-manager:destroy error: Failed to destroy SP `xcp-sr-linstor_group_thin_device` on node `r620-s2`: The specified storage pool 'xcp-sr-linstor_group_thin_device' on node 'r620-s2' can not be deleted as volumes / snapshot-volumes are still using it."
            session = util.timeout_call(5, util.get_localAPI_session)
            for host_ref in session.xenapi.host.get_all():
                try:
                    response = session.xenapi.host.call_plugin(
                        host_ref, 'linstor-manager', 'destroyDrbdVolumes', {'volume_group': self._group_name}
                    )
                except Exception as e:
                    # Best-effort per host: log and keep going.
                    util.SMlog('Calling destroyDrbdVolumes on host {} failed with error {}'.format(host_ref, e))

            # 4.6. Destroy group and storage pools.
            self._destroy_resource_group(self._linstor, self._group_name)
            self._destroy_resource_group(self._linstor, self._ha_group_name)
            for pool in self._get_storage_pools(force=True):
                self._destroy_storage_pool(
                    self._linstor, pool.name, pool.node_name
                )
        except Exception as e:
            # Restore the controller to its initial state before
            # propagating the failure.
            self._start_controller(start=controller_is_running)
            raise e

        # Final best-effort cleanup of the local database directory; the SR
        # is already gone at this point so failures are only logged.
        try:
            self._start_controller(start=False)
            for file in os.listdir(DATABASE_PATH):
                if file != 'lost+found':
                    os.remove(DATABASE_PATH + '/' + file)
        except Exception as e:
            util.SMlog(
                'Ignoring failure after LINSTOR SR destruction: {}'
                .format(e)
            )

1438 

1439 def find_up_to_date_diskful_nodes(self, volume_uuid): 

1440 """ 

1441 Find all nodes that contain a specific volume using diskful disks. 

1442 The disk must be up to data to be used. 

1443 :param str volume_uuid: The volume to use. 

1444 :return: The available nodes. 

1445 :rtype: tuple(set(str), str) 

1446 """ 

1447 

1448 volume_name = self.get_volume_name(volume_uuid) 

1449 

1450 in_use_by = None 

1451 node_names = set() 

1452 

1453 resource_states = filter( 

1454 lambda resource_state: resource_state.name == volume_name, 

1455 self._get_resource_cache().resource_states 

1456 ) 

1457 

1458 for resource_state in resource_states: 

1459 volume_state = resource_state.volume_states[0] 

1460 if volume_state.disk_state == 'UpToDate': 

1461 node_names.add(resource_state.node_name) 

1462 if resource_state.in_use: 

1463 in_use_by = resource_state.node_name 

1464 

1465 return (node_names, in_use_by) 

1466 

    def invalidate_resource_cache(self):
        """
        If resources are impacted by external commands like vhdutil,
        it's necessary to call this function to invalidate current resource
        cache.
        """
        # The next _get_resource_cache() call will refetch from LINSTOR.
        self._mark_resource_cache_as_dirty()

1474 

    def has_node(self, node_name):
        """
        Check if a node exists in the LINSTOR database.
        :param str node_name: Node name to look up.
        :rtype: bool
        """
        result = self._linstor.node_list()
        error_str = self._get_error_str(result)
        if error_str:
            # NOTE(review): the failure here is the node *list* query, yet
            # the message interpolates node_name as if it were the query
            # parameter — confirm the intended wording.
            raise LinstorVolumeManagerError(
                'Failed to list nodes using `{}`: {}'
                .format(node_name, error_str)
            )
        # node() returns a falsy value when the name is unknown.
        return bool(result[0].node(node_name))

1488 

1489 def create_node(self, node_name, ip): 

1490 """ 

1491 Create a new node in the LINSTOR database. 

1492 :param str node_name: Node name to use. 

1493 :param str ip: Host IP to communicate. 

1494 """ 

1495 result = self._linstor.node_create( 

1496 node_name, 

1497 linstor.consts.VAL_NODE_TYPE_CMBD, 

1498 ip 

1499 ) 

1500 errors = self._filter_errors(result) 

1501 if errors: 

1502 error_str = self._get_error_str(errors) 

1503 raise LinstorVolumeManagerError( 

1504 'Failed to create node `{}`: {}'.format(node_name, error_str) 

1505 ) 

1506 

1507 def destroy_node(self, node_name): 

1508 """ 

1509 Destroy a node in the LINSTOR database. 

1510 :param str node_name: Node name to remove. 

1511 """ 

1512 result = self._linstor.node_delete(node_name) 

1513 errors = self._filter_errors(result) 

1514 if errors: 

1515 error_str = self._get_error_str(errors) 

1516 raise LinstorVolumeManagerError( 

1517 'Failed to destroy node `{}`: {}'.format(node_name, error_str) 

1518 ) 

1519 

1520 def create_node_interface(self, node_name, name, ip): 

1521 """ 

1522 Create a new node interface in the LINSTOR database. 

1523 :param str node_name: Node name of the interface to use. 

1524 :param str name: Interface to create. 

1525 :param str ip: IP of the interface. 

1526 """ 

1527 result = self._linstor.netinterface_create(node_name, name, ip) 

1528 errors = self._filter_errors(result) 

1529 if errors: 

1530 error_str = self._get_error_str(errors) 

1531 raise LinstorVolumeManagerError( 

1532 'Failed to create node interface on `{}`: {}'.format(node_name, error_str) 

1533 ) 

1534 

1535 def destroy_node_interface(self, node_name, name): 

1536 """ 

1537 Destroy a node interface in the LINSTOR database. 

1538 :param str node_name: Node name of the interface to remove. 

1539 :param str name: Interface to remove. 

1540 """ 

1541 

1542 if name == 'default': 

1543 raise LinstorVolumeManagerError( 

1544 'Unable to delete the default interface of a node!' 

1545 ) 

1546 

1547 result = self._linstor.netinterface_delete(node_name, name) 

1548 errors = self._filter_errors(result) 

1549 if errors: 

1550 error_str = self._get_error_str(errors) 

1551 raise LinstorVolumeManagerError( 

1552 'Failed to destroy node interface on `{}`: {}'.format(node_name, error_str) 

1553 ) 

1554 

1555 def modify_node_interface(self, node_name, name, ip): 

1556 """ 

1557 Modify a node interface in the LINSTOR database. Create it if necessary. 

1558 :param str node_name: Node name of the interface to use. 

1559 :param str name: Interface to modify or create. 

1560 :param str ip: IP of the interface. 

1561 """ 

1562 result = self._linstor.netinterface_create(node_name, name, ip) 

1563 errors = self._filter_errors(result) 

1564 if not errors: 

1565 return 

1566 

1567 if self._check_errors(errors, [linstor.consts.FAIL_EXISTS_NET_IF]): 

1568 result = self._linstor.netinterface_modify(node_name, name, ip) 

1569 errors = self._filter_errors(result) 

1570 if not errors: 

1571 return 

1572 

1573 error_str = self._get_error_str(errors) 

1574 raise LinstorVolumeManagerError( 

1575 'Unable to modify interface on `{}`: {}'.format(node_name, error_str) 

1576 ) 

1577 

    def list_node_interfaces(self, node_name):
        """
        List all node interfaces.
        :param str node_name: Node name to use to list interfaces.
        :return: Interface names mapped to their address and active flag.
        :rtype: dict(str, dict)
        """
        result = self._linstor.net_interface_list(node_name)
        if not result:
            raise LinstorVolumeManagerError(
                'Unable to list interfaces on `{}`: no list received'.format(node_name)
            )

        interfaces = {}
        for interface in result:
            # Read the raw REST payload; the client object does not expose
            # these fields directly.
            interface = interface._rest_data
            interfaces[interface['name']] = {
                'address': interface['address'],
                'active': interface['is_active']
            }
        return interfaces

1599 

1600 def get_node_preferred_interface(self, node_name): 

1601 """ 

1602 Get the preferred interface used by a node. 

1603 :param str node_name: Node name of the interface to get. 

1604 :rtype: str 

1605 """ 

1606 try: 

1607 nodes = self._linstor.node_list_raise([node_name]).nodes 

1608 if nodes: 

1609 properties = nodes[0].props 

1610 return properties.get('PrefNic', 'default') 

1611 return nodes 

1612 except Exception as e: 

1613 raise LinstorVolumeManagerError( 

1614 'Failed to get preferred interface: `{}`'.format(e) 

1615 ) 

1616 

1617 def set_node_preferred_interface(self, node_name, name): 

1618 """ 

1619 Set the preferred interface to use on a node. 

1620 :param str node_name: Node name of the interface. 

1621 :param str name: Preferred interface to use. 

1622 """ 

1623 result = self._linstor.node_modify(node_name, property_dict={'PrefNic': name}) 

1624 errors = self._filter_errors(result) 

1625 if errors: 

1626 error_str = self._get_error_str(errors) 

1627 raise LinstorVolumeManagerError( 

1628 'Failed to set preferred node interface on `{}`: {}'.format(node_name, error_str) 

1629 ) 

1630 

1631 def get_nodes_info(self): 

1632 """ 

1633 Get all nodes + statuses, used or not by the pool. 

1634 :rtype: dict(str, dict) 

1635 """ 

1636 try: 

1637 nodes = {} 

1638 for node in self._linstor.node_list_raise().nodes: 

1639 nodes[node.name] = node.connection_status 

1640 return nodes 

1641 except Exception as e: 

1642 raise LinstorVolumeManagerError( 

1643 'Failed to get all nodes: `{}`'.format(e) 

1644 ) 

1645 

1646 def get_storage_pools_info(self): 

1647 """ 

1648 Give all storage pools of current group name. 

1649 :rtype: dict(str, list) 

1650 """ 

1651 storage_pools = {} 

1652 for pool in self._get_storage_pools(force=True): 

1653 if pool.node_name not in storage_pools: 

1654 storage_pools[pool.node_name] = [] 

1655 

1656 size = -1 

1657 capacity = -1 

1658 

1659 space = pool.free_space 

1660 if space: 

1661 size = space.free_capacity 

1662 if size < 0: 

1663 size = -1 

1664 else: 

1665 size *= 1024 

1666 capacity = space.total_capacity 

1667 if capacity <= 0: 

1668 capacity = -1 

1669 else: 

1670 capacity *= 1024 

1671 

1672 storage_pools[pool.node_name].append({ 

1673 'name': pool.name, 

1674 'linstor-uuid': pool.uuid, 

1675 'free-size': size, 

1676 'capacity': capacity 

1677 }) 

1678 

1679 return storage_pools 

1680 

    def get_resources_info(self):
        """
        Give all resources of current group name.

        Builds, from the cached resource list, a map of
        resource-name -> {'uuid': <driver uuid or ''>, 'nodes': {...}},
        where each node entry describes its volumes, diskful/tie-breaker
        flags and (when known) in-use/disk states.
        :rtype: dict(str, dict)
        """
        resources = {}
        resource_list = self._get_resource_cache()
        volume_names = self.get_volumes_with_name()

        # Pass 1: static resource/volume layout per node.
        for resource in resource_list.resources:
            if resource.name not in resources:
                resources[resource.name] = { 'nodes': {}, 'uuid': '' }
            resource_nodes = resources[resource.name]['nodes']

            resource_nodes[resource.node_name] = {
                'volumes': [],
                'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags,
                'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags
            }
            resource_volumes = resource_nodes[resource.node_name]['volumes']

            for volume in resource.volumes:
                # We ignore diskless pools of the form "DfltDisklessStorPool".
                if volume.storage_pool_name != self._group_name:
                    continue

                # LINSTOR reports sizes in KiB; -1 means "unknown".
                usable_size = volume.usable_size
                if usable_size < 0:
                    usable_size = -1
                else:
                    usable_size *= 1024

                allocated_size = volume.allocated_size
                if allocated_size < 0:
                    allocated_size = -1
                else:
                    allocated_size *= 1024

                resource_volumes.append({
                    'storage-pool-name': volume.storage_pool_name,
                    'linstor-uuid': volume.uuid,
                    'number': volume.number,
                    'device-path': volume.device_path,
                    'usable-size': usable_size,
                    'allocated-size': allocated_size
                })

        # Pass 2: overlay runtime state (in-use, per-volume disk state).
        for resource_state in resource_list.resource_states:
            resource = resources[resource_state.rsc_name]['nodes'][resource_state.node_name]
            resource['in-use'] = resource_state.in_use

            volumes = resource['volumes']
            for volume_state in resource_state.volume_states:
                volume = next((x for x in volumes if x['number'] == volume_state.number), None)
                if volume:
                    volume['disk-state'] = volume_state.disk_state

        # Pass 3: attach the driver-level uuid to resources we own.
        for volume_uuid, volume_name in volume_names.items():
            resource = resources.get(volume_name)
            if resource:
                resource['uuid'] = volume_uuid

        return resources

1743 

    def get_database_path(self):
        """
        Get the database path.
        :return: The current database path.
        :rtype: str
        """
        # Resolved through LINSTOR using the current client connection.
        return self._request_database_path(self._linstor)

1751 

    @classmethod
    def get_all_group_names(cls, base_name):
        """
        Get all group names. I.e. list of current group + HA.
        :param str base_name: The SR group_name to use.
        :return: List of group names.
        :rtype: list
        """
        # Order matters for callers that treat index 0 as the main group.
        return [cls._build_group_name(base_name), cls._build_ha_group_name(base_name)]

1761 

    @classmethod
    def create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__):
        """
        Create a new SR on the given nodes.
        :param str group_name: The SR group_name to use.
        :param dict(str, str) ips: Node names mapped to their IPs
        (consumed via .items() in _create_sr).
        :param int redundancy: How many copy of volumes should we store?
        :param bool thin_provisioning: Use thin or thick provisioning.
        :param function logger: Function to log messages.
        :return: A new LinstorSr instance.
        :rtype: LinstorSr
        """

        try:
            cls._start_controller(start=True)
            sr = cls._create_sr(group_name, ips, redundancy, thin_provisioning, logger)
        finally:
            # Controller must be stopped and volume unmounted because
            # it is the role of the drbd-reactor daemon to do the right
            # actions.
            cls._start_controller(start=False)
            cls._mount_volume(
                cls.build_device_path(DATABASE_VOLUME_NAME),
                DATABASE_PATH,
                mount=False
            )
        return sr

1789 

1790 @classmethod 

1791 def _create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__): 

1792 # 1. Check if SR already exists. 

1793 uri = 'linstor://localhost' 

1794 

1795 lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) 

1796 

1797 node_names = list(ips.keys()) 

1798 for node_name, ip in ips.items(): 

1799 while True: 

1800 # Try to create node. 

1801 result = lin.node_create( 

1802 node_name, 

1803 linstor.consts.VAL_NODE_TYPE_CMBD, 

1804 ip 

1805 ) 

1806 

1807 errors = cls._filter_errors(result) 

1808 if cls._check_errors( 

1809 errors, [linstor.consts.FAIL_EXISTS_NODE] 

1810 ): 

1811 # If it already exists, remove, then recreate. 

1812 result = lin.node_delete(node_name) 

1813 error_str = cls._get_error_str(result) 

1814 if error_str: 

1815 raise LinstorVolumeManagerError( 

1816 'Failed to remove old node `{}`: {}' 

1817 .format(node_name, error_str) 

1818 ) 

1819 elif not errors: 

1820 break # Created! 

1821 else: 

1822 raise LinstorVolumeManagerError( 

1823 'Failed to create node `{}` with ip `{}`: {}'.format( 

1824 node_name, ip, cls._get_error_str(errors) 

1825 ) 

1826 ) 

1827 

1828 driver_pool_name = group_name 

1829 base_group_name = group_name 

1830 group_name = cls._build_group_name(group_name) 

1831 storage_pool_name = group_name 

1832 pools = lin.storage_pool_list_raise(filter_by_stor_pools=[storage_pool_name]).storage_pools 

1833 if pools: 

1834 existing_node_names = [pool.node_name for pool in pools] 

1835 raise LinstorVolumeManagerError( 

1836 'Unable to create SR `{}`. It already exists on node(s): {}' 

1837 .format(group_name, existing_node_names) 

1838 ) 

1839 

1840 if lin.resource_group_list_raise( 

1841 cls.get_all_group_names(base_group_name) 

1842 ).resource_groups: 

1843 if not lin.resource_dfn_list_raise().resource_definitions: 

1844 backup_path = cls._create_database_backup_path() 

1845 logger( 

1846 'Group name already exists `{}` without LVs. ' 

1847 'Ignoring and moving the config files in {}'.format(group_name, backup_path) 

1848 ) 

1849 cls._move_files(DATABASE_PATH, backup_path) 

1850 else: 

1851 raise LinstorVolumeManagerError( 

1852 'Unable to create SR `{}`: The group name already exists' 

1853 .format(group_name) 

1854 ) 

1855 

1856 if thin_provisioning: 

1857 driver_pool_parts = driver_pool_name.split('/') 

1858 if not len(driver_pool_parts) == 2: 

1859 raise LinstorVolumeManagerError( 

1860 'Invalid group name using thin provisioning. ' 

1861 'Expected format: \'VG/LV`\'' 

1862 ) 

1863 

1864 # 2. Create storage pool on each node + resource group. 

1865 reg_volume_group_not_found = re.compile( 

1866 ".*Volume group '.*' not found$" 

1867 ) 

1868 

1869 i = 0 

1870 try: 

1871 # 2.a. Create storage pools. 

1872 storage_pool_count = 0 

1873 while i < len(node_names): 

1874 node_name = node_names[i] 

1875 

1876 result = lin.storage_pool_create( 

1877 node_name=node_name, 

1878 storage_pool_name=storage_pool_name, 

1879 storage_driver='LVM_THIN' if thin_provisioning else 'LVM', 

1880 driver_pool_name=driver_pool_name 

1881 ) 

1882 

1883 errors = linstor.Linstor.filter_api_call_response_errors( 

1884 result 

1885 ) 

1886 if errors: 

1887 if len(errors) == 1 and errors[0].is_error( 

1888 linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR 

1889 ) and reg_volume_group_not_found.match(errors[0].message): 

1890 logger( 

1891 'Volume group `{}` not found on `{}`. Ignoring...' 

1892 .format(group_name, node_name) 

1893 ) 

1894 cls._destroy_storage_pool(lin, storage_pool_name, node_name) 

1895 else: 

1896 error_str = cls._get_error_str(result) 

1897 raise LinstorVolumeManagerError( 

1898 'Could not create SP `{}` on node `{}`: {}' 

1899 .format(group_name, node_name, error_str) 

1900 ) 

1901 else: 

1902 storage_pool_count += 1 

1903 i += 1 

1904 

1905 if not storage_pool_count: 

1906 raise LinstorVolumeManagerError( 

1907 'Unable to create SR `{}`: No VG group found'.format( 

1908 group_name, 

1909 ) 

1910 ) 

1911 

1912 # 2.b. Create resource groups. 

1913 ha_group_name = cls._build_ha_group_name(base_group_name) 

1914 cls._create_resource_group( 

1915 lin, 

1916 group_name, 

1917 storage_pool_name, 

1918 redundancy, 

1919 True 

1920 ) 

1921 cls._create_resource_group( 

1922 lin, 

1923 ha_group_name, 

1924 storage_pool_name, 

1925 3, 

1926 True 

1927 ) 

1928 

1929 # 3. Create the LINSTOR database volume and mount it. 

1930 try: 

1931 logger('Creating database volume...') 

1932 volume_path = cls._create_database_volume( 

1933 lin, ha_group_name, storage_pool_name, node_names, redundancy 

1934 ) 

1935 except LinstorVolumeManagerError as e: 

1936 if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS: 

1937 logger('Destroying database volume after creation fail...') 

1938 cls._force_destroy_database_volume(lin, group_name) 

1939 raise 

1940 

1941 try: 

1942 logger('Mounting database volume...') 

1943 

1944 # First we must disable the controller to move safely the 

1945 # LINSTOR config. 

1946 cls._start_controller(start=False) 

1947 

1948 cls._mount_database_volume(volume_path) 

1949 except Exception as e: 

1950 # Ensure we are connected because controller has been 

1951 # restarted during mount call. 

1952 logger('Destroying database volume after mount fail...') 

1953 

1954 try: 

1955 cls._start_controller(start=True) 

1956 except Exception: 

1957 pass 

1958 

1959 lin = cls._create_linstor_instance( 

1960 uri, keep_uri_unmodified=True 

1961 ) 

1962 cls._force_destroy_database_volume(lin, group_name) 

1963 raise e 

1964 

1965 cls._start_controller(start=True) 

1966 lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) 

1967 

1968 # 4. Remove storage pools/resource/volume group in the case of errors. 

1969 except Exception as e: 

1970 logger('Destroying resource group and storage pools after fail...') 

1971 try: 

1972 cls._destroy_resource_group(lin, group_name) 

1973 cls._destroy_resource_group(lin, ha_group_name) 

1974 except Exception as e2: 

1975 logger('Failed to destroy resource group: {}'.format(e2)) 

1976 pass 

1977 j = 0 

1978 i = min(i, len(node_names) - 1) 

1979 while j <= i: 

1980 try: 

1981 cls._destroy_storage_pool(lin, storage_pool_name, node_names[j]) 

1982 except Exception as e2: 

1983 logger('Failed to destroy resource group: {}'.format(e2)) 

1984 pass 

1985 j += 1 

1986 raise e 

1987 

1988 # 5. Return new instance. 

1989 instance = cls.__new__(cls) 

1990 instance._linstor = lin 

1991 instance._logger = logger 

1992 instance._redundancy = redundancy 

1993 instance._base_group_name = base_group_name 

1994 instance._group_name = group_name 

1995 instance._volumes = set() 

1996 instance._storage_pools_time = 0 

1997 instance._kv_cache = instance._create_kv_cache() 

1998 instance._resource_cache = None 

1999 instance._resource_cache_dirty = True 

2000 instance._volume_info_cache = None 

2001 instance._volume_info_cache_dirty = True 

2002 return instance 

2003 

2004 @classmethod 

2005 def build_device_path(cls, volume_name): 

2006 """ 

2007 Build a device path given a volume name. 

2008 :param str volume_name: The volume name to use. 

2009 :return: A valid or not device path. 

2010 :rtype: str 

2011 """ 

2012 

2013 return '{}{}/0'.format(cls.DEV_ROOT_PATH, volume_name) 

2014 

2015 @classmethod 

2016 def build_volume_name(cls, base_name): 

2017 """ 

2018 Build a volume name given a base name (i.e. a UUID). 

2019 :param str base_name: The volume name to use. 

2020 :return: A valid or not device path. 

2021 :rtype: str 

2022 """ 

2023 return '{}{}'.format(cls.PREFIX_VOLUME, base_name) 

2024 

2025 @classmethod 

2026 def round_up_volume_size(cls, volume_size): 

2027 """ 

2028 Align volume size on higher multiple of BLOCK_SIZE. 

2029 :param int volume_size: The volume size to align. 

2030 :return: An aligned volume size. 

2031 :rtype: int 

2032 """ 

2033 return round_up(volume_size, cls.BLOCK_SIZE) 

2034 

2035 @classmethod 

2036 def round_down_volume_size(cls, volume_size): 

2037 """ 

2038 Align volume size on lower multiple of BLOCK_SIZE. 

2039 :param int volume_size: The volume size to align. 

2040 :return: An aligned volume size. 

2041 :rtype: int 

2042 """ 

2043 return round_down(volume_size, cls.BLOCK_SIZE) 

2044 

2045 # -------------------------------------------------------------------------- 

2046 # Private helpers. 

2047 # -------------------------------------------------------------------------- 

2048 

2049 def _create_kv_cache(self): 

2050 self._kv_cache = self._create_linstor_kv('/') 

2051 self._kv_cache_dirty = False 

2052 return self._kv_cache 

2053 

2054 def _get_kv_cache(self): 

2055 if self._kv_cache_dirty: 

2056 self._kv_cache = self._create_kv_cache() 

2057 return self._kv_cache 

2058 

2059 def _create_resource_cache(self): 

2060 self._resource_cache = self._linstor.resource_list_raise() 

2061 self._resource_cache_dirty = False 

2062 return self._resource_cache 

2063 

2064 def _get_resource_cache(self): 

2065 if self._resource_cache_dirty: 

2066 self._resource_cache = self._create_resource_cache() 

2067 return self._resource_cache 

2068 

2069 def _mark_resource_cache_as_dirty(self): 

2070 self._resource_cache_dirty = True 

2071 self._volume_info_cache_dirty = True 

2072 

2073 # -------------------------------------------------------------------------- 

2074 

2075 def _ensure_volume_exists(self, volume_uuid): 

2076 if volume_uuid not in self._volumes: 

2077 raise LinstorVolumeManagerError( 

2078 'volume `{}` doesn\'t exist'.format(volume_uuid), 

2079 LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS 

2080 ) 

2081 

2082 def _find_best_size_candidates(self): 

2083 result = self._linstor.resource_group_qmvs(self._group_name) 

2084 error_str = self._get_error_str(result) 

2085 if error_str: 

2086 raise LinstorVolumeManagerError( 

2087 'Failed to get max volume size allowed of SR `{}`: {}'.format( 

2088 self._group_name, 

2089 error_str 

2090 ) 

2091 ) 

2092 return result[0].candidates 

2093 

2094 def _fetch_resource_names(self, ignore_deleted=True): 

2095 resource_names = set() 

2096 dfns = self._linstor.resource_dfn_list_raise().resource_definitions 

2097 for dfn in dfns: 

2098 if dfn.resource_group_name in self.get_all_group_names(self._base_group_name) and ( 

2099 ignore_deleted or 

2100 linstor.consts.FLAG_DELETE not in dfn.flags 

2101 ): 

2102 resource_names.add(dfn.name) 

2103 return resource_names 

2104 

2105 def _get_volumes_info(self, volume_name=None): 

2106 all_volume_info = {} 

2107 

2108 if not self._volume_info_cache_dirty: 

2109 return self._volume_info_cache 

2110 

2111 def process_resource(resource): 

2112 if resource.name not in all_volume_info: 

2113 current = all_volume_info[resource.name] = self.VolumeInfo( 

2114 resource.name 

2115 ) 

2116 else: 

2117 current = all_volume_info[resource.name] 

2118 

2119 if linstor.consts.FLAG_DISKLESS not in resource.flags: 

2120 current.diskful.append(resource.node_name) 

2121 

2122 for volume in resource.volumes: 

2123 # We ignore diskless pools of the form "DfltDisklessStorPool". 

2124 if volume.storage_pool_name != self._group_name: 

2125 continue 

2126 # Only fetch first volume. 

2127 if volume.number != 0: 

2128 continue 

2129 

2130 allocated_size = volume.allocated_size 

2131 if allocated_size > current.allocated_size: 

2132 current.allocated_size = allocated_size 

2133 

2134 usable_size = volume.usable_size 

2135 if usable_size > 0 and ( 

2136 usable_size < current.virtual_size or 

2137 not current.virtual_size 

2138 ): 

2139 current.virtual_size = usable_size 

2140 

2141 try: 

2142 for resource in self._get_resource_cache().resources: 

2143 process_resource(resource) 

2144 for volume in all_volume_info.values(): 

2145 if volume.allocated_size <= 0: 

2146 raise LinstorVolumeManagerError('Failed to get allocated size of `{}`'.format(resource.name)) 

2147 

2148 if volume.virtual_size <= 0: 

2149 raise LinstorVolumeManagerError('Failed to get usable size of `{}`'.format(volume.name)) 

2150 

2151 volume.allocated_size *= 1024 

2152 volume.virtual_size *= 1024 

2153 except LinstorVolumeManagerError: 

2154 self._mark_resource_cache_as_dirty() 

2155 raise 

2156 

2157 self._volume_info_cache_dirty = False 

2158 self._volume_info_cache = all_volume_info 

2159 

2160 return all_volume_info 

2161 

2162 def _get_volume_node_names_and_size(self, volume_name): 

2163 node_names = set() 

2164 size = -1 

2165 for resource in self._linstor.resource_list_raise( 

2166 filter_by_resources=[volume_name] 

2167 ).resources: 

2168 for volume in resource.volumes: 

2169 # We ignore diskless pools of the form "DfltDisklessStorPool". 

2170 if volume.storage_pool_name != self._group_name: 

2171 continue 

2172 

2173 node_names.add(resource.node_name) 

2174 

2175 usable_size = volume.usable_size 

2176 if usable_size <= 0: 

2177 continue 

2178 

2179 if size < 0: 

2180 size = usable_size 

2181 else: 

2182 size = min(size, usable_size) 

2183 

2184 if size <= 0: 

2185 raise LinstorVolumeManagerError('Failed to get usable size of `{}`'.format(resource.name)) 

2186 

2187 return (node_names, size * 1024) 

2188 

2189 def _compute_size(self, attr): 

2190 capacity = 0 

2191 for pool in self._get_storage_pools(force=True): 

2192 space = pool.free_space 

2193 if space: 

2194 size = getattr(space, attr) 

2195 if size < 0: 

2196 raise LinstorVolumeManagerError( 

2197 'Failed to get pool {} attr of `{}`' 

2198 .format(attr, pool.node_name) 

2199 ) 

2200 capacity += size 

2201 return capacity * 1024 

2202 

2203 def _get_node_names(self): 

2204 node_names = set() 

2205 for pool in self._get_storage_pools(): 

2206 node_names.add(pool.node_name) 

2207 return node_names 

2208 

2209 def _get_storage_pools(self, force=False): 

2210 cur_time = time.time() 

2211 elsaped_time = cur_time - self._storage_pools_time 

2212 

2213 if force or elsaped_time >= self.STORAGE_POOLS_FETCH_INTERVAL: 

2214 self._storage_pools = self._linstor.storage_pool_list_raise( 

2215 filter_by_stor_pools=[self._group_name] 

2216 ).storage_pools 

2217 self._storage_pools_time = time.time() 

2218 

2219 return self._storage_pools 

2220 

    def _create_volume(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create a LINSTOR resource definition for a new volume and optionally
        auto-place its diskful replicas. Retried up to 5 times as a whole.
        :param str volume_uuid: Volume UUID (used in error reports and for
        cleanup of the volume properties).
        :param str volume_name: LINSTOR resource name to spawn.
        :param int size: Requested size in B; rounded up to BLOCK_SIZE.
        :param bool place_resources: If True, auto-place replicas using the
        SR redundancy.
        :param bool high_availability: If True, spawn in the HA resource
        group instead of the default group.
        :raises LinstorVolumeManagerError: On creation failure. The partial
        volume is destroyed first, except when the error is
        ERR_VOLUME_EXISTS (never destroy a pre-existing resource).
        """
        size = self.round_up_volume_size(size)
        self._mark_resource_cache_as_dirty()

        group_name = self._ha_group_name if high_availability else self._group_name
        def create_definition():
            # Spawn the definition only (no data placement yet). If the HA
            # group is missing, create it once and retry the spawn.
            first_attempt = True
            while True:
                try:
                    self._check_volume_creation_errors(
                        self._linstor.resource_group_spawn(
                            rsc_grp_name=group_name,
                            rsc_dfn_name=volume_name,
                            vlm_sizes=['{}B'.format(size)],
                            definitions_only=True
                        ),
                        volume_uuid,
                        self._group_name
                    )
                    break
                except LinstorVolumeManagerError as e:
                    if (
                        not first_attempt or
                        not high_availability or
                        e.code != LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
                    ):
                        raise

                    first_attempt = False
                    self._create_resource_group(
                        self._linstor,
                        group_name,
                        self._group_name,
                        3,
                        True
                    )

            self._configure_volume_peer_slots(self._linstor, volume_name)

        def clean():
            # Best-effort cleanup of a partially created volume; keep the
            # properties so the caller's bookkeeping stays consistent.
            try:
                self._destroy_volume(volume_uuid, force=True, preserve_properties=True)
            except Exception as e:
                self._logger(
                    'Unable to destroy volume {} after creation fail: {}'
                    .format(volume_uuid, e)
                )

        def create():
            try:
                create_definition()
                if place_resources:
                    # Basic case when we use the default redundancy of the group.
                    self._check_volume_creation_errors(
                        self._linstor.resource_auto_place(
                            rsc_name=volume_name,
                            place_count=self._redundancy,
                            diskless_on_remaining=False
                        ),
                        volume_uuid,
                        self._group_name
                    )
            except LinstorVolumeManagerError as e:
                # Do not clean up on ERR_VOLUME_EXISTS: the resource was not
                # created by this call.
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    clean()
                raise
            except Exception:
                clean()
                raise

        util.retry(create, maxretry=5)

2299 

    def _create_volume_with_properties(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create a volume and register it in the KV store. The KV properties
        are written *before* the LINSTOR resource creation so that a crash
        leaves a detectable STATE_CREATING marker.
        :param str volume_uuid: Volume UUID; must not already exist.
        :param str volume_name: LINSTOR resource name; must not already exist.
        :param int size: Requested size in B.
        :param bool place_resources: If True, auto-place replicas.
        :param bool high_availability: If True, use the HA resource group.
        :return: The volume properties KV view of the new volume.
        :raises LinstorVolumeManagerError: If the volume/resource/properties
        already exist or on creation failure.
        """
        if self.check_volume_exists(volume_uuid):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, it already exists'
                .format(volume_uuid, self._group_name) + ' in properties',
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        if volume_name in self._fetch_resource_names():
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    volume_uuid, self._group_name
                ) + 'resource of the same name already exists in LINSTOR'
            )

        # I am paranoid.
        volume_properties = self._get_volume_properties(volume_uuid)
        if (volume_properties.get(self.PROP_NOT_EXISTS) is not None):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}`, '.format(volume_uuid) +
                'properties already exist'
            )

        try:
            # Mark the volume as "being created" before touching LINSTOR.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_CREATING
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            self._create_volume(
                volume_uuid,
                volume_name,
                size,
                place_resources,
                high_availability
            )

            assert volume_properties.namespace == \
                self._build_volume_namespace(volume_uuid)
            return volume_properties
        except LinstorVolumeManagerError as e:
            # Do not destroy existing resource!
            # In theory we can't get this error because we check this event
            # before the `self._create_volume` case.
            # It can only happen if the same volume uuid is used in the same
            # call in another host.
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                self._destroy_volume(volume_uuid, force=True)
            raise

2354 

2355 def _find_device_path(self, volume_uuid, volume_name): 

2356 current_device_path = self._request_device_path( 

2357 volume_uuid, volume_name, activate=True 

2358 ) 

2359 

2360 # We use realpath here to get the /dev/drbd<id> path instead of 

2361 # /dev/drbd/by-res/<resource_name>. 

2362 expected_device_path = self.build_device_path(volume_name) 

2363 util.wait_for_path(expected_device_path, 5) 

2364 

2365 device_realpath = os.path.realpath(expected_device_path) 

2366 if current_device_path != device_realpath: 

2367 raise LinstorVolumeManagerError( 

2368 'Invalid path, current={}, expected={} (realpath={})' 

2369 .format( 

2370 current_device_path, 

2371 expected_device_path, 

2372 device_realpath 

2373 ) 

2374 ) 

2375 return expected_device_path 

2376 

2377 def _request_device_path(self, volume_uuid, volume_name, activate=False): 

2378 node_name = socket.gethostname() 

2379 

2380 resource = next(filter( 

2381 lambda resource: resource.node_name == node_name and 

2382 resource.name == volume_name, 

2383 self._get_resource_cache().resources 

2384 ), None) 

2385 

2386 if not resource: 

2387 if activate: 

2388 self._mark_resource_cache_as_dirty() 

2389 self._activate_device_path( 

2390 self._linstor, node_name, volume_name 

2391 ) 

2392 return self._request_device_path(volume_uuid, volume_name) 

2393 raise LinstorVolumeManagerError( 

2394 'Empty dev path for `{}`, but definition "seems" to exist' 

2395 .format(volume_uuid) 

2396 ) 

2397 # Contains a path of the /dev/drbd<id> form. 

2398 return resource.volumes[0].device_path 

2399 

2400 def _destroy_resource(self, resource_name, force=False): 

2401 result = self._linstor.resource_dfn_delete(resource_name) 

2402 error_str = self._get_error_str(result) 

2403 if not error_str: 

2404 self._mark_resource_cache_as_dirty() 

2405 return 

2406 

2407 if not force: 

2408 self._mark_resource_cache_as_dirty() 

2409 raise LinstorVolumeManagerError( 

2410 'Could not destroy resource `{}` from SR `{}`: {}' 

2411 .format(resource_name, self._group_name, error_str) 

2412 ) 

2413 

2414 # If force is used, ensure there is no opener. 

2415 all_openers = get_all_volume_openers(resource_name, '0') 

2416 for openers in all_openers.values(): 

2417 if openers: 

2418 self._mark_resource_cache_as_dirty() 

2419 raise LinstorVolumeManagerError( 

2420 'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)' 

2421 .format(resource_name, self._group_name, error_str, all_openers) 

2422 ) 

2423 

2424 # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue? 

2425 resource_states = filter( 

2426 lambda resource_state: resource_state.name == resource_name, 

2427 self._get_resource_cache().resource_states 

2428 ) 

2429 

2430 # Mark only after computation of states. 

2431 self._mark_resource_cache_as_dirty() 

2432 

2433 for resource_state in resource_states: 

2434 volume_state = resource_state.volume_states[0] 

2435 if resource_state.in_use: 

2436 demote_drbd_resource(resource_state.node_name, resource_name) 

2437 break 

2438 self._destroy_resource(resource_name) 

2439 

2440 def _destroy_volume(self, volume_uuid, force=False, preserve_properties=False): 

2441 volume_properties = self._get_volume_properties(volume_uuid) 

2442 try: 

2443 volume_name = volume_properties.get(self.PROP_VOLUME_NAME) 

2444 if volume_name in self._fetch_resource_names(): 

2445 self._destroy_resource(volume_name, force) 

2446 

2447 # Assume this call is atomic. 

2448 if not preserve_properties: 

2449 volume_properties.clear() 

2450 except Exception as e: 

2451 raise LinstorVolumeManagerError( 

2452 'Cannot destroy volume `{}`: {}'.format(volume_uuid, e) 

2453 ) 

2454 

    def _build_volumes(self, repair):
        """
        Rebuild `self._volumes` from the KV store, optionally repairing
        inconsistent entries.
        Volumes in STATE_EXISTS are always kept. Volumes in STATE_CREATING
        are kept only when `repair` is False (a slave reading during
        creation); with `repair` True they are treated as leftovers of a
        crashed creation and removed. Pending UUID-rename transactions
        (PROP_UPDATING_UUID_SRC) are completed at the end.
        :param bool repair: Allow destructive cleanup of bad entries.
        :raises LinstorVolumeManagerError: If rename transactions exist and
        `repair` is False.
        """
        properties = self._kv_cache
        resource_names = self._fetch_resource_names()

        self._volumes = set()

        updating_uuid_volumes = self._get_volumes_by_property(
            self.REG_UPDATING_UUID_SRC, ignore_inexisting_volumes=False
        )
        if updating_uuid_volumes and not repair:
            raise LinstorVolumeManagerError(
                'Cannot build LINSTOR volume list: '
                'It exists invalid "updating uuid volumes", repair is required'
            )

        existing_volumes = self._get_volumes_by_property(
            self.REG_NOT_EXISTS, ignore_inexisting_volumes=False
        )
        for volume_uuid, not_exists in existing_volumes.items():
            properties.namespace = self._build_volume_namespace(volume_uuid)

            # Skip volumes caught in a rename transaction: they are handled
            # by the dedicated loop below.
            src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC)
            if src_uuid:
                self._logger(
                    'Ignoring volume during manager initialization with prop '
                    ' PROP_UPDATING_UUID_SRC: {} (properties={})'
                    .format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Insert volume in list if the volume exists. Or if the volume
            # is being created and a slave wants to use it (repair = False).
            #
            # If we are on the master and if repair is True and state is
            # Creating, it's probably a bug or crash: the creation process has
            # been stopped.
            if not_exists == self.STATE_EXISTS or (
                not repair and not_exists == self.STATE_CREATING
            ):
                self._volumes.add(volume_uuid)
                continue

            if not repair:
                self._logger(
                    'Ignoring bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Remove bad volume.
            try:
                self._logger(
                    'Removing bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                volume_name = properties.get(self.PROP_VOLUME_NAME)

                # Little optimization, don't call `self._destroy_volume`,
                # we already have resource name list.
                if volume_name in resource_names:
                    self._destroy_resource(volume_name, force=True)

                # Assume this call is atomic.
                properties.clear()
            except Exception as e:
                # Do not raise, we don't want to block user action.
                self._logger(
                    'Cannot clean volume {}: {}'.format(volume_uuid, e)
                )

                # The volume can't be removed, maybe it's still in use,
                # in this case rename it with the "DELETED_" prefix.
                # This prefix is mandatory if it exists a snap transaction to
                # rollback because the original VDI UUID can try to be renamed
                # with the UUID we are trying to delete...
                if not volume_uuid.startswith('DELETED_'):
                    self.update_volume_uuid(
                        volume_uuid, 'DELETED_' + volume_uuid, force=True
                    )

        # Finish interrupted UUID-rename transactions: keep the destination
        # entry if it exists, drop the source entry and the transaction flag.
        for dest_uuid, src_uuid in updating_uuid_volumes.items():
            dest_namespace = self._build_volume_namespace(dest_uuid)

            properties.namespace = dest_namespace
            if int(properties.get(self.PROP_NOT_EXISTS)):
                properties.clear()
                continue

            properties.namespace = self._build_volume_namespace(src_uuid)
            properties.clear()

            properties.namespace = dest_namespace
            properties.pop(self.PROP_UPDATING_UUID_SRC)

            if src_uuid in self._volumes:
                self._volumes.remove(src_uuid)
            self._volumes.add(dest_uuid)

2561 

2562 def _get_sr_properties(self): 

2563 return self._create_linstor_kv(self._build_sr_namespace()) 

2564 

2565 def _get_volumes_by_property( 

2566 self, reg_prop, ignore_inexisting_volumes=True 

2567 ): 

2568 base_properties = self._get_kv_cache() 

2569 base_properties.namespace = self._build_volume_namespace() 

2570 

2571 volume_properties = {} 

2572 for volume_uuid in self._volumes: 

2573 volume_properties[volume_uuid] = '' 

2574 

2575 for key, value in base_properties.items(): 

2576 res = reg_prop.match(key) 

2577 if res: 

2578 volume_uuid = res.groups()[0] 

2579 if not ignore_inexisting_volumes or \ 

2580 volume_uuid in self._volumes: 

2581 volume_properties[volume_uuid] = value 

2582 

2583 return volume_properties 

2584 

2585 def _create_linstor_kv(self, namespace): 

2586 return linstor.KV( 

2587 self._group_name, 

2588 uri=self._linstor.controller_host(), 

2589 namespace=namespace 

2590 ) 

2591 

2592 def _get_volume_properties(self, volume_uuid): 

2593 properties = self._get_kv_cache() 

2594 properties.namespace = self._build_volume_namespace(volume_uuid) 

2595 return properties 

2596 

2597 @classmethod 

2598 def _build_sr_namespace(cls): 

2599 return '/{}/'.format(cls.NAMESPACE_SR) 

2600 

2601 @classmethod 

2602 def _build_volume_namespace(cls, volume_uuid=None): 

2603 # Return a path to all volumes if `volume_uuid` is not given. 

2604 if volume_uuid is None: 

2605 return '/{}/'.format(cls.NAMESPACE_VOLUME) 

2606 return '/{}/{}/'.format(cls.NAMESPACE_VOLUME, volume_uuid) 

2607 

2608 @classmethod 

2609 def _get_error_str(cls, result): 

2610 return ', '.join([ 

2611 err.message for err in cls._filter_errors(result) 

2612 ]) 

2613 

2614 @classmethod 

2615 def _create_linstor_instance( 

2616 cls, uri, keep_uri_unmodified=False, attempt_count=30 

2617 ): 

2618 retry = False 

2619 

2620 def connect(uri): 

2621 if not uri: 

2622 uri = get_controller_uri() 

2623 if not uri: 

2624 raise LinstorVolumeManagerError( 

2625 'Unable to find controller uri...' 

2626 ) 

2627 instance = linstor.Linstor(uri, keep_alive=True) 

2628 instance.connect() 

2629 return instance 

2630 

2631 try: 

2632 return connect(uri) 

2633 except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError): 

2634 pass 

2635 

2636 if not keep_uri_unmodified: 

2637 uri = None 

2638 

2639 return util.retry( 

2640 lambda: connect(uri), 

2641 maxretry=attempt_count, 

2642 period=1, 

2643 exceptions=[ 

2644 linstor.errors.LinstorNetworkError, 

2645 LinstorVolumeManagerError 

2646 ] 

2647 ) 

2648 

2649 @classmethod 

2650 def _configure_volume_peer_slots(cls, lin, volume_name): 

2651 result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3) 

2652 error_str = cls._get_error_str(result) 

2653 if error_str: 

2654 raise LinstorVolumeManagerError( 

2655 'Could not configure volume peer slots of {}: {}' 

2656 .format(volume_name, error_str) 

2657 ) 

2658 

2659 @classmethod 

2660 def _activate_device_path(cls, lin, node_name, volume_name): 

2661 result = lin.resource_make_available(node_name, volume_name, diskful=False) 

2662 if linstor.Linstor.all_api_responses_no_error(result): 

2663 return 

2664 errors = linstor.Linstor.filter_api_call_response_errors(result) 

2665 if len(errors) == 1 and errors[0].is_error( 

2666 linstor.consts.FAIL_EXISTS_RSC 

2667 ): 

2668 return 

2669 

2670 raise LinstorVolumeManagerError( 

2671 'Unable to activate device path of `{}` on node `{}`: {}' 

2672 .format(volume_name, node_name, ', '.join( 

2673 [str(x) for x in result])) 

2674 ) 

2675 

2676 @classmethod 

2677 def _request_database_path(cls, lin, activate=False): 

2678 node_name = socket.gethostname() 

2679 

2680 try: 

2681 resource = next(filter( 

2682 lambda resource: resource.node_name == node_name and 

2683 resource.name == DATABASE_VOLUME_NAME, 

2684 lin.resource_list_raise().resources 

2685 ), None) 

2686 except Exception as e: 

2687 raise LinstorVolumeManagerError( 

2688 'Unable to fetch database resource: {}' 

2689 .format(e) 

2690 ) 

2691 

2692 if not resource: 

2693 if activate: 

2694 cls._activate_device_path( 

2695 lin, node_name, DATABASE_VOLUME_NAME 

2696 ) 

2697 return cls._request_database_path( 

2698 DATABASE_VOLUME_NAME, DATABASE_VOLUME_NAME 

2699 ) 

2700 raise LinstorVolumeManagerError( 

2701 'Empty dev path for `{}`, but definition "seems" to exist' 

2702 .format(DATABASE_PATH) 

2703 ) 

2704 # Contains a path of the /dev/drbd<id> form. 

2705 return resource.volumes[0].device_path 

2706 

    @classmethod
    def _create_database_volume(
        cls, lin, group_name, storage_pool_name, node_names, redundancy
    ):
        """
        Create, place, activate and format the LINSTOR database volume.
        Requires an empty LINSTOR resource definition list (new SR only).
        :param lin: Connected LINSTOR client.
        :param str group_name: Resource group used to spawn the definition.
        :param str storage_pool_name: Storage pool for diskful replicas.
        :param node_names: All nodes of the pool; nodes without a storage
        pool get a diskless resource.
        :param int redundancy: Number of diskful replicas to create.
        :return: The database device path.
        :rtype: str
        :raises LinstorVolumeManagerError: On any creation/placement/format
        failure.
        """
        try:
            dfns = lin.resource_dfn_list_raise().resource_definitions
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Unable to get definitions during database creation: {}'
                .format(e)
            )

        if dfns:
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    DATABASE_VOLUME_NAME, group_name
                ) + 'LINSTOR volume list must be empty.'
            )

        # Workaround to use thin lvm. Without this line an error is returned:
        # "Not enough available nodes"
        # I don't understand why but this command protect against this bug.
        try:
            pools = lin.storage_pool_list_raise(
                filter_by_stor_pools=[storage_pool_name]
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to get storage pool list before database creation: {}'
                .format(e)
            )

        # Ensure we have a correct list of storage pools.
        assert pools.storage_pools  # We must have at least one storage pool!
        nodes_with_pool = list(map(lambda pool: pool.node_name, pools.storage_pools))
        for node_name in nodes_with_pool:
            assert node_name in node_names
        util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool))

        # Create the database definition.
        size = cls.round_up_volume_size(DATABASE_SIZE)
        cls._check_volume_creation_errors(lin.resource_group_spawn(
            rsc_grp_name=group_name,
            rsc_dfn_name=DATABASE_VOLUME_NAME,
            vlm_sizes=['{}B'.format(size)],
            definitions_only=True
        ), DATABASE_VOLUME_NAME, group_name)
        cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME)

        # Create real resources on the first nodes.
        resources = []

        # Split nodes: diskful where a storage pool exists, diskless elsewhere.
        diskful_nodes = []
        diskless_nodes = []
        for node_name in node_names:
            if node_name in nodes_with_pool:
                diskful_nodes.append(node_name)
            else:
                diskless_nodes.append(node_name)

        assert diskful_nodes
        for node_name in diskful_nodes[:redundancy]:
            util.SMlog('Create database diskful on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                storage_pool=storage_pool_name
            ))
        # Create diskless resources on the remaining set.
        for node_name in diskful_nodes[redundancy:] + diskless_nodes:
            util.SMlog('Create database diskless on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                diskless=True
            ))

        result = lin.resource_create(resources)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create database volume from SR `{}`: {}'.format(
                    group_name, error_str
                )
            )

        # Create database and ensure path exists locally and
        # on replicated devices.
        current_device_path = cls._request_database_path(lin, activate=True)

        # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be
        # plugged.
        for node_name in node_names:
            cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME)

        # We use realpath here to get the /dev/drbd<id> path instead of
        # /dev/drbd/by-res/<resource_name>.
        expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME)
        util.wait_for_path(expected_device_path, 5)

        device_realpath = os.path.realpath(expected_device_path)
        if current_device_path != device_realpath:
            raise LinstorVolumeManagerError(
                'Invalid path, current={}, expected={} (realpath={})'
                .format(
                    current_device_path,
                    expected_device_path,
                    device_realpath
                )
            )

        # Format the new device; retried because the device may not be
        # immediately writable after placement.
        try:
            util.retry(
                lambda: util.pread2([DATABASE_MKFS, expected_device_path]),
                maxretry=5
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to execute {} on database volume: {}'
                .format(DATABASE_MKFS, e)
            )

        return expected_device_path

2830 

2831 @classmethod 

2832 def _destroy_database_volume(cls, lin, group_name): 

2833 error_str = cls._get_error_str( 

2834 lin.resource_dfn_delete(DATABASE_VOLUME_NAME) 

2835 ) 

2836 if error_str: 

2837 raise LinstorVolumeManagerError( 

2838 'Could not destroy resource `{}` from SR `{}`: {}' 

2839 .format(DATABASE_VOLUME_NAME, group_name, error_str) 

2840 ) 

2841 

    @classmethod
    def _mount_database_volume(cls, volume_path, mount=True, force=False):
        """
        Mount (or unmount) the database volume on DATABASE_PATH, moving the
        existing config files out of the way and back across the switch.
        On failure a best-effort rollback restores the previous mount state
        and the config files before re-raising.
        :param str volume_path: Device path of the database volume.
        :param bool mount: True to mount, False to unmount.
        :param bool force: Forwarded to `_move_files` when restoring files.
        :raises LinstorVolumeManagerError: If the backup directory cannot be
        removed after a successful move.
        """
        try:
            # 1. Create a backup config folder.
            database_not_empty = bool(os.listdir(DATABASE_PATH))
            backup_path = cls._create_database_backup_path()

            # 2. Move the config in the mounted volume.
            if database_not_empty:
                cls._move_files(DATABASE_PATH, backup_path)

            cls._mount_volume(volume_path, DATABASE_PATH, mount)

            if database_not_empty:
                cls._move_files(backup_path, DATABASE_PATH, force)

            # 3. Remove useless backup directory.
            try:
                os.rmdir(backup_path)
            except Exception as e:
                raise LinstorVolumeManagerError(
                    'Failed to remove backup path {} of LINSTOR config: {}'
                    .format(backup_path, e)
                )
        except Exception as e:
            # Best-effort rollback; every step is wrapped so a rollback
            # failure cannot mask the original error.
            # NOTE(review): `backup_path` is unbound here if `os.listdir` or
            # `_create_database_backup_path` itself raised — confirm this
            # failure path.
            def force_exec(fn):
                try:
                    fn()
                except Exception:
                    pass

            # If the mount/umount succeeded, undo it after saving the files.
            if mount == cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    DATABASE_PATH, backup_path
                ))
                force_exec(lambda: cls._mount_volume(
                    volume_path, DATABASE_PATH, not mount
                ))

            # Restore the files to DATABASE_PATH once back in the old state.
            if mount != cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    backup_path, DATABASE_PATH
                ))

            force_exec(lambda: os.rmdir(backup_path))
            raise e

2888 

2889 @classmethod 

2890 def _force_destroy_database_volume(cls, lin, group_name): 

2891 try: 

2892 cls._destroy_database_volume(lin, group_name) 

2893 except Exception: 

2894 pass 

2895 

2896 @classmethod 

2897 def _destroy_storage_pool(cls, lin, group_name, node_name): 

2898 def destroy(): 

2899 result = lin.storage_pool_delete(node_name, group_name) 

2900 errors = cls._filter_errors(result) 

2901 if cls._check_errors(errors, [ 

2902 linstor.consts.FAIL_NOT_FOUND_STOR_POOL, 

2903 linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN 

2904 ]): 

2905 return 

2906 

2907 if errors: 

2908 raise LinstorVolumeManagerError( 

2909 'Failed to destroy SP `{}` on node `{}`: {}'.format( 

2910 group_name, 

2911 node_name, 

2912 cls._get_error_str(errors) 

2913 ) 

2914 ) 

2915 

2916 # We must retry to avoid errors like: 

2917 # "can not be deleted as volumes / snapshot-volumes are still using it" 

2918 # after LINSTOR database volume destruction. 

2919 return util.retry(destroy, maxretry=10) 

2920 

    @classmethod
    def _create_resource_group(
        cls,
        lin,
        group_name,
        storage_pool_name,
        redundancy,
        destroy_old_group
    ):
        """
        Create the LINSTOR resource group used by this SR, then its
        volume group.

        :param lin: LINSTOR API instance.
        :param str group_name: Name of the RG/VG to create.
        :param str storage_pool_name: Storage pool backing the RG.
        :param int redundancy: Replication count (LINSTOR place count).
        :param bool destroy_old_group: If True and an RG with the same name
        already exists, destroy it once and retry the creation.
        :raises LinstorVolumeManagerError: If RG or VG creation fails.
        """
        rg_creation_attempt = 0
        while True:
            result = lin.resource_group_create(
                name=group_name,
                place_count=redundancy,
                storage_pool=storage_pool_name,
                diskless_on_remaining=False
            )
            error_str = cls._get_error_str(result)
            if not error_str:
                # RG created successfully.
                break

            errors = cls._filter_errors(result)
            # Only an "RG already exists" error is recoverable, and only
            # when the caller allowed destruction of the old group.
            if destroy_old_group and cls._check_errors(errors, [
                linstor.consts.FAIL_EXISTS_RSC_GRP
            ]):
                rg_creation_attempt += 1
                # Destroy the old group at most once to avoid looping
                # forever if creation keeps failing with "already exists".
                if rg_creation_attempt < 2:
                    try:
                        cls._destroy_resource_group(lin, group_name)
                    except Exception as e:
                        # Fall through to the raise below with this reason.
                        error_str = 'Failed to destroy old and empty RG: {}'.format(e)
                    else:
                        # Old RG destroyed: retry the creation.
                        continue

            raise LinstorVolumeManagerError(
                'Could not create RG `{}`: {}'.format(
                    group_name, error_str
                )
            )

        result = lin.volume_group_create(group_name)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create VG `{}`: {}'.format(
                    group_name, error_str
                )
            )

2969 

2970 @classmethod 

2971 def _destroy_resource_group(cls, lin, group_name): 

2972 def destroy(): 

2973 result = lin.resource_group_delete(group_name) 

2974 errors = cls._filter_errors(result) 

2975 if cls._check_errors(errors, [ 

2976 linstor.consts.FAIL_NOT_FOUND_RSC_GRP 

2977 ]): 

2978 return 

2979 

2980 if errors: 

2981 raise LinstorVolumeManagerError( 

2982 'Failed to destroy RG `{}`: {}' 

2983 .format(group_name, cls._get_error_str(errors)) 

2984 ) 

2985 

2986 return util.retry(destroy, maxretry=10) 

2987 

2988 @classmethod 

2989 def _build_group_name(cls, base_name): 

2990 # If thin provisioning is used we have a path like this: 

2991 # `VG/LV`. "/" is not accepted by LINSTOR. 

2992 return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_')) 

2993 

2994 # Used to store important data in a HA context, 

2995 # i.e. a replication count of 3. 

2996 @classmethod 

2997 def _build_ha_group_name(cls, base_name): 

2998 return '{}{}'.format(cls.PREFIX_HA, base_name.replace('/', '_')) 

2999 

3000 @classmethod 

3001 def _check_volume_creation_errors(cls, result, volume_uuid, group_name): 

3002 errors = cls._filter_errors(result) 

3003 if cls._check_errors(errors, [ 

3004 linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN 

3005 ]): 

3006 raise LinstorVolumeManagerError( 

3007 'Failed to create volume `{}` from SR `{}`, it already exists' 

3008 .format(volume_uuid, group_name), 

3009 LinstorVolumeManagerError.ERR_VOLUME_EXISTS 

3010 ) 

3011 

3012 if cls._check_errors(errors, [linstor.consts.FAIL_NOT_FOUND_RSC_GRP]): 

3013 raise LinstorVolumeManagerError( 

3014 'Failed to create volume `{}` from SR `{}`, resource group doesn\'t exist' 

3015 .format(volume_uuid, group_name), 

3016 LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS 

3017 ) 

3018 

3019 if errors: 

3020 raise LinstorVolumeManagerError( 

3021 'Failed to create volume `{}` from SR `{}`: {}'.format( 

3022 volume_uuid, 

3023 group_name, 

3024 cls._get_error_str(errors) 

3025 ) 

3026 ) 

3027 

    @classmethod
    def _move_files(cls, src_dir, dest_dir, force=False):
        """
        Move every file of `src_dir` into `dest_dir`.

        Unless `force` is True, the destination must not already contain
        any (non-ignored) file. On failure, files already moved are moved
        back (best effort) before raising.

        :param str src_dir: Source directory.
        :param str dest_dir: Destination directory.
        :param bool force: If True, skip the destination emptiness checks
        and allow overwriting existing files.
        :raises LinstorVolumeManagerError: If the destination is not empty
        (when `force` is False) or if any file cannot be moved.
        """
        def listdir(dir):
            # `lost+found` exists on freshly formatted ext4 volumes and
            # must neither be moved nor counted as content.
            ignored = ['lost+found']
            return [file for file in os.listdir(dir) if file not in ignored]

        try:
            if not force:
                files = listdir(dest_dir)
                if files:
                    raise LinstorVolumeManagerError(
                        'Cannot move files from {} to {} because destination '
                        'contains: {}'.format(src_dir, dest_dir, files)
                    )
        except LinstorVolumeManagerError:
            raise
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Cannot list dir {}: {}'.format(dest_dir, e)
            )

        try:
            for file in listdir(src_dir):
                try:
                    dest_file = os.path.join(dest_dir, file)
                    if not force and os.path.exists(dest_file):
                        raise LinstorVolumeManagerError(
                            'Cannot move {} because it already exists in the '
                            'destination'.format(file)
                        )
                    shutil.move(os.path.join(src_dir, file), dest_file)
                except LinstorVolumeManagerError:
                    raise
                except Exception as e:
                    raise LinstorVolumeManagerError(
                        'Cannot move {}: {}'.format(file, e)
                    )
        except Exception as e:
            if not force:
                # Rollback: move back what was already transferred. Safe
                # because without `force` the destination was checked empty
                # above; errors here are ignored to keep the original one.
                try:
                    cls._move_files(dest_dir, src_dir, force=True)
                except Exception:
                    pass

            raise LinstorVolumeManagerError(
                'Failed to move files from {} to {}: {}'.format(
                    src_dir, dest_dir, e
                )
            )

3077 

3078 @staticmethod 

3079 def _create_database_backup_path(): 

3080 path = DATABASE_PATH + '-' + str(uuid.uuid4()) 

3081 try: 

3082 os.mkdir(path) 

3083 return path 

3084 except Exception as e: 

3085 raise LinstorVolumeManagerError( 

3086 'Failed to create backup path {} of LINSTOR config: {}' 

3087 .format(path, e) 

3088 ) 

3089 

3090 @staticmethod 

3091 def _get_filtered_properties(properties): 

3092 return dict(properties.items()) 

3093 

3094 @staticmethod 

3095 def _filter_errors(result): 

3096 return [ 

3097 err for err in result 

3098 if hasattr(err, 'is_error') and err.is_error() 

3099 ] 

3100 

3101 @staticmethod 

3102 def _check_errors(result, codes): 

3103 for err in result: 

3104 for code in codes: 

3105 if err.is_error(code): 

3106 return True 

3107 return False 

3108 

3109 @classmethod 

3110 def _controller_is_running(cls): 

3111 return cls._service_is_running('linstor-controller') 

3112 

3113 @classmethod 

3114 def _start_controller(cls, start=True): 

3115 return cls._start_service('linstor-controller', start) 

3116 

3117 @staticmethod 

3118 def _start_service(name, start=True): 

3119 action = 'start' if start else 'stop' 

3120 (ret, out, err) = util.doexec([ 

3121 'systemctl', action, name 

3122 ]) 

3123 if ret != 0: 

3124 raise LinstorVolumeManagerError( 

3125 'Failed to {} {}: {} {}' 

3126 .format(action, name, out, err) 

3127 ) 

3128 

3129 @staticmethod 

3130 def _service_is_running(name): 

3131 (ret, out, err) = util.doexec([ 

3132 'systemctl', 'is-active', '--quiet', name 

3133 ]) 

3134 return not ret 

3135 

3136 @staticmethod 

3137 def _is_mounted(mountpoint): 

3138 (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint]) 

3139 return ret == 0 

3140 

3141 @classmethod 

3142 def _mount_volume(cls, volume_path, mountpoint, mount=True): 

3143 if mount: 

3144 try: 

3145 util.pread(['mount', volume_path, mountpoint]) 

3146 except Exception as e: 

3147 raise LinstorVolumeManagerError( 

3148 'Failed to mount volume {} on {}: {}' 

3149 .format(volume_path, mountpoint, e) 

3150 ) 

3151 else: 

3152 try: 

3153 if cls._is_mounted(mountpoint): 

3154 util.pread(['umount', mountpoint]) 

3155 except Exception as e: 

3156 raise LinstorVolumeManagerError( 

3157 'Failed to umount volume {} on {}: {}' 

3158 .format(volume_path, mountpoint, e) 

3159 ) 

3160 

3161 

3162# ============================================================================== 

3163 

# Check if a path is a DRBD resource and log the process name/pid
# that opened it.
def log_drbd_openers(path):
    """
    Log which host/process currently holds the DRBD device behind `path`.
    Best effort: every failure is only logged, never raised.

    :param str path: A `/dev/drbd/by-res/...` symlink; any other path is
    silently ignored.
    """
    # Ignore if it's not a symlink to DRBD resource.
    if not path.startswith(DRBD_BY_RES_PATH):
        return

    # Compute resource name.
    res_name_end = path.find('/', len(DRBD_BY_RES_PATH))
    if res_name_end == -1:
        return
    res_name = path[len(DRBD_BY_RES_PATH):res_name_end]

    volume_end = path.rfind('/')
    # NOTE(review): for the common two-segment layout
    # `/dev/drbd/by-res/<resource>/<volume>` the last '/' IS the one at
    # `res_name_end`, so this guard triggers the early return — confirm
    # the expected path layout / whether `res_name_end + 1` was intended.
    if volume_end == res_name_end:
        return
    volume = path[volume_end + 1:]

    try:
        # Ensure path is a DRBD.
        drbd_path = os.path.realpath(path)
        stats = os.stat(drbd_path)
        # 147 is the registered block-device major number of DRBD.
        if not stat.S_ISBLK(stats.st_mode) or os.major(stats.st_rdev) != 147:
            return

        # Find where the device is open.
        (ret, stdout, stderr) = util.doexec(['drbdadm', 'status', res_name])
        if ret != 0:
            util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format(
                res_name, stderr
            ))
            return

        # Is it a local device?
        if stdout.startswith('{} role:Primary'.format(res_name)):
            util.SMlog(
                'DRBD resource `{}` is open on local host: {}'
                .format(path, get_local_volume_openers(res_name, volume))
            )
            return

        # Is it a remote device?
        util.SMlog(
            'DRBD resource `{}` is open on hosts: {}'
            .format(path, get_all_volume_openers(res_name, volume))
        )
    except Exception as e:
        util.SMlog(
            'Got exception while trying to determine where DRBD resource ' +
            '`{}` is open: {}'.format(path, e)
        )