Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python3 

2# 

3# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr 

4# 

5# This program is free software: you can redistribute it and/or modify 

6# it under the terms of the GNU General Public License as published by 

7# the Free Software Foundation, either version 3 of the License, or 

8# (at your option) any later version. 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU General Public License for more details. 

13# 

14# You should have received a copy of the GNU General Public License 

15# along with this program. If not, see <https://www.gnu.org/licenses/>. 

16# 

17 

18from sm_typing import override 

19 

20import errno 

21import json 

22import linstor 

23import os.path 

24import re 

25import shutil 

26import socket 

27import stat 

28import time 

29import util 

30import uuid 

31 

# Persistent prefix to add to RAW persistent volumes.
PERSISTENT_PREFIX = 'xcp-persistent-'

# Contains the data of the "/var/lib/linstor" directory.
DATABASE_VOLUME_NAME = PERSISTENT_PREFIX + 'database'
DATABASE_SIZE = 1 << 30  # 1GB.
DATABASE_PATH = '/var/lib/linstor'
DATABASE_MKFS = 'mkfs.ext4'

# Matches "<resource> role:Primary" in `drbdadm status` output.
REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary")
# Extracts the address part of a "<name> <address>:<port>" drbdsetup value.
REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$')

# Directory where DRBD exposes device paths by resource name.
DRBD_BY_RES_PATH = '/dev/drbd/by-res/'

# Name of the XAPI plugin used for cross-host calls.
PLUGIN = 'linstor-manager'

48 

49# ============================================================================== 

50 

def get_local_volume_openers(resource_name, volume):
    """
    Read the DRBD debugfs "openers" file of a local volume.

    :param str resource_name: DRBD resource name.
    :param volume: Volume number inside the resource.
    :return: JSON string mapping each opener PID to its process name and
        open duration (presumably in ms -- TODO confirm against DRBD docs).
    :rtype: str
    :raises Exception: If the arguments are empty/None or an openers line
        cannot be parsed.
    """
    if not resource_name or volume is None:
        raise Exception('Cannot get DRBD openers without resource name and/or volume.')

    path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format(
        resource_name, volume
    )

    result = {}
    opener_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)')

    with open(path, 'r') as openers:
        # Not a big cost, so read all lines directly.
        for line in openers.readlines():
            match = opener_re.match(line)
            if not match:
                # Don't use `assert` here: it's stripped with `-O` and this
                # data comes from the kernel, not from our own code.
                raise Exception('Unexpected openers line: `{}`'.format(line))

            process_name, pid, open_duration_ms = match.groups()
            result[pid] = {
                'process-name': process_name,
                'open-duration': open_duration_ms
            }

    return json.dumps(result)

80 

def get_all_volume_openers(resource_name, volume):
    """
    Collect the DRBD openers of a volume on every live host of the pool.

    Calls the `getDrbdOpeners` function of the linstor-manager plugin on
    each online host and gathers the parsed results.

    :param str resource_name: DRBD resource name.
    :param volume: Volume number inside the resource.
    :return: Dict of node name -> openers (see get_local_volume_openers).
    :rtype: dict
    """
    PLUGIN_CMD = 'getDrbdOpeners'

    plugin_args = {
        'resourceName': resource_name,
        'volume': str(volume)
    }
    openers = {}

    # Make sure this call never stucks because this function can be called
    # during HA init and in this case we can wait forever.
    session = util.timeout_call(10, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            if not metrics['live']:
                # Ensure we call plugin on online hosts only.
                continue

            response = session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, plugin_args
            )
            openers[node_name] = json.loads(response)
        except Exception as e:
            util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))

    return openers

113 

114 

115# ============================================================================== 

116 

def round_up(value, divisor):
    """
    Round `value` up to the nearest multiple of `divisor`.

    :param value: Value to round (converted to int).
    :param divisor: Target multiple (converted to int, must be non-zero).
    :return: The smallest multiple of `divisor` >= `value`.
    :rtype: int
    :raises ValueError: If `divisor` is falsy (e.g. 0).
    """
    # Don't use `assert` for validation: it's stripped with `-O`.
    if not divisor:
        raise ValueError('round_up requires a non-zero divisor')
    divisor = int(divisor)
    return ((int(value) + divisor - 1) // divisor) * divisor

121 

122 

def round_down(value, divisor):
    """
    Round `value` down to the nearest multiple of `divisor`.

    :param value: Value to round (converted to int).
    :param divisor: Target multiple (converted to int, must be non-zero).
    :return: The largest multiple of `divisor` <= `value`.
    :rtype: int
    :raises ValueError: If `divisor` is falsy (e.g. 0).
    """
    # Don't use `assert` for validation: it's stripped with `-O`.
    if not divisor:
        raise ValueError('round_down requires a non-zero divisor')
    value = int(value)
    return value - (value % int(divisor))

127 

128 

129# ============================================================================== 

130 

def get_remote_host_ip(node_name):
    """
    Find the IP used by a remote node in the DRBD database connections.

    Parses `drbdsetup show <DATABASE_VOLUME_NAME> --json` and returns the
    remote host address of the connection matching `node_name`.

    :param str node_name: Node to search.
    :return: The IP address if found, None otherwise.
    :rtype: str or None
    """
    (ret, stdout, stderr) = util.doexec([
        'drbdsetup', 'show', DATABASE_VOLUME_NAME, '--json'
    ])
    if ret != 0:
        return

    try:
        conf = json.loads(stdout)
        if not conf:
            return

        for connection in conf[0]['connections']:
            if connection['net']['_name'] == node_name:
                value = connection['path']['_remote_host']
                res = REG_DRBDSETUP_IP.match(value)
                if res:
                    return res.groups()[0]
                break
    except Exception as e:
        # Best effort: an unexpected JSON layout must not crash the caller,
        # but don't swallow the error silently either (was `pass`).
        util.SMlog('Failed to parse drbdsetup output: {}'.format(e))

152 

153 

def _get_controller_uri():
    """
    Find the URI of the LINSTOR controller of the pool, if any.

    :return: 'linstor://<host>' if a controller is found, None otherwise.
    :rtype: str or None
    """
    PLUGIN_CMD = 'hasControllerRunning'

    # Try to find controller using drbdadm.
    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])
    if ret == 0:
        # If we are here, the database device exists locally.

        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            # Nice case, we have the controller running on this local host.
            return 'linstor://localhost'

        # Try to find the host using DRBD connections.
        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            node_name = res.groups()[0]
            ip = get_remote_host_ip(node_name)
            if ip:
                return 'linstor://' + ip

    # Worst case: we use many hosts in the pool (>= 4), so we can't find the
    # primary using drbdadm because we don't have all connections to the
    # replicated volume. `drbdadm status xcp-persistent-database` returns
    # 3 connections by default.
    try:
        session = util.timeout_call(10, util.get_localAPI_session)

        for host_ref, host_record in session.xenapi.host.get_all_records().items():
            node_name = host_record['hostname']
            try:
                if util.strtobool(
                    session.xenapi.host.call_plugin(host_ref, PLUGIN, PLUGIN_CMD, {})
                ):
                    return 'linstor://' + host_record['address']
            except Exception as e:
                # Can throw an exception if a host is offline. So catch it.
                util.SMlog('Unable to search controller on `{}`: {}'.format(
                    node_name, e
                ))
    except Exception:
        # Fix: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Not found, maybe we are trying to create the SR...
        pass

198 

def get_controller_uri():
    """
    Find the LINSTOR controller URI, retrying up to 10 attempts with a
    one second pause between them.

    :return: The controller URI if found, None otherwise.
    :rtype: str or None
    """
    for attempt in range(10):
        uri = _get_controller_uri()
        if uri:
            return uri
        # Don't sleep after the last attempt.
        if attempt < 9:
            time.sleep(1)

210 

211 

def get_controller_node_name():
    """
    Get the node name of the host running the LINSTOR controller.

    First tries to resolve it locally using `drbdadm status`, then falls
    back on calling the linstor-manager plugin on every live host.

    :return: 'localhost', a node name, or None if not found.
    :rtype: str or None
    """
    PLUGIN_CMD = 'hasControllerRunning'

    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])

    if ret == 0:
        # The database device exists locally.
        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            return 'localhost'

        primary = REG_DRBDADM_PRIMARY.search(stdout)
        if primary:
            return primary.groups()[0]

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            host_metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            if not host_metrics['live']:
                # Only query online hosts.
                continue

            has_controller = session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {}
            )
            if util.strtobool(has_controller):
                return node_name
        except Exception as e:
            util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format(
                node_name, e
            ))

245 

246 

def demote_drbd_resource(node_name, resource_name):
    """
    Demote a DRBD resource on a specific node using the linstor-manager
    plugin.

    :param str node_name: Host (XAPI hostname) where the resource must be
        demoted.
    :param str resource_name: DRBD resource to demote.
    :raises Exception: If the node cannot be found or the plugin call fails.
    """
    PLUGIN_CMD = 'demoteDrbdResource'

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        if host_record['hostname'] != node_name:
            continue

        try:
            session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name}
            )
            # Fix: without this return, a SUCCESSFUL demotion fell through
            # to the misleading "unable to find node" exception below.
            return
        except Exception as e:
            util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))
    raise Exception(
        'Can\'t demote resource `{}`, unable to find node `{}`'
        .format(resource_name, node_name)
    )

268 

269# ============================================================================== 

270 

class LinstorVolumeManagerError(Exception):
    """
    Error raised by LinstorVolumeManager, carrying an error code so callers
    can react to specific failures.
    """

    # Note: no trailing commas here! With them (as originally written) the
    # constants were 1-element tuples instead of plain ints, inconsistent
    # with ERR_GROUP_NOT_EXISTS which had no comma.
    ERR_GENERIC = 0
    ERR_VOLUME_EXISTS = 1
    ERR_VOLUME_NOT_EXISTS = 2
    ERR_VOLUME_DESTROY = 3
    ERR_GROUP_NOT_EXISTS = 4

    def __init__(self, message, code=ERR_GENERIC):
        """
        :param str message: Human readable error message.
        :param int code: One of the ERR_* constants (default: ERR_GENERIC).
        """
        super(LinstorVolumeManagerError, self).__init__(message)
        self._code = code

    @property
    def code(self):
        """
        Give the error code attached to this exception.
        :rtype: int
        """
        return self._code

285 

286 

287# ============================================================================== 

288 

289# Note: 

290# If a storage pool is not accessible after a network change: 

291# linstor node interface modify <NODE> default --ip <IP> 

292 

293 

class LinstorVolumeManager(object):
    """
    API to manage LINSTOR volumes in XCP-ng.
    A volume in this context is a physical part of the storage layer.
    """

    __slots__ = (
        '_linstor', '_logger', '_redundancy',
        '_base_group_name', '_group_name', '_ha_group_name',
        '_volumes', '_storage_pools', '_storage_pools_time',
        '_kv_cache', '_resource_cache', '_volume_info_cache',
        '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty'
    )

    DEV_ROOT_PATH = DRBD_BY_RES_PATH

    # Default sector size.
    BLOCK_SIZE = 512

    # List of volume properties.
    PROP_METADATA = 'metadata'
    PROP_NOT_EXISTS = 'not-exists'
    PROP_VOLUME_NAME = 'volume-name'
    PROP_IS_READONLY_TIMESTAMP = 'readonly-timestamp'

    # A volume can only be locked for a limited duration.
    # The goal is to give enough time to slaves to execute some actions on
    # a device before an UUID update or a coalesce for example.
    # Expiration is expressed in seconds.
    LOCKED_EXPIRATION_DELAY = 1 * 60

    # Used when volume uuid is being updated.
    PROP_UPDATING_UUID_SRC = 'updating-uuid-src'

    # States of property PROP_NOT_EXISTS.
    STATE_EXISTS = '0'
    STATE_NOT_EXISTS = '1'
    STATE_CREATING = '2'

    # Property namespaces.
    NAMESPACE_SR = 'xcp/sr'
    NAMESPACE_VOLUME = 'xcp/volume'

    # Regex to match properties.
    REG_PROP = '^([^/]+)/{}$'

    REG_METADATA = re.compile(REG_PROP.format(PROP_METADATA))
    REG_NOT_EXISTS = re.compile(REG_PROP.format(PROP_NOT_EXISTS))
    REG_VOLUME_NAME = re.compile(REG_PROP.format(PROP_VOLUME_NAME))
    REG_UPDATING_UUID_SRC = re.compile(REG_PROP.format(PROP_UPDATING_UUID_SRC))

    # Prefixes of SR/VOLUME in the LINSTOR DB.
    # A LINSTOR (resource, group, ...) name cannot start with a number.
    # So we add a prefix behind our SR/VOLUME uuids.
    PREFIX_SR = 'xcp-sr-'
    PREFIX_HA = 'xcp-ha-'
    PREFIX_VOLUME = 'xcp-volume-'

    # Limit request number when storage pool info is asked, we fetch
    # the current pool status after N elapsed seconds.
    STORAGE_POOLS_FETCH_INTERVAL = 15

    @staticmethod
    def default_logger(*args):
        # Fallback logger used when no logger is given to the constructor.
        print(args)

359 

360 # -------------------------------------------------------------------------- 

361 # API. 

362 # -------------------------------------------------------------------------- 

363 

    class VolumeInfo(object):
        """
        Small value object describing the size and placement of a volume.
        """

        __slots__ = (
            'name',
            'allocated_size',  # Allocated size, place count is not used.
            'virtual_size',    # Total virtual available size of this volume
                               # (i.e. the user size at creation).
            'diskful'          # Array of nodes that have a diskful volume.
        )

        def __init__(self, name):
            # Sizes start at zero until filled in by the manager.
            self.name = name
            self.allocated_size = 0
            self.virtual_size = 0
            self.diskful = []

        @override
        def __repr__(self) -> str:
            return 'VolumeInfo("{}", {}, {}, {})'.format(
                self.name, self.allocated_size, self.virtual_size,
                self.diskful
            )

385 

386 # -------------------------------------------------------------------------- 

387 

    def __init__(
        self, uri, group_name, repair=False, logger=default_logger.__func__,
        attempt_count=30
    ):
        """
        Create a new LinstorVolumeManager object.
        :param str uri: URI to communicate with the LINSTOR controller.
        :param str group_name: The SR group name to use.
        :param bool repair: If true we try to remove bad volumes due to a crash
        or unexpected behavior.
        :param function logger: Function to log messages.
        :param int attempt_count: Number of attempts to join the controller.
        :raises LinstorVolumeManagerError: If the resource group is missing.
        """

        self._linstor = self._create_linstor_instance(
            uri, attempt_count=attempt_count
        )
        self._base_group_name = group_name

        # Ensure group exists.
        group_name = self._build_group_name(group_name)
        groups = self._linstor.resource_group_list_raise([group_name]).resource_groups
        if not groups:
            raise LinstorVolumeManagerError(
                'Unable to find `{}` Linstor SR'.format(group_name)
            )

        # Ok. ;)
        self._logger = logger
        # The redundancy is the place count of the LINSTOR resource group.
        self._redundancy = groups[0].select_filter.place_count
        self._group_name = group_name
        self._ha_group_name = self._build_ha_group_name(self._base_group_name)
        self._volumes = set()
        self._storage_pools_time = 0

        # To increase performance and limit request count to LINSTOR services,
        # we use caches.
        self._kv_cache = self._create_kv_cache()
        self._resource_cache = None
        self._resource_cache_dirty = True
        self._volume_info_cache = None
        self._volume_info_cache_dirty = True
        self._build_volumes(repair=repair)

431 

432 @property 

433 def group_name(self): 

434 """ 

435 Give the used group name. 

436 :return: The group name. 

437 :rtype: str 

438 """ 

439 return self._base_group_name 

440 

441 @property 

442 def redundancy(self): 

443 """ 

444 Give the used redundancy. 

445 :return: The redundancy. 

446 :rtype: int 

447 """ 

448 return self._redundancy 

449 

450 @property 

451 def volumes(self): 

452 """ 

453 Give the volumes uuid set. 

454 :return: The volumes uuid set. 

455 :rtype: set(str) 

456 """ 

457 return self._volumes 

458 

459 @property 

460 def max_volume_size_allowed(self): 

461 """ 

462 Give the max volume size currently available in B. 

463 :return: The current size. 

464 :rtype: int 

465 """ 

466 

467 candidates = self._find_best_size_candidates() 

468 if not candidates: 

469 raise LinstorVolumeManagerError( 

470 'Failed to get max volume size allowed' 

471 ) 

472 

473 size = candidates[0].max_volume_size 

474 if size < 0: 

475 raise LinstorVolumeManagerError( 

476 'Invalid max volume size allowed given: {}'.format(size) 

477 ) 

478 return self.round_down_volume_size(size * 1024) 

479 

480 @property 

481 def physical_size(self): 

482 """ 

483 Give the total physical size of the SR. 

484 :return: The physical size. 

485 :rtype: int 

486 """ 

487 return self._compute_size('total_capacity') 

488 

489 @property 

490 def physical_free_size(self): 

491 """ 

492 Give the total free physical size of the SR. 

493 :return: The physical free size. 

494 :rtype: int 

495 """ 

496 return self._compute_size('free_capacity') 

497 

498 @property 

499 def allocated_volume_size(self): 

500 """ 

501 Give the allocated size for all volumes. The place count is not 

502 used here. When thick lvm is used, the size for one volume should 

503 be equal to the virtual volume size. With thin lvm, the size is equal 

504 or lower to the volume size. 

505 :return: The allocated size of all volumes. 

506 :rtype: int 

507 """ 

508 

509 # Paths: /res_name/vol_number/size 

510 sizes = {} 

511 

512 for resource in self._get_resource_cache().resources: 

513 if resource.name not in sizes: 

514 current = sizes[resource.name] = {} 

515 else: 

516 current = sizes[resource.name] 

517 

518 for volume in resource.volumes: 

519 # We ignore diskless pools of the form "DfltDisklessStorPool". 

520 if volume.storage_pool_name != self._group_name: 

521 continue 

522 

523 allocated_size = max(volume.allocated_size, 0) 

524 current_allocated_size = current.get(volume.number) or -1 

525 if allocated_size > current_allocated_size: 

526 current[volume.number] = allocated_size 

527 

528 total_size = 0 

529 for volumes in sizes.values(): 

530 for size in volumes.values(): 

531 total_size += size 

532 

533 return total_size * 1024 

534 

    def get_min_physical_size(self):
        """
        Give the minimum physical size of the SR.
        I.e. the size of the smallest disk + the number of pools.
        :return: Tuple (pool count, size in B of the smallest pool).
        :rtype: tuple(int, int)
        :raises LinstorVolumeManagerError: If a pool reports a negative
        total capacity.
        """
        size = None
        pool_count = 0
        # force=True: bypass the storage pool cache to get fresh values.
        for pool in self._get_storage_pools(force=True):
            space = pool.free_space
            if space:
                pool_count += 1
                current_size = space.total_capacity
                if current_size < 0:
                    raise LinstorVolumeManagerError(
                        'Failed to get pool total_capacity attr of `{}`'
                        .format(pool.node_name)
                    )
                if size is None or current_size < size:
                    size = current_size
        # LINSTOR capacities are in KiB.
        return (pool_count, (size or 0) * 1024)

557 

    @property
    def metadata(self):
        """
        Get the metadata of the SR.
        :return: Dictionary that contains metadata.
        :rtype: dict(str, dict)
        :raises LinstorVolumeManagerError: If the stored metadata is not a
        JSON dictionary.
        """

        sr_properties = self._get_sr_properties()
        metadata = sr_properties.get(self.PROP_METADATA)
        if metadata is not None:
            # Metadata is stored serialized; reject anything that is not
            # a JSON object.
            metadata = json.loads(metadata)
            if isinstance(metadata, dict):
                return metadata
            raise LinstorVolumeManagerError(
                'Expected dictionary in SR metadata: {}'.format(
                    self._group_name
                )
            )

        # No metadata stored yet.
        return {}

579 

580 @metadata.setter 

581 def metadata(self, metadata): 

582 """ 

583 Set the metadata of the SR. 

584 :param dict metadata: Dictionary that contains metadata. 

585 """ 

586 

587 assert isinstance(metadata, dict) 

588 sr_properties = self._get_sr_properties() 

589 sr_properties[self.PROP_METADATA] = json.dumps(metadata) 

590 

591 @property 

592 def disconnected_hosts(self): 

593 """ 

594 Get the list of disconnected hosts. 

595 :return: Set that contains disconnected hosts. 

596 :rtype: set(str) 

597 """ 

598 

599 disconnected_hosts = set() 

600 for pool in self._get_storage_pools(): 

601 for report in pool.reports: 

602 if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \ 

603 linstor.consts.WARN_NOT_CONNECTED: 

604 disconnected_hosts.add(pool.node_name) 

605 break 

606 return disconnected_hosts 

607 

608 def check_volume_exists(self, volume_uuid): 

609 """ 

610 Check if a volume exists in the SR. 

611 :return: True if volume exists. 

612 :rtype: bool 

613 """ 

614 return volume_uuid in self._volumes 

615 

    def create_volume(
        self,
        volume_uuid,
        size,
        persistent=True,
        volume_name=None,
        high_availability=False
    ):
        """
        Create a new volume on the SR.
        :param str volume_uuid: The volume uuid to use.
        :param int size: volume size in B.
        :param bool persistent: If false the volume will be unavailable
        on the next constructor call LinstorSR(...).
        :param str volume_name: If set, this name is used in the LINSTOR
        database instead of a generated name.
        :param bool high_availability: If set, the volume is created in
        the HA group.
        :return: The current device path of the volume.
        :rtype: str
        """

        self._logger('Creating LINSTOR volume {}...'.format(volume_uuid))
        if not volume_name:
            volume_name = self.build_volume_name(util.gen_uuid())
        volume_properties = self._create_volume_with_properties(
            volume_uuid,
            volume_name,
            size,
            True,  # place_resources
            high_availability
        )

        # Volume created! Now try to find the device path.
        try:
            self._logger(
                'Find device path of LINSTOR volume {}...'.format(volume_uuid)
            )
            device_path = self._find_device_path(volume_uuid, volume_name)
            if persistent:
                # Only persistent volumes survive the next LinstorSR(...)
                # construction (see PROP_NOT_EXISTS states).
                volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
            self._volumes.add(volume_uuid)
            self._logger(
                'LINSTOR volume {} created!'.format(volume_uuid)
            )
            return device_path
        except Exception:
            # There is an issue to find the path.
            # At this point the volume has just been created, so force flag can be used.
            self._destroy_volume(volume_uuid, force=True)
            raise

667 

668 def mark_volume_as_persistent(self, volume_uuid): 

669 """ 

670 Mark volume as persistent if created with persistent=False. 

671 :param str volume_uuid: The volume uuid to mark. 

672 """ 

673 

674 self._ensure_volume_exists(volume_uuid) 

675 

676 # Mark volume as persistent. 

677 volume_properties = self._get_volume_properties(volume_uuid) 

678 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS 

679 

    def destroy_volume(self, volume_uuid):
        """
        Destroy a volume.
        :param str volume_uuid: The volume uuid to destroy.
        :raises LinstorVolumeManagerError: With code ERR_VOLUME_DESTROY if
        the underlying destruction fails.
        """

        self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        # Mark volume as destroyed first: even if the real deletion fails
        # below, the volume won't be reused after a restart.
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS

        try:
            self._volumes.remove(volume_uuid)
            self._destroy_volume(volume_uuid)
        except Exception as e:
            raise LinstorVolumeManagerError(
                str(e),
                LinstorVolumeManagerError.ERR_VOLUME_DESTROY
            )

701 

702 def lock_volume(self, volume_uuid, locked=True): 

703 """ 

704 Prevent modifications of the volume properties during 

705 "self.LOCKED_EXPIRATION_DELAY" seconds. The SR must be locked 

706 when used. This method is useful to attach/detach correctly a volume on 

707 a slave. Without it the GC can rename a volume, in this case the old 

708 volume path can be used by a slave... 

709 :param str volume_uuid: The volume uuid to protect/unprotect. 

710 :param bool locked: Lock/unlock the volume. 

711 """ 

712 

713 self._ensure_volume_exists(volume_uuid) 

714 

715 self._logger( 

716 '{} volume {} as locked'.format( 

717 'Mark' if locked else 'Unmark', 

718 volume_uuid 

719 ) 

720 ) 

721 

722 volume_properties = self._get_volume_properties(volume_uuid) 

723 if locked: 

724 volume_properties[ 

725 self.PROP_IS_READONLY_TIMESTAMP 

726 ] = str(time.time()) 

727 elif self.PROP_IS_READONLY_TIMESTAMP in volume_properties: 

728 volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP) 

729 

730 def ensure_volume_is_not_locked(self, volume_uuid, timeout=None): 

731 """ 

732 Ensure a volume is not locked. Wait if necessary. 

733 :param str volume_uuid: The volume uuid to check. 

734 :param int timeout: If the volume is always locked after the expiration 

735 of the timeout, an exception is thrown. 

736 """ 

737 return self.ensure_volume_list_is_not_locked([volume_uuid], timeout) 

738 

    def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None):
        """
        Wait until none of the given volumes is locked (readonly timestamp).
        Unknown volume uuids are silently ignored. Expired locks are removed
        on the fly.
        :param list volume_uuids: The volume uuids to check.
        :param int timeout: If some volume is still locked after this many
        seconds, an exception is thrown.
        :raises LinstorVolumeManagerError: On timeout.
        """
        # Only consider volumes known to this SR.
        checked = set()
        for volume_uuid in volume_uuids:
            if volume_uuid in self._volumes:
                checked.add(volume_uuid)

        if not checked:
            return

        waiting = False

        volume_properties = self._get_kv_cache()

        start = time.time()
        while True:
            # Can't delete in for loop, use a copy of the list.
            remaining = checked.copy()
            for volume_uuid in checked:
                volume_properties.namespace = \
                    self._build_volume_namespace(volume_uuid)
                timestamp = volume_properties.get(
                    self.PROP_IS_READONLY_TIMESTAMP
                )
                if timestamp is None:
                    # Not locked.
                    remaining.remove(volume_uuid)
                    continue

                now = time.time()
                if now - float(timestamp) > self.LOCKED_EXPIRATION_DELAY:
                    # Lock expired: clean it up and treat as unlocked.
                    self._logger(
                        'Remove readonly timestamp on {}'.format(volume_uuid)
                    )
                    volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
                    remaining.remove(volume_uuid)
                    continue

                # A live lock was found: stop scanning and wait below.
                if not waiting:
                    self._logger(
                        'Volume {} is locked, waiting...'.format(volume_uuid)
                    )
                    waiting = True
                break

            if not remaining:
                break
            checked = remaining

            # `now` is always bound here: remaining is non-empty only if a
            # locked volume (timestamp not None) was processed above.
            if timeout is not None and now - start > timeout:
                raise LinstorVolumeManagerError(
                    'volume `{}` is locked and timeout has been reached'
                    .format(volume_uuid),
                    LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
                )

            # We must wait to use the volume. After that we can modify it
            # ONLY if the SR is locked to avoid bad reads on the slaves.
            time.sleep(1)
            volume_properties = self._create_kv_cache()

        if waiting:
            self._logger('No volume locked now!')

800 

    def remove_volume_if_diskless(self, volume_uuid):
        """
        Remove diskless path from local node.
        :param str volume_uuid: The volume uuid to remove.
        :raises LinstorVolumeManagerError: If the deletion request fails.
        """

        self._ensure_volume_exists(volume_uuid)

        volume_properties = self._get_volume_properties(volume_uuid)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        node_name = socket.gethostname()

        # Never remove a tie-breaker resource: it is required for quorum.
        for resource in self._get_resource_cache().resources:
            if resource.name == volume_name and resource.node_name == node_name:
                if linstor.consts.FLAG_TIE_BREAKER in resource.flags:
                    return
                break

        # The LINSTOR call is a no-op if the local resource is diskful.
        result = self._linstor.resource_delete_if_diskless(
            node_name=node_name, rsc_name=volume_name
        )
        if not linstor.Linstor.all_api_responses_no_error(result):
            raise LinstorVolumeManagerError(
                'Unable to delete diskless path of `{}` on node `{}`: {}'
                .format(volume_name, node_name, ', '.join(
                    [str(x) for x in result]))
            )

829 

    def introduce_volume(self, volume_uuid):
        # Placeholder kept for API completeness; not implemented yet.
        pass  # TODO: Implement me.

832 

    def resize_volume(self, volume_uuid, new_size):
        """
        Resize a volume.
        :param str volume_uuid: The volume uuid to resize.
        :param int new_size: New size in B.
        :raises LinstorVolumeManagerError: If the resize fails after all
        retries.
        """

        volume_name = self.get_volume_name(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)
        # LINSTOR expects KiB; align the requested size up first.
        new_size = self.round_up_volume_size(new_size) // 1024

        retry_count = 30
        while True:
            result = self._linstor.volume_dfn_modify(
                rsc_name=volume_name,
                volume_nr=0,
                size=new_size
            )

            # The resource layout changed: cached resource data is stale.
            self._mark_resource_cache_as_dirty()

            error_str = self._get_error_str(result)
            if not error_str:
                break

            # After volume creation, DRBD volume can be unusable during many seconds.
            # So we must retry the definition change if the device is not up to date.
            # Often the case for thick provisioning.
            if retry_count and error_str.find('non-UpToDate DRBD device') >= 0:
                time.sleep(2)
                retry_count -= 1
                continue

            raise LinstorVolumeManagerError(
                'Could not resize volume `{}` from SR `{}`: {}'
                .format(volume_uuid, self._group_name, error_str)
            )

870 

871 def get_volume_name(self, volume_uuid): 

872 """ 

873 Get the name of a particular volume. 

874 :param str volume_uuid: The volume uuid of the name to get. 

875 :return: The volume name. 

876 :rtype: str 

877 """ 

878 

879 self._ensure_volume_exists(volume_uuid) 

880 volume_properties = self._get_volume_properties(volume_uuid) 

881 volume_name = volume_properties.get(self.PROP_VOLUME_NAME) 

882 if volume_name: 

883 return volume_name 

884 raise LinstorVolumeManagerError( 

885 'Failed to get volume name of {}'.format(volume_uuid) 

886 ) 

887 

    def get_volume_size(self, volume_uuid):
        """
        Get the size of a particular volume.
        :param str volume_uuid: The volume uuid of the size to get.
        :return: The volume size in B.
        :rtype: int
        :raises LinstorVolumeManagerError: If a negative size is reported.
        """

        volume_name = self.get_volume_name(volume_uuid)
        dfns = self._linstor.resource_dfn_list_raise(
            query_volume_definitions=True,
            filter_by_resource_definitions=[volume_name]
        ).resource_definitions

        # A volume maps to a single-volume resource definition (volume 0).
        size = dfns[0].volume_definitions[0].size
        if size < 0:
            raise LinstorVolumeManagerError(
                'Failed to get volume size of: {}'.format(volume_uuid)
            )
        # LINSTOR sizes are in KiB.
        return size * 1024

908 

    def set_auto_promote_timeout(self, volume_uuid, timeout):
        """
        Define the blocking time of open calls when a DRBD
        is already open on another host.
        :param str volume_uuid: The volume uuid to modify.
        :param timeout: Value passed as-is to the DRBD
        `auto-promote-timeout` option (unit per DRBD docs -- TODO confirm).
        :raises LinstorVolumeManagerError: If the definition update fails.
        """

        volume_name = self.get_volume_name(volume_uuid)
        result = self._linstor.resource_dfn_modify(volume_name, {
            'DrbdOptions/Resource/auto-promote-timeout': timeout
        })
        error_str = self._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not change the auto promote timeout of `{}`: {}'
                .format(volume_uuid, error_str)
            )

926 

    def set_drbd_ha_properties(self, volume_name, enabled=True):
        """
        Set or not HA DRBD properties required by drbd-reactor and
        by specific volumes.
        :param str volume_name: The volume to modify.
        :param bool enabled: Enable or disable HA properties.
        :raises LinstorVolumeManagerError: If the definition update fails.
        """

        properties = {
            'DrbdOptions/auto-quorum': 'disabled',
            'DrbdOptions/Resource/auto-promote': 'no',
            'DrbdOptions/Resource/on-no-data-accessible': 'io-error',
            'DrbdOptions/Resource/on-no-quorum': 'io-error',
            'DrbdOptions/Resource/on-suspended-primary-outdated': 'force-secondary',
            'DrbdOptions/Resource/quorum': 'majority'
        }
        if enabled:
            result = self._linstor.resource_dfn_modify(volume_name, properties)
        else:
            # Disabling removes the same keys instead of setting values.
            result = self._linstor.resource_dfn_modify(volume_name, {}, delete_props=list(properties.keys()))

        error_str = self._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not modify HA DRBD properties on volume `{}`: {}'
                .format(volume_name, error_str)
            )

954 

955 def get_volume_info(self, volume_uuid): 

956 """ 

957 Get the volume info of a particular volume. 

958 :param str volume_uuid: The volume uuid of the volume info to get. 

959 :return: The volume info. 

960 :rtype: VolumeInfo 

961 """ 

962 

963 volume_name = self.get_volume_name(volume_uuid) 

964 return self._get_volumes_info()[volume_name] 

965 

966 def get_device_path(self, volume_uuid): 

967 """ 

968 Get the dev path of a volume, create a diskless if necessary. 

969 :param str volume_uuid: The volume uuid to get the dev path. 

970 :return: The current device path of the volume. 

971 :rtype: str 

972 """ 

973 

974 volume_name = self.get_volume_name(volume_uuid) 

975 return self._find_device_path(volume_uuid, volume_name) 

976 

977 def get_volume_uuid_from_device_path(self, device_path): 

978 """ 

979 Get the volume uuid of a device_path. 

980 :param str device_path: The dev path to find the volume uuid. 

981 :return: The volume uuid of the local device path. 

982 :rtype: str 

983 """ 

984 

985 expected_volume_name = \ 

986 self.get_volume_name_from_device_path(device_path) 

987 

988 volume_names = self.get_volumes_with_name() 

989 for volume_uuid, volume_name in volume_names.items(): 

990 if volume_name == expected_volume_name: 

991 return volume_uuid 

992 

993 raise LinstorVolumeManagerError( 

994 'Unable to find volume uuid from dev path `{}`'.format(device_path) 

995 ) 

996 

997 def get_volume_name_from_device_path(self, device_path): 

998 """ 

999 Get the volume name of a device_path. 

1000 :param str device_path: The dev path to find the volume name. 

1001 :return: The volume name of the device path. 

1002 :rtype: str 

1003 """ 

1004 

1005 # Assume that we have a path like this: 

1006 # - "/dev/drbd/by-res/xcp-volume-<UUID>/0" 

1007 # - "../xcp-volume-<UUID>/0" 

1008 if device_path.startswith(DRBD_BY_RES_PATH): 

1009 prefix_len = len(DRBD_BY_RES_PATH) 

1010 elif device_path.startswith('../'): 

1011 prefix_len = 3 

1012 else: 

1013 raise LinstorVolumeManagerError('Unexpected device path: `{}`'.format(device_path)) 

1014 

1015 res_name_end = device_path.find('/', prefix_len) 

1016 assert res_name_end != -1 

1017 return device_path[prefix_len:res_name_end] 

1018 

    def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
        """
        Change the uuid of a volume.

        The rename is performed as a small crash-tolerant protocol over the
        LINSTOR KV store (steps 1-8 below): the destination namespace is
        first marked with PROP_UPDATING_UUID_SRC so a crash mid-copy can be
        detected and rolled back.

        :param str volume_uuid: The volume to modify.
        :param str new_volume_uuid: The new volume uuid to use.
        :param bool force: If true we doesn't check if volume_uuid is in the
        volume list. I.e. the volume can be marked as deleted but the volume
        can still be in the LINSTOR KV store if the deletion has failed.
        In specific cases like "undo" after a failed clone we must rename a bad
        deleted VDI.
        """

        self._logger(
            'Trying to update volume UUID {} to {}...'
            .format(volume_uuid, new_volume_uuid)
        )
        assert volume_uuid != new_volume_uuid, 'can\'t update volume UUID, same value'

        if not force:
            self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        if new_volume_uuid in self._volumes:
            raise LinstorVolumeManagerError(
                'Volume `{}` already exists'.format(new_volume_uuid),
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        volume_properties = self._get_volume_properties(volume_uuid)
        if volume_properties.get(self.PROP_UPDATING_UUID_SRC):
            # A previous rename was interrupted midway: refuse to stack a
            # new rename on top of an unfinished one.
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {}: invalid state'
                .format(volume_uuid)
            )

        # 1. Copy in temp variables metadata and volume_name.
        metadata = volume_properties.get(self.PROP_METADATA)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        # 2. Switch to new volume namespace.
        volume_properties.namespace = self._build_volume_namespace(
            new_volume_uuid
        )

        # The destination namespace must be empty, otherwise we would
        # silently overwrite another volume's properties.
        if list(volume_properties.items()):
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {} to {}: '
                .format(volume_uuid, new_volume_uuid) +
                'this last one is not empty'
            )

        try:
            # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC.
            # If we crash after that, the new properties can be removed
            # properly.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
            volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid

            # 4. Copy the properties.
            # Note: On new volumes, during clone for example, the metadata
            # may be missing. So we must test it to avoid this error:
            # "None has to be a str/unicode, but is <type 'NoneType'>"
            if metadata:
                volume_properties[self.PROP_METADATA] = metadata
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            # 5. Ok!
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
        except Exception as err:
            try:
                # Clear the new volume properties in case of failure.
                assert volume_properties.namespace == \
                    self._build_volume_namespace(new_volume_uuid)
                volume_properties.clear()
            except Exception as e:
                # Best effort cleanup: the PROP_UPDATING_UUID_SRC marker
                # left behind allows later repair.
                self._logger(
                    'Failed to clear new volume properties: {} (ignoring...)'
                    .format(e)
                )
            raise LinstorVolumeManagerError(
                'Failed to copy volume properties: {}'.format(err)
            )

        try:
            # 6. After this point, it's ok we can remove the
            # PROP_UPDATING_UUID_SRC property and clear the src properties
            # without problems.

            # 7. Switch to old volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                volume_uuid
            )
            volume_properties.clear()

            # 8. Switch a last time to new volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                new_volume_uuid
            )
            volume_properties.pop(self.PROP_UPDATING_UUID_SRC)
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to clear volume properties '
                'after volume uuid update: {}'.format(e)
            )

        # Keep the in-memory uuid set in sync with the KV store.
        self._volumes.remove(volume_uuid)
        self._volumes.add(new_volume_uuid)

        self._logger(
            'UUID update succeeded of {} to {}! (properties={})'
            .format(
                volume_uuid, new_volume_uuid,
                self._get_filtered_properties(volume_properties)
            )
        )

1134 

1135 def update_volume_name(self, volume_uuid, volume_name): 

1136 """ 

1137 Change the volume name of a volume. 

1138 :param str volume_uuid: The volume to modify. 

1139 :param str volume_name: The volume_name to use. 

1140 """ 

1141 

1142 self._ensure_volume_exists(volume_uuid) 

1143 self.ensure_volume_is_not_locked(volume_uuid) 

1144 if not volume_name.startswith(self.PREFIX_VOLUME): 

1145 raise LinstorVolumeManagerError( 

1146 'Volume name `{}` must be start with `{}`' 

1147 .format(volume_name, self.PREFIX_VOLUME) 

1148 ) 

1149 

1150 if volume_name not in self._fetch_resource_names(): 

1151 raise LinstorVolumeManagerError( 

1152 'Volume `{}` doesn\'t exist'.format(volume_name) 

1153 ) 

1154 

1155 volume_properties = self._get_volume_properties(volume_uuid) 

1156 volume_properties[self.PROP_VOLUME_NAME] = volume_name 

1157 

1158 def get_usage_states(self, volume_uuid): 

1159 """ 

1160 Check if a volume is currently used. 

1161 :param str volume_uuid: The volume uuid to check. 

1162 :return: A dictionnary that contains states. 

1163 :rtype: dict(str, bool or None) 

1164 """ 

1165 

1166 states = {} 

1167 

1168 volume_name = self.get_volume_name(volume_uuid) 

1169 for resource_state in self._linstor.resource_list_raise( 

1170 filter_by_resources=[volume_name] 

1171 ).resource_states: 

1172 states[resource_state.node_name] = resource_state.in_use 

1173 

1174 return states 

1175 

1176 def get_volume_openers(self, volume_uuid): 

1177 """ 

1178 Get openers of a volume. 

1179 :param str volume_uuid: The volume uuid to monitor. 

1180 :return: A dictionnary that contains openers. 

1181 :rtype: dict(str, obj) 

1182 """ 

1183 return get_all_volume_openers(self.get_volume_name(volume_uuid), '0') 

1184 

    def get_volumes_with_name(self):
        """
        Give a volume dictionnary that contains names actually owned.
        :return: A volume/name dict.
        :rtype: dict(str, str)
        """
        # Delegate to the generic property scanner using the name regex.
        return self._get_volumes_by_property(self.REG_VOLUME_NAME)

1192 

1193 def get_volumes_with_info(self): 

1194 """ 

1195 Give a volume dictionnary that contains VolumeInfos. 

1196 :return: A volume/VolumeInfo dict. 

1197 :rtype: dict(str, VolumeInfo) 

1198 """ 

1199 

1200 volumes = {} 

1201 

1202 all_volume_info = self._get_volumes_info() 

1203 volume_names = self.get_volumes_with_name() 

1204 for volume_uuid, volume_name in volume_names.items(): 

1205 if volume_name: 

1206 volume_info = all_volume_info.get(volume_name) 

1207 if volume_info: 

1208 volumes[volume_uuid] = volume_info 

1209 continue 

1210 

1211 # Well I suppose if this volume is not available, 

1212 # LINSTOR has been used directly without using this API. 

1213 volumes[volume_uuid] = self.VolumeInfo('') 

1214 

1215 return volumes 

1216 

1217 def get_volumes_with_metadata(self): 

1218 """ 

1219 Give a volume dictionnary that contains metadata. 

1220 :return: A volume/metadata dict. 

1221 :rtype: dict(str, dict) 

1222 """ 

1223 

1224 volumes = {} 

1225 

1226 metadata = self._get_volumes_by_property(self.REG_METADATA) 

1227 for volume_uuid, volume_metadata in metadata.items(): 

1228 if volume_metadata: 

1229 volume_metadata = json.loads(volume_metadata) 

1230 if isinstance(volume_metadata, dict): 

1231 volumes[volume_uuid] = volume_metadata 

1232 continue 

1233 raise LinstorVolumeManagerError( 

1234 'Expected dictionary in volume metadata: {}' 

1235 .format(volume_uuid) 

1236 ) 

1237 

1238 volumes[volume_uuid] = {} 

1239 

1240 return volumes 

1241 

1242 def get_volume_metadata(self, volume_uuid): 

1243 """ 

1244 Get the metadata of a volume. 

1245 :return: Dictionary that contains metadata. 

1246 :rtype: dict 

1247 """ 

1248 

1249 self._ensure_volume_exists(volume_uuid) 

1250 volume_properties = self._get_volume_properties(volume_uuid) 

1251 metadata = volume_properties.get(self.PROP_METADATA) 

1252 if metadata: 

1253 metadata = json.loads(metadata) 

1254 if isinstance(metadata, dict): 

1255 return metadata 

1256 raise LinstorVolumeManagerError( 

1257 'Expected dictionary in volume metadata: {}' 

1258 .format(volume_uuid) 

1259 ) 

1260 return {} 

1261 

1262 def set_volume_metadata(self, volume_uuid, metadata): 

1263 """ 

1264 Set the metadata of a volume. 

1265 :param dict metadata: Dictionary that contains metadata. 

1266 """ 

1267 

1268 self._ensure_volume_exists(volume_uuid) 

1269 self.ensure_volume_is_not_locked(volume_uuid) 

1270 

1271 assert isinstance(metadata, dict) 

1272 volume_properties = self._get_volume_properties(volume_uuid) 

1273 volume_properties[self.PROP_METADATA] = json.dumps(metadata) 

1274 

1275 def update_volume_metadata(self, volume_uuid, metadata): 

1276 """ 

1277 Update the metadata of a volume. It modify only the given keys. 

1278 It doesn't remove unreferenced key instead of set_volume_metadata. 

1279 :param dict metadata: Dictionary that contains metadata. 

1280 """ 

1281 

1282 self._ensure_volume_exists(volume_uuid) 

1283 self.ensure_volume_is_not_locked(volume_uuid) 

1284 

1285 assert isinstance(metadata, dict) 

1286 volume_properties = self._get_volume_properties(volume_uuid) 

1287 

1288 current_metadata = json.loads( 

1289 volume_properties.get(self.PROP_METADATA, '{}') 

1290 ) 

1291 if not isinstance(metadata, dict): 

1292 raise LinstorVolumeManagerError( 

1293 'Expected dictionary in volume metadata: {}' 

1294 .format(volume_uuid) 

1295 ) 

1296 

1297 for key, value in metadata.items(): 

1298 current_metadata[key] = value 

1299 volume_properties[self.PROP_METADATA] = json.dumps(current_metadata) 

1300 

1301 def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True): 

1302 """ 

1303 Clone a volume. Do not copy the data, this method creates a new volume 

1304 with the same size. 

1305 :param str volume_uuid: The volume to clone. 

1306 :param str clone_uuid: The cloned volume. 

1307 :param bool persistent: If false the volume will be unavailable 

1308 on the next constructor call LinstorSR(...). 

1309 :return: The current device path of the cloned volume. 

1310 :rtype: str 

1311 """ 

1312 

1313 volume_name = self.get_volume_name(volume_uuid) 

1314 self.ensure_volume_is_not_locked(volume_uuid) 

1315 

1316 # 1. Find ideal nodes + size to use. 

1317 ideal_node_names, size = self._get_volume_node_names_and_size( 

1318 volume_name 

1319 ) 

1320 if size <= 0: 

1321 raise LinstorVolumeManagerError( 

1322 'Invalid size of {} for volume `{}`'.format(size, volume_name) 

1323 ) 

1324 

1325 # 2. Create clone! 

1326 return self.create_volume(clone_uuid, size, persistent) 

1327 

1328 def remove_resourceless_volumes(self): 

1329 """ 

1330 Remove all volumes without valid or non-empty name 

1331 (i.e. without LINSTOR resource). It's different than 

1332 LinstorVolumeManager constructor that takes a `repair` param that 

1333 removes volumes with `PROP_NOT_EXISTS` to 1. 

1334 """ 

1335 

1336 resource_names = self._fetch_resource_names() 

1337 for volume_uuid, volume_name in self.get_volumes_with_name().items(): 

1338 if not volume_name or volume_name not in resource_names: 

1339 # Don't force, we can be sure of what's happening. 

1340 self.destroy_volume(volume_uuid) 

1341 

    def destroy(self):
        """
        Destroy this SR. Object should not be used after that.
        Note: every volume must already be removed; this method refuses to
        run while volumes (ours or external) remain.
        """

        # 1. Ensure volume list is empty. No cost.
        if self._volumes:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes'
            )

        # 2. Fetch ALL resource names.
        # This list may therefore contain volumes created outside
        # the scope of the driver.
        resource_names = self._fetch_resource_names(ignore_deleted=False)
        try:
            resource_names.remove(DATABASE_VOLUME_NAME)
        except KeyError:
            # Really strange to reach that point.
            # Normally we always have the database volume in the list.
            pass

        # 3. Ensure the resource name list is entirely empty...
        if resource_names:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes (created externally or being deleted)'
            )

        # 4. Destroying...
        controller_is_running = self._controller_is_running()
        uri = 'linstor://localhost'
        try:
            if controller_is_running:
                self._start_controller(start=False)

            # 4.1. Umount LINSTOR database.
            self._mount_database_volume(
                self.build_device_path(DATABASE_VOLUME_NAME),
                mount=False,
                force=True
            )

            # 4.2. Refresh instance.
            # The controller must be up again to drive the destruction.
            self._start_controller(start=True)
            self._linstor = self._create_linstor_instance(
                uri, keep_uri_unmodified=True
            )

            # 4.3. Destroy database volume.
            self._destroy_resource(DATABASE_VOLUME_NAME)

            # 4.4. Refresh linstor connection.
            # Without we get this error:
            # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.."
            # Because the deletion of the databse was not seen by Linstor for some reason.
            # It seems a simple refresh of the Linstor connection make it aware of the deletion.
            self._linstor.disconnect()
            self._linstor.connect()

            # 4.5. Destroy remaining drbd nodes on hosts.
            # We check if there is a DRBD node on hosts that could mean blocking when destroying resource groups.
            # It needs to be done locally by each host so we go through the linstor-manager plugin.
            # If we don't do this sometimes, the destroy will fail when trying to destroy the resource groups with:
            # "linstor-manager:destroy error: Failed to destroy SP `xcp-sr-linstor_group_thin_device` on node `r620-s2`: The specified storage pool 'xcp-sr-linstor_group_thin_device' on node 'r620-s2' can not be deleted as volumes / snapshot-volumes are still using it."
            session = util.timeout_call(5, util.get_localAPI_session)
            for host_ref in session.xenapi.host.get_all():
                try:
                    # Best effort per host: a failure is logged, not fatal.
                    response = session.xenapi.host.call_plugin(
                        host_ref, 'linstor-manager', 'destroyDrbdVolumes', {'volume_group': self._group_name}
                    )
                except Exception as e:
                    util.SMlog('Calling destroyDrbdVolumes on host {} failed with error {}'.format(host_ref, e))

            # 4.6. Destroy group and storage pools.
            self._destroy_resource_group(self._linstor, self._group_name)
            self._destroy_resource_group(self._linstor, self._ha_group_name)
            for pool in self._get_storage_pools(force=True):
                self._destroy_storage_pool(
                    self._linstor, pool.name, pool.node_name
                )
        except Exception as e:
            # Restore the controller to its previous state before
            # propagating the failure.
            self._start_controller(start=controller_is_running)
            raise e

        # Final cleanup of the local database directory; failures here are
        # only logged because the SR itself is already destroyed.
        try:
            self._start_controller(start=False)
            for file in os.listdir(DATABASE_PATH):
                if file != 'lost+found':
                    os.remove(DATABASE_PATH + '/' + file)
        except Exception as e:
            util.SMlog(
                'Ignoring failure after LINSTOR SR destruction: {}'
                .format(e)
            )

1439 

1440 def find_up_to_date_diskful_nodes(self, volume_uuid): 

1441 """ 

1442 Find all nodes that contain a specific volume using diskful disks. 

1443 The disk must be up to data to be used. 

1444 :param str volume_uuid: The volume to use. 

1445 :return: The available nodes. 

1446 :rtype: tuple(set(str), str) 

1447 """ 

1448 

1449 volume_name = self.get_volume_name(volume_uuid) 

1450 

1451 in_use_by = None 

1452 node_names = set() 

1453 

1454 resource_states = filter( 

1455 lambda resource_state: resource_state.name == volume_name, 

1456 self._get_resource_cache().resource_states 

1457 ) 

1458 

1459 for resource_state in resource_states: 

1460 volume_state = resource_state.volume_states[0] 

1461 if volume_state.disk_state == 'UpToDate': 

1462 node_names.add(resource_state.node_name) 

1463 if resource_state.in_use: 

1464 in_use_by = resource_state.node_name 

1465 

1466 return (node_names, in_use_by) 

1467 

    def invalidate_resource_cache(self):
        """
        If resources are impacted by external commands like vhdutil,
        it's necessary to call this function to invalidate current resource
        cache.
        """
        # The cache is rebuilt lazily on the next resource access.
        self._mark_resource_cache_as_dirty()

1475 

1476 def has_node(self, node_name): 

1477 """ 

1478 Check if a node exists in the LINSTOR database. 

1479 :rtype: bool 

1480 """ 

1481 result = self._linstor.node_list() 

1482 error_str = self._get_error_str(result) 

1483 if error_str: 

1484 raise LinstorVolumeManagerError( 

1485 'Failed to list nodes using `{}`: {}' 

1486 .format(node_name, error_str) 

1487 ) 

1488 return bool(result[0].node(node_name)) 

1489 

1490 def create_node(self, node_name, ip): 

1491 """ 

1492 Create a new node in the LINSTOR database. 

1493 :param str node_name: Node name to use. 

1494 :param str ip: Host IP to communicate. 

1495 """ 

1496 result = self._linstor.node_create( 

1497 node_name, 

1498 linstor.consts.VAL_NODE_TYPE_CMBD, 

1499 ip 

1500 ) 

1501 errors = self._filter_errors(result) 

1502 if errors: 

1503 error_str = self._get_error_str(errors) 

1504 raise LinstorVolumeManagerError( 

1505 'Failed to create node `{}`: {}'.format(node_name, error_str) 

1506 ) 

1507 

1508 def destroy_node(self, node_name): 

1509 """ 

1510 Destroy a node in the LINSTOR database. 

1511 :param str node_name: Node name to remove. 

1512 """ 

1513 result = self._linstor.node_delete(node_name) 

1514 errors = self._filter_errors(result) 

1515 if errors: 

1516 error_str = self._get_error_str(errors) 

1517 raise LinstorVolumeManagerError( 

1518 'Failed to destroy node `{}`: {}'.format(node_name, error_str) 

1519 ) 

1520 

1521 def create_node_interface(self, node_name, name, ip): 

1522 """ 

1523 Create a new node interface in the LINSTOR database. 

1524 :param str node_name: Node name of the interface to use. 

1525 :param str name: Interface to create. 

1526 :param str ip: IP of the interface. 

1527 """ 

1528 result = self._linstor.netinterface_create(node_name, name, ip) 

1529 errors = self._filter_errors(result) 

1530 if errors: 

1531 error_str = self._get_error_str(errors) 

1532 raise LinstorVolumeManagerError( 

1533 'Failed to create node interface on `{}`: {}'.format(node_name, error_str) 

1534 ) 

1535 

1536 def destroy_node_interface(self, node_name, name): 

1537 """ 

1538 Destroy a node interface in the LINSTOR database. 

1539 :param str node_name: Node name of the interface to remove. 

1540 :param str name: Interface to remove. 

1541 """ 

1542 

1543 if name == 'default': 

1544 raise LinstorVolumeManagerError( 

1545 'Unable to delete the default interface of a node!' 

1546 ) 

1547 

1548 result = self._linstor.netinterface_delete(node_name, name) 

1549 errors = self._filter_errors(result) 

1550 if errors: 

1551 error_str = self._get_error_str(errors) 

1552 raise LinstorVolumeManagerError( 

1553 'Failed to destroy node interface on `{}`: {}'.format(node_name, error_str) 

1554 ) 

1555 

1556 def modify_node_interface(self, node_name, name, ip): 

1557 """ 

1558 Modify a node interface in the LINSTOR database. Create it if necessary. 

1559 :param str node_name: Node name of the interface to use. 

1560 :param str name: Interface to modify or create. 

1561 :param str ip: IP of the interface. 

1562 """ 

1563 result = self._linstor.netinterface_create(node_name, name, ip) 

1564 errors = self._filter_errors(result) 

1565 if not errors: 

1566 return 

1567 

1568 if self._check_errors(errors, [linstor.consts.FAIL_EXISTS_NET_IF]): 

1569 result = self._linstor.netinterface_modify(node_name, name, ip) 

1570 errors = self._filter_errors(result) 

1571 if not errors: 

1572 return 

1573 

1574 error_str = self._get_error_str(errors) 

1575 raise LinstorVolumeManagerError( 

1576 'Unable to modify interface on `{}`: {}'.format(node_name, error_str) 

1577 ) 

1578 

1579 def list_node_interfaces(self, node_name): 

1580 """ 

1581 List all node interfaces. 

1582 :param str node_name: Node name to use to list interfaces. 

1583 :rtype: list 

1584 : 

1585 """ 

1586 result = self._linstor.net_interface_list(node_name) 

1587 if not result: 

1588 raise LinstorVolumeManagerError( 

1589 'Unable to list interfaces on `{}`: no list received'.format(node_name) 

1590 ) 

1591 

1592 interfaces = {} 

1593 for interface in result: 

1594 interface = interface._rest_data 

1595 interfaces[interface['name']] = { 

1596 'address': interface['address'], 

1597 'active': interface['is_active'] 

1598 } 

1599 return interfaces 

1600 

1601 def get_node_preferred_interface(self, node_name): 

1602 """ 

1603 Get the preferred interface used by a node. 

1604 :param str node_name: Node name of the interface to get. 

1605 :rtype: str 

1606 """ 

1607 try: 

1608 nodes = self._linstor.node_list_raise([node_name]).nodes 

1609 if nodes: 

1610 properties = nodes[0].props 

1611 return properties.get('PrefNic', 'default') 

1612 return nodes 

1613 except Exception as e: 

1614 raise LinstorVolumeManagerError( 

1615 'Failed to get preferred interface: `{}`'.format(e) 

1616 ) 

1617 

1618 def set_node_preferred_interface(self, node_name, name): 

1619 """ 

1620 Set the preferred interface to use on a node. 

1621 :param str node_name: Node name of the interface. 

1622 :param str name: Preferred interface to use. 

1623 """ 

1624 result = self._linstor.node_modify(node_name, property_dict={'PrefNic': name}) 

1625 errors = self._filter_errors(result) 

1626 if errors: 

1627 error_str = self._get_error_str(errors) 

1628 raise LinstorVolumeManagerError( 

1629 'Failed to set preferred node interface on `{}`: {}'.format(node_name, error_str) 

1630 ) 

1631 

1632 def get_nodes_info(self): 

1633 """ 

1634 Get all nodes + statuses, used or not by the pool. 

1635 :rtype: dict(str, dict) 

1636 """ 

1637 try: 

1638 nodes = {} 

1639 for node in self._linstor.node_list_raise().nodes: 

1640 nodes[node.name] = node.connection_status 

1641 return nodes 

1642 except Exception as e: 

1643 raise LinstorVolumeManagerError( 

1644 'Failed to get all nodes: `{}`'.format(e) 

1645 ) 

1646 

1647 def get_storage_pools_info(self): 

1648 """ 

1649 Give all storage pools of current group name. 

1650 :rtype: dict(str, list) 

1651 """ 

1652 storage_pools = {} 

1653 for pool in self._get_storage_pools(force=True): 

1654 if pool.node_name not in storage_pools: 

1655 storage_pools[pool.node_name] = [] 

1656 

1657 size = -1 

1658 capacity = -1 

1659 

1660 space = pool.free_space 

1661 if space: 

1662 size = space.free_capacity 

1663 if size < 0: 

1664 size = -1 

1665 else: 

1666 size *= 1024 

1667 capacity = space.total_capacity 

1668 if capacity <= 0: 

1669 capacity = -1 

1670 else: 

1671 capacity *= 1024 

1672 

1673 storage_pools[pool.node_name].append({ 

1674 'name': pool.name, 

1675 'linstor-uuid': pool.uuid, 

1676 'free-size': size, 

1677 'capacity': capacity 

1678 }) 

1679 

1680 return storage_pools 

1681 

    def get_resources_info(self):
        """
        Give all resources of current group name.
        Builds, per resource name, a dict with a per-node breakdown
        ('nodes') and the owning SM volume uuid ('uuid', empty if the
        resource is not tracked by this manager).
        :rtype: dict(str, list)
        """
        resources = {}
        resource_list = self._get_resource_cache()
        volume_names = self.get_volumes_with_name()
        # Pass 1: one entry per (resource, node) with flags and volumes.
        for resource in resource_list.resources:
            if resource.name not in resources:
                resources[resource.name] = { 'nodes': {}, 'uuid': '' }
            resource_nodes = resources[resource.name]['nodes']

            resource_nodes[resource.node_name] = {
                'volumes': [],
                'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags,
                'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags
            }
            resource_volumes = resource_nodes[resource.node_name]['volumes']

            for volume in resource.volumes:
                # We ignore diskless pools of the form "DfltDisklessStorPool".
                if volume.storage_pool_name != self._group_name:
                    continue

                # Scale sizes by 1024 (presumably KiB -> bytes); -1 marks
                # a value LINSTOR could not report.
                usable_size = volume.usable_size
                if usable_size < 0:
                    usable_size = -1
                else:
                    usable_size *= 1024

                allocated_size = volume.allocated_size
                if allocated_size < 0:
                    allocated_size = -1
                else:
                    allocated_size *= 1024

                resource_volumes.append({
                    'storage-pool-name': volume.storage_pool_name,
                    'linstor-uuid': volume.uuid,
                    'number': volume.number,
                    'device-path': volume.device_path,
                    'usable-size': usable_size,
                    'allocated-size': allocated_size
                })

        # Pass 2: attach the DRBD usage/disk states to each node entry.
        for resource_state in resource_list.resource_states:
            resource = resources[resource_state.rsc_name]['nodes'][resource_state.node_name]
            resource['in-use'] = resource_state.in_use

            volumes = resource['volumes']
            for volume_state in resource_state.volume_states:
                # Match states to volumes by their volume number.
                volume = next((x for x in volumes if x['number'] == volume_state.number), None)
                if volume:
                    volume['disk-state'] = volume_state.disk_state

        # Pass 3: map resources back to the SM volume uuids that own them.
        for volume_uuid, volume_name in volume_names.items():
            resource = resources.get(volume_name)
            if resource:
                resource['uuid'] = volume_uuid

        return resources

1744 

    def get_database_path(self):
        """
        Get the database path.
        :return: The current database path.
        :rtype: str
        """
        # Delegate to the class helper using our LINSTOR connection.
        return self._request_database_path(self._linstor)

1752 

1753 @classmethod 

1754 def get_all_group_names(cls, base_name): 

1755 """ 

1756 Get all group names. I.e. list of current group + HA. 

1757 :param str base_name: The SR group_name to use. 

1758 :return: List of group names. 

1759 :rtype: list 

1760 """ 

1761 return [cls._build_group_name(base_name), cls._build_ha_group_name(base_name)] 

1762 

    @classmethod
    def create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__):
        """
        Create a new SR on the given nodes.
        :param str group_name: The SR group_name to use.
        :param set(str) ips: Node ips.
        :param int redundancy: How many copy of volumes should we store?
        :param bool thin_provisioning: Use thin or thick provisioning.
        :param function logger: Function to log messages.
        :return: A new LinstorSr instance.
        :rtype: LinstorSr
        """

        try:
            # The controller is needed while _create_sr builds the SR.
            cls._start_controller(start=True)
            sr = cls._create_sr(group_name, ips, redundancy, thin_provisioning, logger)
        finally:
            # Controller must be stopped and volume unmounted because
            # it is the role of the drbd-reactor daemon to do the right
            # actions.
            cls._start_controller(start=False)
            cls._mount_volume(
                cls.build_device_path(DATABASE_VOLUME_NAME),
                DATABASE_PATH,
                mount=False
            )
        # Only reached when _create_sr succeeded (an exception propagates
        # through the finally block above).
        return sr

1790 

1791 @classmethod 

1792 def _create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__): 

1793 # 1. Check if SR already exists. 

1794 uri = 'linstor://localhost' 

1795 

1796 lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) 

1797 

1798 node_names = list(ips.keys()) 

1799 for node_name, ip in ips.items(): 

1800 while True: 

1801 # Try to create node. 

1802 result = lin.node_create( 

1803 node_name, 

1804 linstor.consts.VAL_NODE_TYPE_CMBD, 

1805 ip 

1806 ) 

1807 

1808 errors = cls._filter_errors(result) 

1809 if cls._check_errors( 

1810 errors, [linstor.consts.FAIL_EXISTS_NODE] 

1811 ): 

1812 # If it already exists, remove, then recreate. 

1813 result = lin.node_delete(node_name) 

1814 error_str = cls._get_error_str(result) 

1815 if error_str: 

1816 raise LinstorVolumeManagerError( 

1817 'Failed to remove old node `{}`: {}' 

1818 .format(node_name, error_str) 

1819 ) 

1820 elif not errors: 

1821 break # Created! 

1822 else: 

1823 raise LinstorVolumeManagerError( 

1824 'Failed to create node `{}` with ip `{}`: {}'.format( 

1825 node_name, ip, cls._get_error_str(errors) 

1826 ) 

1827 ) 

1828 

1829 driver_pool_name = group_name 

1830 base_group_name = group_name 

1831 group_name = cls._build_group_name(group_name) 

1832 storage_pool_name = group_name 

1833 pools = lin.storage_pool_list_raise(filter_by_stor_pools=[storage_pool_name]).storage_pools 

1834 if pools: 

1835 existing_node_names = [pool.node_name for pool in pools] 

1836 raise LinstorVolumeManagerError( 

1837 'Unable to create SR `{}`. It already exists on node(s): {}' 

1838 .format(group_name, existing_node_names) 

1839 ) 

1840 

1841 if lin.resource_group_list_raise( 

1842 cls.get_all_group_names(base_group_name) 

1843 ).resource_groups: 

1844 if not lin.resource_dfn_list_raise().resource_definitions: 

1845 backup_path = cls._create_database_backup_path() 

1846 logger( 

1847 'Group name already exists `{}` without LVs. ' 

1848 'Ignoring and moving the config files in {}'.format(group_name, backup_path) 

1849 ) 

1850 cls._move_files(DATABASE_PATH, backup_path) 

1851 else: 

1852 raise LinstorVolumeManagerError( 

1853 'Unable to create SR `{}`: The group name already exists' 

1854 .format(group_name) 

1855 ) 

1856 

1857 if thin_provisioning: 

1858 driver_pool_parts = driver_pool_name.split('/') 

1859 if not len(driver_pool_parts) == 2: 

1860 raise LinstorVolumeManagerError( 

1861 'Invalid group name using thin provisioning. ' 

1862 'Expected format: \'VG/LV`\'' 

1863 ) 

1864 

1865 # 2. Create storage pool on each node + resource group. 

1866 reg_volume_group_not_found = re.compile( 

1867 ".*Volume group '.*' not found$" 

1868 ) 

1869 

1870 i = 0 

1871 try: 

1872 # 2.a. Create storage pools. 

1873 storage_pool_count = 0 

1874 while i < len(node_names): 

1875 node_name = node_names[i] 

1876 

1877 result = lin.storage_pool_create( 

1878 node_name=node_name, 

1879 storage_pool_name=storage_pool_name, 

1880 storage_driver='LVM_THIN' if thin_provisioning else 'LVM', 

1881 driver_pool_name=driver_pool_name 

1882 ) 

1883 

1884 errors = linstor.Linstor.filter_api_call_response_errors( 

1885 result 

1886 ) 

1887 if errors: 

1888 if len(errors) == 1 and errors[0].is_error( 

1889 linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR 

1890 ) and reg_volume_group_not_found.match(errors[0].message): 

1891 logger( 

1892 'Volume group `{}` not found on `{}`. Ignoring...' 

1893 .format(group_name, node_name) 

1894 ) 

1895 cls._destroy_storage_pool(lin, storage_pool_name, node_name) 

1896 else: 

1897 error_str = cls._get_error_str(result) 

1898 raise LinstorVolumeManagerError( 

1899 'Could not create SP `{}` on node `{}`: {}' 

1900 .format(group_name, node_name, error_str) 

1901 ) 

1902 else: 

1903 storage_pool_count += 1 

1904 i += 1 

1905 

1906 if not storage_pool_count: 

1907 raise LinstorVolumeManagerError( 

1908 'Unable to create SR `{}`: No VG group found'.format( 

1909 group_name, 

1910 ) 

1911 ) 

1912 

1913 # 2.b. Create resource groups. 

1914 ha_group_name = cls._build_ha_group_name(base_group_name) 

1915 cls._create_resource_group( 

1916 lin, 

1917 group_name, 

1918 storage_pool_name, 

1919 redundancy, 

1920 True 

1921 ) 

1922 cls._create_resource_group( 

1923 lin, 

1924 ha_group_name, 

1925 storage_pool_name, 

1926 3, 

1927 True 

1928 ) 

1929 

1930 # 3. Create the LINSTOR database volume and mount it. 

1931 try: 

1932 logger('Creating database volume...') 

1933 volume_path = cls._create_database_volume( 

1934 lin, ha_group_name, storage_pool_name, node_names, redundancy 

1935 ) 

1936 except LinstorVolumeManagerError as e: 

1937 if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS: 

1938 logger('Destroying database volume after creation fail...') 

1939 cls._force_destroy_database_volume(lin, group_name) 

1940 raise 

1941 

1942 try: 

1943 logger('Mounting database volume...') 

1944 

1945 # First we must disable the controller to move safely the 

1946 # LINSTOR config. 

1947 cls._start_controller(start=False) 

1948 

1949 cls._mount_database_volume(volume_path) 

1950 except Exception as e: 

1951 # Ensure we are connected because controller has been 

1952 # restarted during mount call. 

1953 logger('Destroying database volume after mount fail...') 

1954 

1955 try: 

1956 cls._start_controller(start=True) 

1957 except Exception: 

1958 pass 

1959 

1960 lin = cls._create_linstor_instance( 

1961 uri, keep_uri_unmodified=True 

1962 ) 

1963 cls._force_destroy_database_volume(lin, group_name) 

1964 raise e 

1965 

1966 cls._start_controller(start=True) 

1967 lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) 

1968 

1969 # 4. Remove storage pools/resource/volume group in the case of errors. 

1970 except Exception as e: 

1971 logger('Destroying resource group and storage pools after fail...') 

1972 try: 

1973 cls._destroy_resource_group(lin, group_name) 

1974 cls._destroy_resource_group(lin, ha_group_name) 

1975 except Exception as e2: 

1976 logger('Failed to destroy resource group: {}'.format(e2)) 

1977 pass 

1978 j = 0 

1979 i = min(i, len(node_names) - 1) 

1980 while j <= i: 

1981 try: 

1982 cls._destroy_storage_pool(lin, storage_pool_name, node_names[j]) 

1983 except Exception as e2: 

1984 logger('Failed to destroy resource group: {}'.format(e2)) 

1985 pass 

1986 j += 1 

1987 raise e 

1988 

1989 # 5. Return new instance. 

1990 instance = cls.__new__(cls) 

1991 instance._linstor = lin 

1992 instance._logger = logger 

1993 instance._redundancy = redundancy 

1994 instance._base_group_name = base_group_name 

1995 instance._group_name = group_name 

1996 instance._volumes = set() 

1997 instance._storage_pools_time = 0 

1998 instance._kv_cache = instance._create_kv_cache() 

1999 instance._resource_cache = None 

2000 instance._resource_cache_dirty = True 

2001 instance._volume_info_cache = None 

2002 instance._volume_info_cache_dirty = True 

2003 return instance 

2004 

2005 @classmethod 

2006 def build_device_path(cls, volume_name): 

2007 """ 

2008 Build a device path given a volume name. 

2009 :param str volume_name: The volume name to use. 

2010 :return: A valid or not device path. 

2011 :rtype: str 

2012 """ 

2013 

2014 return '{}{}/0'.format(cls.DEV_ROOT_PATH, volume_name) 

2015 

2016 @classmethod 

2017 def build_volume_name(cls, base_name): 

2018 """ 

2019 Build a volume name given a base name (i.e. a UUID). 

2020 :param str base_name: The volume name to use. 

2021 :return: A valid or not device path. 

2022 :rtype: str 

2023 """ 

2024 return '{}{}'.format(cls.PREFIX_VOLUME, base_name) 

2025 

2026 @classmethod 

2027 def round_up_volume_size(cls, volume_size): 

2028 """ 

2029 Align volume size on higher multiple of BLOCK_SIZE. 

2030 :param int volume_size: The volume size to align. 

2031 :return: An aligned volume size. 

2032 :rtype: int 

2033 """ 

2034 return round_up(volume_size, cls.BLOCK_SIZE) 

2035 

2036 @classmethod 

2037 def round_down_volume_size(cls, volume_size): 

2038 """ 

2039 Align volume size on lower multiple of BLOCK_SIZE. 

2040 :param int volume_size: The volume size to align. 

2041 :return: An aligned volume size. 

2042 :rtype: int 

2043 """ 

2044 return round_down(volume_size, cls.BLOCK_SIZE) 

2045 

2046 # -------------------------------------------------------------------------- 

2047 # Private helpers. 

2048 # -------------------------------------------------------------------------- 

2049 

2050 def _create_kv_cache(self): 

2051 self._kv_cache = self._create_linstor_kv('/') 

2052 self._kv_cache_dirty = False 

2053 return self._kv_cache 

2054 

2055 def _get_kv_cache(self): 

2056 if self._kv_cache_dirty: 

2057 self._kv_cache = self._create_kv_cache() 

2058 return self._kv_cache 

2059 

2060 def _create_resource_cache(self): 

2061 self._resource_cache = self._linstor.resource_list_raise() 

2062 self._resource_cache_dirty = False 

2063 return self._resource_cache 

2064 

2065 def _get_resource_cache(self): 

2066 if self._resource_cache_dirty: 

2067 self._resource_cache = self._create_resource_cache() 

2068 return self._resource_cache 

2069 

2070 def _mark_resource_cache_as_dirty(self): 

2071 self._resource_cache_dirty = True 

2072 self._volume_info_cache_dirty = True 

2073 

2074 # -------------------------------------------------------------------------- 

2075 

2076 def _ensure_volume_exists(self, volume_uuid): 

2077 if volume_uuid not in self._volumes: 

2078 raise LinstorVolumeManagerError( 

2079 'volume `{}` doesn\'t exist'.format(volume_uuid), 

2080 LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS 

2081 ) 

2082 

2083 def _find_best_size_candidates(self): 

2084 result = self._linstor.resource_group_qmvs(self._group_name) 

2085 error_str = self._get_error_str(result) 

2086 if error_str: 

2087 raise LinstorVolumeManagerError( 

2088 'Failed to get max volume size allowed of SR `{}`: {}'.format( 

2089 self._group_name, 

2090 error_str 

2091 ) 

2092 ) 

2093 return result[0].candidates 

2094 

2095 def _fetch_resource_names(self, ignore_deleted=True): 

2096 resource_names = set() 

2097 dfns = self._linstor.resource_dfn_list_raise().resource_definitions 

2098 for dfn in dfns: 

2099 if dfn.resource_group_name in self.get_all_group_names(self._base_group_name) and ( 

2100 ignore_deleted or 

2101 linstor.consts.FLAG_DELETE not in dfn.flags 

2102 ): 

2103 resource_names.add(dfn.name) 

2104 return resource_names 

2105 

2106 def _get_volumes_info(self, volume_name=None): 

2107 all_volume_info = {} 

2108 

2109 if not self._volume_info_cache_dirty: 

2110 return self._volume_info_cache 

2111 

2112 def process_resource(resource): 

2113 if resource.name not in all_volume_info: 

2114 current = all_volume_info[resource.name] = self.VolumeInfo( 

2115 resource.name 

2116 ) 

2117 else: 

2118 current = all_volume_info[resource.name] 

2119 

2120 if linstor.consts.FLAG_DISKLESS not in resource.flags: 

2121 current.diskful.append(resource.node_name) 

2122 

2123 for volume in resource.volumes: 

2124 # We ignore diskless pools of the form "DfltDisklessStorPool". 

2125 if volume.storage_pool_name != self._group_name: 

2126 continue 

2127 # Only fetch first volume. 

2128 if volume.number != 0: 

2129 continue 

2130 

2131 allocated_size = volume.allocated_size 

2132 if allocated_size > current.allocated_size: 

2133 current.allocated_size = allocated_size 

2134 

2135 usable_size = volume.usable_size 

2136 if usable_size > 0 and ( 

2137 usable_size < current.virtual_size or 

2138 not current.virtual_size 

2139 ): 

2140 current.virtual_size = usable_size 

2141 

2142 try: 

2143 for resource in self._get_resource_cache().resources: 

2144 process_resource(resource) 

2145 for volume in all_volume_info.values(): 

2146 if volume.allocated_size <= 0: 

2147 raise LinstorVolumeManagerError('Failed to get allocated size of `{}`'.format(resource.name)) 

2148 

2149 if volume.virtual_size <= 0: 

2150 raise LinstorVolumeManagerError('Failed to get usable size of `{}`'.format(volume.name)) 

2151 

2152 volume.allocated_size *= 1024 

2153 volume.virtual_size *= 1024 

2154 except LinstorVolumeManagerError: 

2155 self._mark_resource_cache_as_dirty() 

2156 raise 

2157 

2158 self._volume_info_cache_dirty = False 

2159 self._volume_info_cache = all_volume_info 

2160 

2161 return all_volume_info 

2162 

2163 def _get_volume_node_names_and_size(self, volume_name): 

2164 node_names = set() 

2165 size = -1 

2166 for resource in self._linstor.resource_list_raise( 

2167 filter_by_resources=[volume_name] 

2168 ).resources: 

2169 for volume in resource.volumes: 

2170 # We ignore diskless pools of the form "DfltDisklessStorPool". 

2171 if volume.storage_pool_name != self._group_name: 

2172 continue 

2173 

2174 node_names.add(resource.node_name) 

2175 

2176 usable_size = volume.usable_size 

2177 if usable_size <= 0: 

2178 continue 

2179 

2180 if size < 0: 

2181 size = usable_size 

2182 else: 

2183 size = min(size, usable_size) 

2184 

2185 if size <= 0: 

2186 raise LinstorVolumeManagerError('Failed to get usable size of `{}`'.format(resource.name)) 

2187 

2188 return (node_names, size * 1024) 

2189 

2190 def _compute_size(self, attr): 

2191 capacity = 0 

2192 for pool in self._get_storage_pools(force=True): 

2193 space = pool.free_space 

2194 if space: 

2195 size = getattr(space, attr) 

2196 if size < 0: 

2197 raise LinstorVolumeManagerError( 

2198 'Failed to get pool {} attr of `{}`' 

2199 .format(attr, pool.node_name) 

2200 ) 

2201 capacity += size 

2202 return capacity * 1024 

2203 

2204 def _get_node_names(self): 

2205 node_names = set() 

2206 for pool in self._get_storage_pools(): 

2207 node_names.add(pool.node_name) 

2208 return node_names 

2209 

2210 def _get_storage_pools(self, force=False): 

2211 cur_time = time.time() 

2212 elsaped_time = cur_time - self._storage_pools_time 

2213 

2214 if force or elsaped_time >= self.STORAGE_POOLS_FETCH_INTERVAL: 

2215 self._storage_pools = self._linstor.storage_pool_list_raise( 

2216 filter_by_stor_pools=[self._group_name] 

2217 ).storage_pools 

2218 self._storage_pools_time = time.time() 

2219 

2220 return self._storage_pools 

2221 

    def _create_volume(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create a LINSTOR resource definition for a volume and optionally
        auto-place its replicas. The whole creation is retried up to 5
        times; on failure the partially created volume is destroyed
        (properties preserved).
        :param str volume_uuid: UUID used in error reports and cleanup.
        :param str volume_name: LINSTOR resource definition name.
        :param int size: Requested size in bytes (rounded up to BLOCK_SIZE).
        :param bool place_resources: Whether to auto-place replicas now.
        :param bool high_availability: Use the HA resource group instead
            of the default one.
        """
        size = self.round_up_volume_size(size)
        # Any creation invalidates the cached resource/volume info.
        self._mark_resource_cache_as_dirty()

        group_name = self._ha_group_name if high_availability else self._group_name
        def create_definition():
            # Spawn only the definition (no placement). If the HA group is
            # missing on the first attempt, create it and retry once.
            first_attempt = True
            while True:
                try:
                    self._check_volume_creation_errors(
                        self._linstor.resource_group_spawn(
                            rsc_grp_name=group_name,
                            rsc_dfn_name=volume_name,
                            vlm_sizes=['{}B'.format(size)],
                            definitions_only=True
                        ),
                        volume_uuid,
                        self._group_name
                    )
                    break
                except LinstorVolumeManagerError as e:
                    # Only recover from a missing HA group, exactly once.
                    if (
                        not first_attempt or
                        not high_availability or
                        e.code != LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
                    ):
                        raise

                    first_attempt = False
                    self._create_resource_group(
                        self._linstor,
                        group_name,
                        self._group_name,
                        3,
                        True
                    )

            self._configure_volume_peer_slots(self._linstor, volume_name)

        def clean():
            # Best-effort cleanup: log but never raise from here.
            try:
                self._destroy_volume(volume_uuid, force=True, preserve_properties=True)
            except Exception as e:
                self._logger(
                    'Unable to destroy volume {} after creation fail: {}'
                    .format(volume_uuid, e)
                )

        def create():
            try:
                create_definition()
                if place_resources:
                    # Basic case when we use the default redundancy of the group.
                    self._check_volume_creation_errors(
                        self._linstor.resource_auto_place(
                            rsc_name=volume_name,
                            place_count=self._redundancy,
                            diskless_on_remaining=False
                        ),
                        volume_uuid,
                        self._group_name
                    )
            except LinstorVolumeManagerError as e:
                # ERR_VOLUME_EXISTS means another actor owns the resource:
                # do not destroy it, just propagate.
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    clean()
                raise
            except Exception:
                clean()
                raise

        util.retry(create, maxretry=5)

2300 

    def _create_volume_with_properties(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create a volume and register it in the KV store: the volume's
        properties are written first (state = CREATING), then the LINSTOR
        resource is created via `_create_volume`.
        :return: The KV properties object, namespaced on `volume_uuid`.
        :raises LinstorVolumeManagerError: if the volume, resource name or
            properties already exist, or if creation fails.
        """
        if self.check_volume_exists(volume_uuid):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, it already exists'
                .format(volume_uuid, self._group_name) + ' in properties',
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        if volume_name in self._fetch_resource_names():
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    volume_uuid, self._group_name
                ) + 'resource of the same name already exists in LINSTOR'
            )

        # I am paranoid.
        # Double check: no leftover properties for this UUID.
        volume_properties = self._get_volume_properties(volume_uuid)
        if (volume_properties.get(self.PROP_NOT_EXISTS) is not None):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}`, '.format(volume_uuid) +
                'properties already exist'
            )

        try:
            # Mark the volume as "being created" before touching LINSTOR,
            # so a crash leaves a detectable (repairable) state.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_CREATING
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            self._create_volume(
                volume_uuid,
                volume_name,
                size,
                place_resources,
                high_availability
            )

            assert volume_properties.namespace == \
                self._build_volume_namespace(volume_uuid)
            return volume_properties
        except LinstorVolumeManagerError as e:
            # Do not destroy existing resource!
            # In theory we can't get this error because we check this event
            # before the `self._create_volume` case.
            # It can only happen if the same volume uuid is used in the same
            # call in another host.
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                self._destroy_volume(volume_uuid, force=True)
            raise

2355 

2356 def _find_device_path(self, volume_uuid, volume_name): 

2357 current_device_path = self._request_device_path( 

2358 volume_uuid, volume_name, activate=True 

2359 ) 

2360 

2361 # We use realpath here to get the /dev/drbd<id> path instead of 

2362 # /dev/drbd/by-res/<resource_name>. 

2363 expected_device_path = self.build_device_path(volume_name) 

2364 util.wait_for_path(expected_device_path, 5) 

2365 

2366 device_realpath = os.path.realpath(expected_device_path) 

2367 if current_device_path != device_realpath: 

2368 raise LinstorVolumeManagerError( 

2369 'Invalid path, current={}, expected={} (realpath={})' 

2370 .format( 

2371 current_device_path, 

2372 expected_device_path, 

2373 device_realpath 

2374 ) 

2375 ) 

2376 return expected_device_path 

2377 

2378 def _request_device_path(self, volume_uuid, volume_name, activate=False): 

2379 node_name = socket.gethostname() 

2380 

2381 resource = next(filter( 

2382 lambda resource: resource.node_name == node_name and 

2383 resource.name == volume_name, 

2384 self._get_resource_cache().resources 

2385 ), None) 

2386 

2387 if not resource: 

2388 if activate: 

2389 self._mark_resource_cache_as_dirty() 

2390 self._activate_device_path( 

2391 self._linstor, node_name, volume_name 

2392 ) 

2393 return self._request_device_path(volume_uuid, volume_name) 

2394 raise LinstorVolumeManagerError( 

2395 'Empty dev path for `{}`, but definition "seems" to exist' 

2396 .format(volume_uuid) 

2397 ) 

2398 # Contains a path of the /dev/drbd<id> form. 

2399 return resource.volumes[0].device_path 

2400 

2401 def _destroy_resource(self, resource_name, force=False): 

2402 result = self._linstor.resource_dfn_delete(resource_name) 

2403 error_str = self._get_error_str(result) 

2404 if not error_str: 

2405 self._mark_resource_cache_as_dirty() 

2406 return 

2407 

2408 if not force: 

2409 self._mark_resource_cache_as_dirty() 

2410 raise LinstorVolumeManagerError( 

2411 'Could not destroy resource `{}` from SR `{}`: {}' 

2412 .format(resource_name, self._group_name, error_str) 

2413 ) 

2414 

2415 # If force is used, ensure there is no opener. 

2416 all_openers = get_all_volume_openers(resource_name, '0') 

2417 for openers in all_openers.values(): 

2418 if openers: 

2419 self._mark_resource_cache_as_dirty() 

2420 raise LinstorVolumeManagerError( 

2421 'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)' 

2422 .format(resource_name, self._group_name, error_str, all_openers) 

2423 ) 

2424 

2425 # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue? 

2426 resource_states = filter( 

2427 lambda resource_state: resource_state.name == resource_name, 

2428 self._get_resource_cache().resource_states 

2429 ) 

2430 

2431 # Mark only after computation of states. 

2432 self._mark_resource_cache_as_dirty() 

2433 

2434 for resource_state in resource_states: 

2435 volume_state = resource_state.volume_states[0] 

2436 if resource_state.in_use: 

2437 demote_drbd_resource(resource_state.node_name, resource_name) 

2438 break 

2439 self._destroy_resource(resource_name) 

2440 

2441 def _destroy_volume(self, volume_uuid, force=False, preserve_properties=False): 

2442 volume_properties = self._get_volume_properties(volume_uuid) 

2443 try: 

2444 volume_name = volume_properties.get(self.PROP_VOLUME_NAME) 

2445 if volume_name in self._fetch_resource_names(): 

2446 self._destroy_resource(volume_name, force) 

2447 

2448 # Assume this call is atomic. 

2449 if not preserve_properties: 

2450 volume_properties.clear() 

2451 except Exception as e: 

2452 raise LinstorVolumeManagerError( 

2453 'Cannot destroy volume `{}`: {}'.format(volume_uuid, e) 

2454 ) 

2455 

    def _build_volumes(self, repair):
        """
        Rebuild `self._volumes` from the KV store, resolving interrupted
        operations. With `repair` set (master), half-created volumes are
        destroyed or renamed with a "DELETED_" prefix and pending UUID
        renames are completed; without it (slave), bad entries are only
        logged and any pending UUID rename aborts the build.
        :param bool repair: Whether destructive cleanup is allowed.
        :raises LinstorVolumeManagerError: if pending UUID updates exist
            and `repair` is False.
        """
        properties = self._kv_cache
        resource_names = self._fetch_resource_names()

        self._volumes = set()

        # Volumes with a pending UUID rename (interrupted update).
        updating_uuid_volumes = self._get_volumes_by_property(
            self.REG_UPDATING_UUID_SRC, ignore_inexisting_volumes=False
        )
        if updating_uuid_volumes and not repair:
            raise LinstorVolumeManagerError(
                'Cannot build LINSTOR volume list: '
                'It exists invalid "updating uuid volumes", repair is required'
            )

        existing_volumes = self._get_volumes_by_property(
            self.REG_NOT_EXISTS, ignore_inexisting_volumes=False
        )
        for volume_uuid, not_exists in existing_volumes.items():
            properties.namespace = self._build_volume_namespace(volume_uuid)

            # Rename targets are handled in the dedicated loop below.
            src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC)
            if src_uuid:
                self._logger(
                    'Ignoring volume during manager initialization with prop '
                    ' PROP_UPDATING_UUID_SRC: {} (properties={})'
                    .format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Insert volume in list if the volume exists. Or if the volume
            # is being created and a slave wants to use it (repair = False).
            #
            # If we are on the master and if repair is True and state is
            # Creating, it's probably a bug or crash: the creation process has
            # been stopped.
            if not_exists == self.STATE_EXISTS or (
                not repair and not_exists == self.STATE_CREATING
            ):
                self._volumes.add(volume_uuid)
                continue

            if not repair:
                self._logger(
                    'Ignoring bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Remove bad volume.
            try:
                self._logger(
                    'Removing bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                volume_name = properties.get(self.PROP_VOLUME_NAME)

                # Little optimization, don't call `self._destroy_volume`,
                # we already have resource name list.
                if volume_name in resource_names:
                    self._destroy_resource(volume_name, force=True)

                # Assume this call is atomic.
                properties.clear()
            except Exception as e:
                # Do not raise, we don't want to block user action.
                self._logger(
                    'Cannot clean volume {}: {}'.format(volume_uuid, e)
                )

                # The volume can't be removed, maybe it's still in use,
                # in this case rename it with the "DELETED_" prefix.
                # This prefix is mandatory if it exists a snap transaction to
                # rollback because the original VDI UUID can try to be renamed
                # with the UUID we are trying to delete...
                if not volume_uuid.startswith('DELETED_'):
                    self.update_volume_uuid(
                        volume_uuid, 'DELETED_' + volume_uuid, force=True
                    )

        # Complete interrupted UUID renames: either roll back (dest never
        # materialized) or finish the move from src to dest.
        for dest_uuid, src_uuid in updating_uuid_volumes.items():
            dest_namespace = self._build_volume_namespace(dest_uuid)

            properties.namespace = dest_namespace
            if int(properties.get(self.PROP_NOT_EXISTS)):
                # Destination was never completed: discard it.
                properties.clear()
                continue

            # Destination exists: drop the source properties...
            properties.namespace = self._build_volume_namespace(src_uuid)
            properties.clear()

            # ...and clear the pending-rename marker on the destination.
            properties.namespace = dest_namespace
            properties.pop(self.PROP_UPDATING_UUID_SRC)

            if src_uuid in self._volumes:
                self._volumes.remove(src_uuid)
            self._volumes.add(dest_uuid)

2562 

2563 def _get_sr_properties(self): 

2564 return self._create_linstor_kv(self._build_sr_namespace()) 

2565 

2566 def _get_volumes_by_property( 

2567 self, reg_prop, ignore_inexisting_volumes=True 

2568 ): 

2569 base_properties = self._get_kv_cache() 

2570 base_properties.namespace = self._build_volume_namespace() 

2571 

2572 volume_properties = {} 

2573 for volume_uuid in self._volumes: 

2574 volume_properties[volume_uuid] = '' 

2575 

2576 for key, value in base_properties.items(): 

2577 res = reg_prop.match(key) 

2578 if res: 

2579 volume_uuid = res.groups()[0] 

2580 if not ignore_inexisting_volumes or \ 

2581 volume_uuid in self._volumes: 

2582 volume_properties[volume_uuid] = value 

2583 

2584 return volume_properties 

2585 

2586 def _create_linstor_kv(self, namespace): 

2587 return linstor.KV( 

2588 self._group_name, 

2589 uri=self._linstor.controller_host(), 

2590 namespace=namespace 

2591 ) 

2592 

2593 def _get_volume_properties(self, volume_uuid): 

2594 properties = self._get_kv_cache() 

2595 properties.namespace = self._build_volume_namespace(volume_uuid) 

2596 return properties 

2597 

2598 @classmethod 

2599 def _build_sr_namespace(cls): 

2600 return '/{}/'.format(cls.NAMESPACE_SR) 

2601 

2602 @classmethod 

2603 def _build_volume_namespace(cls, volume_uuid=None): 

2604 # Return a path to all volumes if `volume_uuid` is not given. 

2605 if volume_uuid is None: 

2606 return '/{}/'.format(cls.NAMESPACE_VOLUME) 

2607 return '/{}/{}/'.format(cls.NAMESPACE_VOLUME, volume_uuid) 

2608 

2609 @classmethod 

2610 def _get_error_str(cls, result): 

2611 return ', '.join([ 

2612 err.message for err in cls._filter_errors(result) 

2613 ]) 

2614 

2615 @classmethod 

2616 def _create_linstor_instance( 

2617 cls, uri, keep_uri_unmodified=False, attempt_count=30 

2618 ): 

2619 retry = False 

2620 

2621 def connect(uri): 

2622 if not uri: 

2623 uri = get_controller_uri() 

2624 if not uri: 

2625 raise LinstorVolumeManagerError( 

2626 'Unable to find controller uri...' 

2627 ) 

2628 instance = linstor.Linstor(uri, keep_alive=True) 

2629 instance.connect() 

2630 return instance 

2631 

2632 try: 

2633 return connect(uri) 

2634 except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError): 

2635 pass 

2636 

2637 if not keep_uri_unmodified: 

2638 uri = None 

2639 

2640 return util.retry( 

2641 lambda: connect(uri), 

2642 maxretry=attempt_count, 

2643 period=1, 

2644 exceptions=[ 

2645 linstor.errors.LinstorNetworkError, 

2646 LinstorVolumeManagerError 

2647 ] 

2648 ) 

2649 

2650 @classmethod 

2651 def _configure_volume_peer_slots(cls, lin, volume_name): 

2652 result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3) 

2653 error_str = cls._get_error_str(result) 

2654 if error_str: 

2655 raise LinstorVolumeManagerError( 

2656 'Could not configure volume peer slots of {}: {}' 

2657 .format(volume_name, error_str) 

2658 ) 

2659 

2660 @classmethod 

2661 def _activate_device_path(cls, lin, node_name, volume_name): 

2662 result = lin.resource_make_available(node_name, volume_name, diskful=False) 

2663 if linstor.Linstor.all_api_responses_no_error(result): 

2664 return 

2665 errors = linstor.Linstor.filter_api_call_response_errors(result) 

2666 if len(errors) == 1 and errors[0].is_error( 

2667 linstor.consts.FAIL_EXISTS_RSC 

2668 ): 

2669 return 

2670 

2671 raise LinstorVolumeManagerError( 

2672 'Unable to activate device path of `{}` on node `{}`: {}' 

2673 .format(volume_name, node_name, ', '.join( 

2674 [str(x) for x in result])) 

2675 ) 

2676 

2677 @classmethod 

2678 def _request_database_path(cls, lin, activate=False): 

2679 node_name = socket.gethostname() 

2680 

2681 try: 

2682 resource = next(filter( 

2683 lambda resource: resource.node_name == node_name and 

2684 resource.name == DATABASE_VOLUME_NAME, 

2685 lin.resource_list_raise().resources 

2686 ), None) 

2687 except Exception as e: 

2688 raise LinstorVolumeManagerError( 

2689 'Unable to fetch database resource: {}' 

2690 .format(e) 

2691 ) 

2692 

2693 if not resource: 

2694 if activate: 

2695 cls._activate_device_path( 

2696 lin, node_name, DATABASE_VOLUME_NAME 

2697 ) 

2698 return cls._request_database_path( 

2699 DATABASE_VOLUME_NAME, DATABASE_VOLUME_NAME 

2700 ) 

2701 raise LinstorVolumeManagerError( 

2702 'Empty dev path for `{}`, but definition "seems" to exist' 

2703 .format(DATABASE_PATH) 

2704 ) 

2705 # Contains a path of the /dev/drbd<id> form. 

2706 return resource.volumes[0].device_path 

2707 

    @classmethod
    def _create_database_volume(
        cls, lin, group_name, storage_pool_name, node_names, redundancy
    ):
        """
        Create, place, format and return the device path of the LINSTOR
        database volume. Diskful replicas are created on up to
        `redundancy` nodes owning a storage pool; every other node gets a
        diskless resource so PBDs can be plugged everywhere.
        :raises LinstorVolumeManagerError: if definitions already exist or
            any creation/placement/format step fails.
        """
        try:
            dfns = lin.resource_dfn_list_raise().resource_definitions
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Unable to get definitions during database creation: {}'
                .format(e)
            )

        # The database must be the very first volume of the SR.
        if dfns:
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    DATABASE_VOLUME_NAME, group_name
                ) + 'LINSTOR volume list must be empty.'
            )

        # Workaround to use thin lvm. Without this line an error is returned:
        # "Not enough available nodes"
        # I don't understand why but this command protect against this bug.
        try:
            pools = lin.storage_pool_list_raise(
                filter_by_stor_pools=[storage_pool_name]
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to get storage pool list before database creation: {}'
                .format(e)
            )

        # Ensure we have a correct list of storage pools.
        assert pools.storage_pools  # We must have at least one storage pool!
        nodes_with_pool = list(map(lambda pool: pool.node_name, pools.storage_pools))
        for node_name in nodes_with_pool:
            assert node_name in node_names
        util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool))

        # Create the database definition.
        size = cls.round_up_volume_size(DATABASE_SIZE)
        cls._check_volume_creation_errors(lin.resource_group_spawn(
            rsc_grp_name=group_name,
            rsc_dfn_name=DATABASE_VOLUME_NAME,
            vlm_sizes=['{}B'.format(size)],
            definitions_only=True
        ), DATABASE_VOLUME_NAME, group_name)
        cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME)

        # Create real resources on the first nodes.
        resources = []

        # Split nodes: only those with a storage pool can host data.
        diskful_nodes = []
        diskless_nodes = []
        for node_name in node_names:
            if node_name in nodes_with_pool:
                diskful_nodes.append(node_name)
            else:
                diskless_nodes.append(node_name)

        assert diskful_nodes
        for node_name in diskful_nodes[:redundancy]:
            util.SMlog('Create database diskful on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                storage_pool=storage_pool_name
            ))
        # Create diskless resources on the remaining set.
        for node_name in diskful_nodes[redundancy:] + diskless_nodes:
            util.SMlog('Create database diskless on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                diskless=True
            ))

        result = lin.resource_create(resources)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create database volume from SR `{}`: {}'.format(
                    group_name, error_str
                )
            )

        # Create database and ensure path exists locally and
        # on replicated devices.
        current_device_path = cls._request_database_path(lin, activate=True)

        # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be
        # plugged.
        for node_name in node_names:
            cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME)

        # We use realpath here to get the /dev/drbd<id> path instead of
        # /dev/drbd/by-res/<resource_name>.
        expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME)
        util.wait_for_path(expected_device_path, 5)

        device_realpath = os.path.realpath(expected_device_path)
        if current_device_path != device_realpath:
            raise LinstorVolumeManagerError(
                'Invalid path, current={}, expected={} (realpath={})'
                .format(
                    current_device_path,
                    expected_device_path,
                    device_realpath
                )
            )

        # Format the new device (retried: the device may not be usable
        # immediately after creation).
        try:
            util.retry(
                lambda: util.pread2([DATABASE_MKFS, expected_device_path]),
                maxretry=5
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to execute {} on database volume: {}'
                .format(DATABASE_MKFS, e)
            )

        return expected_device_path

2831 

2832 @classmethod 

2833 def _destroy_database_volume(cls, lin, group_name): 

2834 error_str = cls._get_error_str( 

2835 lin.resource_dfn_delete(DATABASE_VOLUME_NAME) 

2836 ) 

2837 if error_str: 

2838 raise LinstorVolumeManagerError( 

2839 'Could not destroy resource `{}` from SR `{}`: {}' 

2840 .format(DATABASE_VOLUME_NAME, group_name, error_str) 

2841 ) 

2842 

    @classmethod
    def _mount_database_volume(cls, volume_path, mount=True, force=False):
        """
        Mount (or unmount) `volume_path` on DATABASE_PATH, preserving the
        directory's current contents: they are moved to a backup folder,
        the (un)mount is performed, then they are moved back and the
        backup folder is removed. On any failure, a best-effort rollback
        restores the previous mount state and contents before re-raising.
        :param bool mount: True to mount, False to unmount.
        :param bool force: Forwarded to the final `_move_files` call.
        """
        try:
            # 1. Create a backup config folder.
            database_not_empty = bool(os.listdir(DATABASE_PATH))
            backup_path = cls._create_database_backup_path()

            # 2. Move the config in the mounted volume.
            if database_not_empty:
                cls._move_files(DATABASE_PATH, backup_path)

            cls._mount_volume(volume_path, DATABASE_PATH, mount)

            if database_not_empty:
                cls._move_files(backup_path, DATABASE_PATH, force)

            # 3. Remove useless backup directory.
            try:
                os.rmdir(backup_path)
            except Exception as e:
                raise LinstorVolumeManagerError(
                    'Failed to remove backup path {} of LINSTOR config: {}'
                    .format(backup_path, e)
                )
        except Exception as e:
            # NOTE(review): if the initial `os.listdir` or
            # `_create_database_backup_path` raised, `backup_path` is
            # unbound here and the rollback itself fails with NameError —
            # verify whether those early failures are possible in practice.
            def force_exec(fn):
                # Swallow rollback errors: the original exception wins.
                try:
                    fn()
                except Exception:
                    pass

            # Undo the (un)mount if it went through.
            if mount == cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    DATABASE_PATH, backup_path
                ))
                force_exec(lambda: cls._mount_volume(
                    volume_path, DATABASE_PATH, not mount
                ))

            # Restore the original contents if they were moved away.
            if mount != cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    backup_path, DATABASE_PATH
                ))

            force_exec(lambda: os.rmdir(backup_path))
            raise e

2889 

2890 @classmethod 

2891 def _force_destroy_database_volume(cls, lin, group_name): 

2892 try: 

2893 cls._destroy_database_volume(lin, group_name) 

2894 except Exception: 

2895 pass 

2896 

2897 @classmethod 

2898 def _destroy_storage_pool(cls, lin, group_name, node_name): 

2899 def destroy(): 

2900 result = lin.storage_pool_delete(node_name, group_name) 

2901 errors = cls._filter_errors(result) 

2902 if cls._check_errors(errors, [ 

2903 linstor.consts.FAIL_NOT_FOUND_STOR_POOL, 

2904 linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN 

2905 ]): 

2906 return 

2907 

2908 if errors: 

2909 raise LinstorVolumeManagerError( 

2910 'Failed to destroy SP `{}` on node `{}`: {}'.format( 

2911 group_name, 

2912 node_name, 

2913 cls._get_error_str(errors) 

2914 ) 

2915 ) 

2916 

2917 # We must retry to avoid errors like: 

2918 # "can not be deleted as volumes / snapshot-volumes are still using it" 

2919 # after LINSTOR database volume destruction. 

2920 return util.retry(destroy, maxretry=10) 

2921 

    @classmethod
    def _create_resource_group(
        cls,
        lin,
        group_name,
        storage_pool_name,
        redundancy,
        destroy_old_group
    ):
        """Create the LINSTOR resource group and its associated volume group.

        :param lin: LINSTOR API client instance.
        :param str group_name: Name of the resource group to create.
        :param str storage_pool_name: Storage pool the group is bound to.
        :param int redundancy: Replica count (passed as place_count).
        :param bool destroy_old_group: When True and a group with this name
            already exists, destroy it once and retry the creation.
        :raises LinstorVolumeManagerError: If the RG or VG cannot be created.
        """
        rg_creation_attempt = 0
        while True:
            result = lin.resource_group_create(
                name=group_name,
                place_count=redundancy,
                storage_pool=storage_pool_name,
                diskless_on_remaining=False
            )
            error_str = cls._get_error_str(result)
            if not error_str:
                break

            # Creation failed. If the RG already exists and recycling is
            # allowed, destroy it and retry exactly once.
            errors = cls._filter_errors(result)
            if destroy_old_group and cls._check_errors(errors, [
                linstor.consts.FAIL_EXISTS_RSC_GRP
            ]):
                rg_creation_attempt += 1
                if rg_creation_attempt < 2:
                    try:
                        cls._destroy_resource_group(lin, group_name)
                    except Exception as e:
                        # Fall through to the raise below with this reason.
                        error_str = 'Failed to destroy old and empty RG: {}'.format(e)
                    else:
                        continue

            raise LinstorVolumeManagerError(
                'Could not create RG `{}`: {}'.format(
                    group_name, error_str
                )
            )

        # The RG now exists: create the volume group attached to it.
        result = lin.volume_group_create(group_name)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create VG `{}`: {}'.format(
                    group_name, error_str
                )
            )

2970 

2971 @classmethod 

2972 def _destroy_resource_group(cls, lin, group_name): 

2973 def destroy(): 

2974 result = lin.resource_group_delete(group_name) 

2975 errors = cls._filter_errors(result) 

2976 if cls._check_errors(errors, [ 

2977 linstor.consts.FAIL_NOT_FOUND_RSC_GRP 

2978 ]): 

2979 return 

2980 

2981 if errors: 

2982 raise LinstorVolumeManagerError( 

2983 'Failed to destroy RG `{}`: {}' 

2984 .format(group_name, cls._get_error_str(errors)) 

2985 ) 

2986 

2987 return util.retry(destroy, maxretry=10) 

2988 

2989 @classmethod 

2990 def _build_group_name(cls, base_name): 

2991 # If thin provisioning is used we have a path like this: 

2992 # `VG/LV`. "/" is not accepted by LINSTOR. 

2993 return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_')) 

2994 

2995 # Used to store important data in a HA context, 

2996 # i.e. a replication count of 3. 

2997 @classmethod 

2998 def _build_ha_group_name(cls, base_name): 

2999 return '{}{}'.format(cls.PREFIX_HA, base_name.replace('/', '_')) 

3000 

3001 @classmethod 

3002 def _check_volume_creation_errors(cls, result, volume_uuid, group_name): 

3003 errors = cls._filter_errors(result) 

3004 if cls._check_errors(errors, [ 

3005 linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN 

3006 ]): 

3007 raise LinstorVolumeManagerError( 

3008 'Failed to create volume `{}` from SR `{}`, it already exists' 

3009 .format(volume_uuid, group_name), 

3010 LinstorVolumeManagerError.ERR_VOLUME_EXISTS 

3011 ) 

3012 

3013 if cls._check_errors(errors, [linstor.consts.FAIL_NOT_FOUND_RSC_GRP]): 

3014 raise LinstorVolumeManagerError( 

3015 'Failed to create volume `{}` from SR `{}`, resource group doesn\'t exist' 

3016 .format(volume_uuid, group_name), 

3017 LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS 

3018 ) 

3019 

3020 if errors: 

3021 raise LinstorVolumeManagerError( 

3022 'Failed to create volume `{}` from SR `{}`: {}'.format( 

3023 volume_uuid, 

3024 group_name, 

3025 cls._get_error_str(errors) 

3026 ) 

3027 ) 

3028 

3029 @classmethod 

3030 def _move_files(cls, src_dir, dest_dir, force=False): 

3031 def listdir(dir): 

3032 ignored = ['lost+found'] 

3033 return [file for file in os.listdir(dir) if file not in ignored] 

3034 

3035 try: 

3036 if not force: 

3037 files = listdir(dest_dir) 

3038 if files: 

3039 raise LinstorVolumeManagerError( 

3040 'Cannot move files from {} to {} because destination ' 

3041 'contains: {}'.format(src_dir, dest_dir, files) 

3042 ) 

3043 except LinstorVolumeManagerError: 

3044 raise 

3045 except Exception as e: 

3046 raise LinstorVolumeManagerError( 

3047 'Cannot list dir {}: {}'.format(dest_dir, e) 

3048 ) 

3049 

3050 try: 

3051 for file in listdir(src_dir): 

3052 try: 

3053 dest_file = os.path.join(dest_dir, file) 

3054 if not force and os.path.exists(dest_file): 

3055 raise LinstorVolumeManagerError( 

3056 'Cannot move {} because it already exists in the ' 

3057 'destination'.format(file) 

3058 ) 

3059 shutil.move(os.path.join(src_dir, file), dest_file) 

3060 except LinstorVolumeManagerError: 

3061 raise 

3062 except Exception as e: 

3063 raise LinstorVolumeManagerError( 

3064 'Cannot move {}: {}'.format(file, e) 

3065 ) 

3066 except Exception as e: 

3067 if not force: 

3068 try: 

3069 cls._move_files(dest_dir, src_dir, force=True) 

3070 except Exception: 

3071 pass 

3072 

3073 raise LinstorVolumeManagerError( 

3074 'Failed to move files from {} to {}: {}'.format( 

3075 src_dir, dest_dir, e 

3076 ) 

3077 ) 

3078 

3079 @staticmethod 

3080 def _create_database_backup_path(): 

3081 path = DATABASE_PATH + '-' + str(uuid.uuid4()) 

3082 try: 

3083 os.mkdir(path) 

3084 return path 

3085 except Exception as e: 

3086 raise LinstorVolumeManagerError( 

3087 'Failed to create backup path {} of LINSTOR config: {}' 

3088 .format(path, e) 

3089 ) 

3090 

3091 @staticmethod 

3092 def _get_filtered_properties(properties): 

3093 return dict(properties.items()) 

3094 

3095 @staticmethod 

3096 def _filter_errors(result): 

3097 return [ 

3098 err for err in result 

3099 if hasattr(err, 'is_error') and err.is_error() 

3100 ] 

3101 

3102 @staticmethod 

3103 def _check_errors(result, codes): 

3104 for err in result: 

3105 for code in codes: 

3106 if err.is_error(code): 

3107 return True 

3108 return False 

3109 

3110 @classmethod 

3111 def _controller_is_running(cls): 

3112 return cls._service_is_running('linstor-controller') 

3113 

3114 @classmethod 

3115 def _start_controller(cls, start=True): 

3116 return cls._start_service('linstor-controller', start) 

3117 

3118 @staticmethod 

3119 def _start_service(name, start=True): 

3120 action = 'start' if start else 'stop' 

3121 (ret, out, err) = util.doexec([ 

3122 'systemctl', action, name 

3123 ]) 

3124 if ret != 0: 

3125 raise LinstorVolumeManagerError( 

3126 'Failed to {} {}: {} {}' 

3127 .format(action, name, out, err) 

3128 ) 

3129 

3130 @staticmethod 

3131 def _service_is_running(name): 

3132 (ret, out, err) = util.doexec([ 

3133 'systemctl', 'is-active', '--quiet', name 

3134 ]) 

3135 return not ret 

3136 

3137 @staticmethod 

3138 def _is_mounted(mountpoint): 

3139 (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint]) 

3140 return ret == 0 

3141 

3142 @classmethod 

3143 def _mount_volume(cls, volume_path, mountpoint, mount=True): 

3144 if mount: 

3145 try: 

3146 util.pread(['mount', volume_path, mountpoint]) 

3147 except Exception as e: 

3148 raise LinstorVolumeManagerError( 

3149 'Failed to mount volume {} on {}: {}' 

3150 .format(volume_path, mountpoint, e) 

3151 ) 

3152 else: 

3153 try: 

3154 if cls._is_mounted(mountpoint): 

3155 util.pread(['umount', mountpoint]) 

3156 except Exception as e: 

3157 raise LinstorVolumeManagerError( 

3158 'Failed to umount volume {} on {}: {}' 

3159 .format(volume_path, mountpoint, e) 

3160 ) 

3161 

3162 

3163# ============================================================================== 

3164 

# Check if a path is a DRBD resource and log the process name/pid
# that opened it.
def log_drbd_openers(path):
    # Only symlinks under /dev/drbd/by-res/ can be DRBD resources.
    if not path.startswith(DRBD_BY_RES_PATH):
        return

    # Extract "<res_name>/<volume>" from the path.
    res_name_start = len(DRBD_BY_RES_PATH)
    res_name_end = path.find('/', res_name_start)
    if res_name_end == -1:
        return
    res_name = path[res_name_start:res_name_end]

    volume_sep = path.rfind('/')
    if volume_sep == res_name_end:
        return
    volume = path[volume_sep + 1:]

    try:
        # Ensure path resolves to a DRBD block device (major number 147).
        stats = os.stat(os.path.realpath(path))
        if not stat.S_ISBLK(stats.st_mode) or os.major(stats.st_rdev) != 147:
            return

        # Ask DRBD for the current state of the resource.
        (ret, stdout, stderr) = util.doexec(['drbdadm', 'status', res_name])
        if ret != 0:
            util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format(
                res_name, stderr
            ))
            return

        # Local primary: report the local openers.
        if stdout.startswith('{} role:Primary'.format(res_name)):
            util.SMlog(
                'DRBD resource `{}` is open on local host: {}'
                .format(path, get_local_volume_openers(res_name, volume))
            )
            return

        # Otherwise collect the openers from every host.
        util.SMlog(
            'DRBD resource `{}` is open on hosts: {}'
            .format(path, get_all_volume_openers(res_name, volume))
        )
    except Exception as e:
        util.SMlog(
            'Got exception while trying to determine where DRBD resource ' +
            '`{}` is open: {}'.format(path, e)
        )