Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python3 

2# 

3# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr 

4# 

5# This program is free software: you can redistribute it and/or modify 

6# it under the terms of the GNU General Public License as published by 

7# the Free Software Foundation, either version 3 of the License, or 

8# (at your option) any later version. 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU General Public License for more details. 

13# 

14# You should have received a copy of the GNU General Public License 

15# along with this program. If not, see <https://www.gnu.org/licenses/>. 

16 

17from sm_typing import Any, Optional, override 

18 

19from constants import CBTLOG_TAG 

20 

21try: 

22 from linstorcowutil import LinstorCowUtil 

23 from linstorjournaler import LinstorJournaler 

24 from linstorvolumemanager import get_controller_uri 

25 from linstorvolumemanager import get_controller_node_name 

26 from linstorvolumemanager import LinstorVolumeManager 

27 from linstorvolumemanager import LinstorVolumeManagerError 

28 from linstorvolumemanager import PERSISTENT_PREFIX 

29 

30 LINSTOR_AVAILABLE = True 

31except ImportError: 

32 PERSISTENT_PREFIX = 'unknown' 

33 

34 LINSTOR_AVAILABLE = False 

35 

36import blktap2 

37import cleanup 

38import errno 

39import functools 

40import lock 

41import lvutil 

42import os 

43import re 

44import scsiutil 

45import signal 

46import socket 

47import SR 

48import SRCommand 

49import subprocess 

50import sys 

51import time 

52import traceback 

53import util 

54import VDI 

55import xml.etree.ElementTree as xml_parser 

56import xmlrpc.client 

57import xs_errors 

58 

59from cowutil import CowUtil, getImageStringFromVdiType, getVdiTypeFromImageFormat 

60from srmetadata import \ 

61 NAME_LABEL_TAG, NAME_DESCRIPTION_TAG, IS_A_SNAPSHOT_TAG, SNAPSHOT_OF_TAG, \ 

62 TYPE_TAG, VDI_TYPE_TAG, READ_ONLY_TAG, SNAPSHOT_TIME_TAG, \ 

63 METADATA_OF_POOL_TAG 

64from vditype import VdiType 

65 

# Metadata tag marking a volume as hidden (used by snapshot/coalesce logic).
HIDDEN_TAG = 'hidden'

# XHA daemon configuration, parsed to find host IPs when XAPI is unavailable.
XHA_CONFIG_PATH = '/etc/xensource/xhad.conf'

FORK_LOG_DAEMON = '/opt/xensource/libexec/fork-log-daemon'

# This flag can be disabled to debug the DRBD layer.
# When this config var is False, the HA can only be used under
# specific conditions:
# - Only one heartbeat diskless VDI is present in the pool.
# - The other heartbeat volumes must be diskful and limited to a maximum of 3.
USE_HTTP_NBD_SERVERS = True

# Useful flag to trace calls using cProfile.
TRACE_PERFS = False

# Enable/Disable COW key hash support.
USE_KEY_HASH = False

# Special volumes.
HA_VOLUME_NAME = PERSISTENT_PREFIX + 'ha-statefile'
REDO_LOG_VOLUME_NAME = PERSISTENT_PREFIX + 'redo-log'

# TODO: Simplify with File SR and LVM SR
# Warning: Not the same values as VdiType.*.
# These values represent the types given on the command line.
CREATE_PARAM_TYPES = {
    "raw": VdiType.RAW,
    "vhd": VdiType.VHD,
    "qcow2": VdiType.QCOW2
}

# ==============================================================================

# TODO: Supports 'VDI_INTRODUCE', 'VDI_RESET_ON_BOOT/2', 'SR_TRIM',
# 'VDI_CONFIG_CBT', 'SR_PROBE'

# Capabilities advertised to the SM framework.
CAPABILITIES = [
    'ATOMIC_PAUSE',
    'SR_UPDATE',
    'VDI_CREATE',
    'VDI_DELETE',
    'VDI_UPDATE',
    'VDI_ATTACH',
    'VDI_DETACH',
    'VDI_ACTIVATE',
    'VDI_DEACTIVATE',
    'VDI_CLONE',
    'VDI_MIRROR',
    'VDI_RESIZE',
    'VDI_SNAPSHOT',
    'VDI_GENERATE_CONFIG'
]

# Accepted device-config keys, with a short help string for each.
CONFIGURATION = [
    ['group-name', 'LVM group name'],
    ['redundancy', 'replication count'],
    ['provisioning', '"thin" or "thick" are accepted (optional, defaults to thin)'],
    ['monitor-db-quorum', 'disable controller when only one host is online (optional, defaults to true)']
]

DRIVER_INFO = {
    'name': 'LINSTOR resources on XCP-ng',
    'description': 'SR plugin which uses Linstor to manage VDIs',
    'vendor': 'Vates',
    'copyright': '(C) 2020 Vates',
    'driver_version': '1.0',
    'required_api_version': '1.0',
    'capabilities': CAPABILITIES,
    'configuration': CONFIGURATION
}

DRIVER_CONFIG = {'ATTACH_FROM_CONFIG_WITH_TAPDISK': False}

# Commands that must hold the SR lock exclusively.
OPS_EXCLUSIVE = [
    'sr_create', 'sr_delete', 'sr_attach', 'sr_detach', 'sr_scan',
    'sr_update', 'sr_probe', 'vdi_init', 'vdi_create', 'vdi_delete',
    'vdi_attach', 'vdi_detach', 'vdi_clone', 'vdi_snapshot',
]

145 

146# ============================================================================== 

147# Misc helpers used by LinstorSR and linstor-thin plugin. 

148# ============================================================================== 

149 

150 

def attach_thin(session, journaler, linstor, sr_uuid, vdi_uuid):
    """Grow a thin-provisioned COW volume on attach when it is too small.

    Non-COW (RAW) volumes are left untouched. The LINSTOR volume is
    inflated only when the size required by the virtual COW image exceeds
    the current volume size.
    """
    metadata = linstor.get_volume_metadata(vdi_uuid)
    vdi_type = metadata.get(VDI_TYPE_TAG)
    if not VdiType.isCowImage(vdi_type):
        # Nothing to inflate for non-COW images.
        return

    device_path = linstor.get_device_path(vdi_uuid)

    cow_util = LinstorCowUtil(session, linstor, vdi_type)

    # If the virtual COW size is lower than the LINSTOR volume size,
    # there is nothing to do.
    required_size = cow_util.compute_volume_size(
        cow_util.get_size_virt(vdi_uuid)
    )
    current_size = linstor.get_volume_info(vdi_uuid).virtual_size

    if required_size > current_size:
        cow_util.inflate(
            journaler, vdi_uuid, device_path, required_size, current_size
        )

172 

173 

def detach_thin_impl(session, linstor, sr_uuid, vdi_uuid):
    """Deflate a thin-provisioned COW volume back to its physical size.

    Raises a `VDIUnavailable` error when the VDI is still plugged to two
    or more VBDs after the retry window.
    """
    vdi_type = linstor.get_volume_metadata(vdi_uuid).get(VDI_TYPE_TAG)
    if not VdiType.isCowImage(vdi_type):
        # RAW volumes are never inflated, so there is nothing to deflate.
        return

    def ensure_at_most_one_vbd_plugged():
        vdi_ref = session.xenapi.VDI.get_by_uuid(vdi_uuid)
        vbd_records = session.xenapi.VBD.get_all_records_where(
            'field "VDI" = "{}"'.format(vdi_ref)
        )

        plugged = sum(
            1 for record in vbd_records.values()
            if record['currently_attached']
        )
        if plugged > 1:
            raise xs_errors.XenError(
                'VDIUnavailable',
                opterr='Cannot deflate VDI {}, already used by '
                'at least 2 VBDs'.format(vdi_uuid)
            )

    # We can have multiple VBDs attached to a VDI during a VM-template clone.
    # So we use a timeout to ensure that we can detach the volume properly.
    util.retry(ensure_at_most_one_vbd_plugged, maxretry=10, period=1)

    device_path = linstor.get_device_path(vdi_uuid)
    cow_util = LinstorCowUtil(session, linstor, vdi_type)
    new_size = LinstorVolumeManager.round_up_volume_size(
        cow_util.get_size_phys(vdi_uuid)
    )

    old_size = linstor.get_volume_info(vdi_uuid).virtual_size
    cow_util.deflate(device_path, new_size, old_size)

210 

211 

def detach_thin(session, linstor, sr_uuid, vdi_uuid):
    """Best-effort wrapper around `detach_thin_impl`.

    Never propagates exceptions: a failure escaping here would leave the
    XAPI with an inconsistent view of the VDI state, so errors are only
    logged.
    """
    try:
        detach_thin_impl(session, linstor, sr_uuid, vdi_uuid)
    except Exception as error:
        util.SMlog(
            'Failed to detach properly VDI {}: {}'.format(vdi_uuid, error)
        )

220 

221 

def get_ips_from_xha_config_file():
    """Parse the XHA config file and return the host IP mapping.

    Returns:
        A `(local_ip, ips)` tuple: `ips` maps each HostID found in the
        XHA common config to its IP address, and `local_ip` is the address
        of the local host (or None when it cannot be resolved).
        On parse failure `(None, {})` is returned.
    """
    ips = dict()
    host_id = None
    try:
        # Ensure there is no dirty read problem.
        # For example if the HA is reloaded.
        tree = util.retry(
            lambda: xml_parser.parse(XHA_CONFIG_PATH),
            maxretry=10,
            period=1
        )
    except Exception:
        # Fixed: was a bare `except:`, which would also swallow
        # SystemExit/KeyboardInterrupt.
        return (None, ips)

    def parse_host_nodes(ips, node):
        # A <host> entry must provide both an IPaddress and a HostID.
        current_id = None
        current_ip = None

        for sub_node in node:
            if sub_node.tag == 'IPaddress':
                current_ip = sub_node.text
            elif sub_node.tag == 'HostID':
                current_id = sub_node.text
            else:
                continue

            if current_id and current_ip:
                ips[current_id] = current_ip
                return
        util.SMlog('Ill-formed XHA file, missing IPaddress or/and HostID')

    def parse_common_config(ips, node):
        for sub_node in node:
            if sub_node.tag == 'host':
                parse_host_nodes(ips, sub_node)

    def parse_local_config(ips, node):
        # Returns the HostID of the local host, if present.
        for sub_node in node:
            if sub_node.tag == 'localhost':
                for host_node in sub_node:
                    if host_node.tag == 'HostID':
                        return host_node.text

    for node in tree.getroot():
        if node.tag == 'common-config':
            parse_common_config(ips, node)
        elif node.tag == 'local-config':
            host_id = parse_local_config(ips, node)
        else:
            continue

        # Stop as soon as we have both the map and the local host id.
        if ips and host_id:
            break

    return (host_id and ips.get(host_id), ips)

277 

278 

def activate_lvm_group(group_name):
    """Activate the LVM volume group backing LINSTOR (best effort).

    `group_name` may be `<vg>` or `<vg>/<lv>`; only the VG part is
    activated. Failures are logged, never raised.
    """
    path = group_name.split('/')
    assert path and len(path) <= 2
    try:
        lvutil.setActiveVG(path[0], True)
    except Exception as e:
        # Fixed log message typo: 'Cannot active VG' -> 'Cannot activate VG'.
        util.SMlog('Cannot activate VG `{}`: {}'.format(path[0], e))

286 

287# ============================================================================== 

288 

289# Usage example: 

290# xe sr-create type=linstor name-label=linstor-sr 

291# host-uuid=d2deba7a-c5ad-4de1-9a20-5c8df3343e93 

292# device-config:group-name=vg_loop device-config:redundancy=2 

293 

294 

class LinstorSR(SR.SR):
    # Value of the `type` field expected in XAPI SR records for this driver.
    DRIVER_TYPE = 'linstor'

    # Accepted values for the `provisioning` device-config key.
    PROVISIONING_TYPES = ['thin', 'thick']
    PROVISIONING_DEFAULT = 'thin'

    # XAPI plugin used to execute privileged commands on pool hosts.
    MANAGER_PLUGIN = 'linstor-manager'

    # Lazy-initialization states driven by the `_locked_load` decorator.
    INIT_STATUS_NOT_SET = 0
    INIT_STATUS_IN_PROGRESS = 1
    INIT_STATUS_OK = 2
    INIT_STATUS_FAIL = 3

    # --------------------------------------------------------------------------
    # SR methods.
    # --------------------------------------------------------------------------

    # Connection to the LINSTOR controller; set lazily during load.
    _linstor: Optional[LinstorVolumeManager] = None

313 

314 @override 

315 @staticmethod 

316 def handles(type) -> bool: 

317 return type == LinstorSR.DRIVER_TYPE 

318 

319 def __init__(self, srcmd, sr_uuid): 

320 SR.SR.__init__(self, srcmd, sr_uuid) 

321 self._init_preferred_image_formats() 

322 

    @override
    def load(self, sr_uuid) -> None:
        """Validate device-config and initialize all SR instance attributes.

        Note: this only prepares state; the actual LINSTOR connection is
        established lazily by the `_locked_load` decorator on first use.

        :raises util.SMException: when the LINSTOR libraries are missing.
        :raises xs_errors.XenError: on missing/invalid device-config keys.
        """
        if not LINSTOR_AVAILABLE:
            raise util.SMException(
                'Can\'t load LinstorSR: LINSTOR libraries are missing'
            )

        # Check parameters.
        if 'group-name' not in self.dconf or not self.dconf['group-name']:
            raise xs_errors.XenError('LinstorConfigGroupNameMissing')
        if 'redundancy' not in self.dconf or not self.dconf['redundancy']:
            raise xs_errors.XenError('LinstorConfigRedundancyMissing')

        self.driver_config = DRIVER_CONFIG

        # Check provisioning config.
        provisioning = self.dconf.get('provisioning')
        if provisioning:
            if provisioning in self.PROVISIONING_TYPES:
                self._provisioning = provisioning
            else:
                raise xs_errors.XenError(
                    'InvalidArg',
                    opterr='Provisioning parameter must be one of {}'.format(
                        self.PROVISIONING_TYPES
                    )
                )
        else:
            self._provisioning = self.PROVISIONING_DEFAULT

        # Quorum monitoring defaults to enabled when the key is absent.
        monitor_db_quorum = self.dconf.get('monitor-db-quorum')
        self._monitor_db_quorum = (monitor_db_quorum is None) or \
            util.strtobool(monitor_db_quorum)

        # Note: We don't have access to the session field if the
        # 'vdi_attach_from_config' command is executed.
        self._has_session = self.sr_ref and self.session is not None
        if self._has_session:
            self.sm_config = self.session.xenapi.SR.get_sm_config(self.sr_ref)
        else:
            self.sm_config = self.srcmd.params.get('sr_sm_config') or {}

        # sm-config may override the device-config provisioning value.
        provisioning = self.sm_config.get('provisioning')
        if provisioning in self.PROVISIONING_TYPES:
            self._provisioning = provisioning

        # Define properties for SR parent class.
        self.ops_exclusive = OPS_EXCLUSIVE
        self.path = LinstorVolumeManager.DEV_ROOT_PATH
        self.lock = lock.Lock(lock.LOCK_TYPE_SR, self.uuid)
        self.sr_vditype = SR.DEFAULT_TAP

        # Redundancy is only needed (and validated) at SR creation time.
        if self.cmd == 'sr_create':
            self._redundancy = int(self.dconf['redundancy']) or 1
        self._linstor = None  # Ensure that LINSTOR attribute exists.
        self._journaler = None

        # Used to handle reconnect calls on LINSTOR object attached to the SR.
        class LinstorProxy:
            def __init__(self, sr: LinstorSR) -> None:
                self.sr = sr

            def __getattr__(self, attr: str) -> Any:
                assert self.sr, "Cannot use `LinstorProxy` without valid `LinstorVolumeManager` instance"
                # Forward every attribute access to the current manager, so a
                # reconnect (which replaces self.sr._linstor) is transparent.
                return getattr(self.sr._linstor, attr)

        self._linstor_proxy = LinstorProxy(self)

        self._group_name = self.dconf['group-name']

        # Timestamp of the shared VDI lock acquisition (0 = not held).
        self._vdi_shared_time = 0

        self._init_status = self.INIT_STATUS_NOT_SET

        self._vdis_loaded = False
        self._all_volume_info_cache = None
        self._all_volume_metadata_cache = None

400 

    # To remove in python 3.10.
    # Use directly @staticmethod instead.
    @util.conditional_decorator(staticmethod, sys.version_info >= (3, 10, 0))
    def _locked_load(method):
        """Decorator performing the lazy SR initialization before `method`.

        On the first decorated call it connects to LINSTOR (directly, or
        through the XHA config fallback when no XAPI session exists),
        creates the journaler, optionally loads the VDIs, and records the
        result in `_init_status` so later calls skip the setup.
        """
        def wrapped_method(self, *args, **kwargs):
            # Reaching this point means initialization succeeded.
            self._init_status = self.INIT_STATUS_OK
            return method(self, *args, **kwargs)

        def load(self, *args, **kwargs):
            # Activate all LVMs to make drbd-reactor happy.
            if self.srcmd.cmd in ('sr_attach', 'vdi_attach_from_config'):
                activate_lvm_group(self._group_name)

            if not self._has_session:
                if self.srcmd.cmd in (
                    'vdi_attach_from_config',
                    'vdi_detach_from_config',
                    # When on-slave (is_open) is executed we have an
                    # empty command.
                    None
                ):
                    def create_linstor(uri, attempt_count=30):
                        self._linstor = LinstorVolumeManager(
                            uri,
                            self._group_name,
                            logger=util.SMlog,
                            attempt_count=attempt_count
                        )

                    controller_uri = get_controller_uri()
                    if controller_uri:
                        create_linstor(controller_uri)
                    else:
                        def connect():
                            # We must have a valid LINSTOR instance here without using
                            # the XAPI. Fallback with the HA config file.
                            for ip in get_ips_from_xha_config_file()[1].values():
                                controller_uri = 'linstor://' + ip
                                try:
                                    util.SMlog('Connecting from config to LINSTOR controller using: {}'.format(ip))
                                    create_linstor(controller_uri, attempt_count=0)
                                    return controller_uri
                                except:
                                    pass

                        controller_uri = util.retry(connect, maxretry=30, period=1)
                        if not controller_uri:
                            raise xs_errors.XenError(
                                'SRUnavailable',
                                opterr='No valid controller URI to attach/detach from config'
                            )

                    self._journaler = LinstorJournaler(
                        controller_uri, self._group_name, logger=util.SMlog
                    )

                # No session: skip the master-only logic below.
                return wrapped_method(self, *args, **kwargs)

            if not self.is_master():
                if self.cmd in [
                    'sr_create', 'sr_delete', 'sr_update', 'sr_probe',
                    'sr_scan', 'vdi_create', 'vdi_delete', 'vdi_resize',
                    'vdi_snapshot', 'vdi_clone'
                ]:
                    util.SMlog('{} blocked for non-master'.format(self.cmd))
                    raise xs_errors.XenError('LinstorMaster')

                # Because the LINSTOR KV objects cache all values, we must lock
                # the VDI before the LinstorJournaler/LinstorVolumeManager
                # instantiation and before any action on the master to avoid a
                # bad read. The lock is also necessary to avoid strange
                # behaviors if the GC is executed during an action on a slave.
                if self.cmd.startswith('vdi_'):
                    self._shared_lock_vdi(self.srcmd.params['vdi_uuid'])
                    self._vdi_shared_time = time.time()

            if self.srcmd.cmd != 'sr_create' and self.srcmd.cmd != 'sr_detach':
                try:
                    self._reconnect()
                except Exception as e:
                    raise xs_errors.XenError('SRUnavailable', opterr=str(e))

            if self._linstor:
                try:
                    hosts = self._linstor.disconnected_hosts
                except Exception as e:
                    raise xs_errors.XenError('SRUnavailable', opterr=str(e))

                if hosts:
                    util.SMlog('Failed to join node(s): {}'.format(hosts))

                # Ensure we use a non-locked volume when cowutil is called.
                if (
                    self.is_master() and self.cmd.startswith('vdi_') and
                    self.cmd != 'vdi_create'
                ):
                    self._linstor.ensure_volume_is_not_locked(
                        self.srcmd.params['vdi_uuid']
                    )

                try:
                    # If the command is a SR scan command on the master,
                    # we must load all VDIs and clean journal transactions.
                    # We must load the VDIs in the snapshot case too only if
                    # there is at least one entry in the journal.
                    #
                    # If the command is a SR command we want at least to remove
                    # resourceless volumes.
                    if self.is_master() and self.cmd not in [
                        'vdi_attach', 'vdi_detach',
                        'vdi_activate', 'vdi_deactivate',
                        'vdi_epoch_begin', 'vdi_epoch_end',
                        'vdi_update', 'vdi_destroy'
                    ]:
                        load_vdis = (
                            self.cmd == 'sr_scan' or
                            self.cmd == 'sr_attach'
                        ) or len(
                            self._journaler.get_all(LinstorJournaler.INFLATE)
                        ) or len(
                            self._journaler.get_all(LinstorJournaler.CLONE)
                        )

                        if load_vdis:
                            self._load_vdis()

                        self._linstor.remove_resourceless_volumes()

                        self._synchronize_metadata()
                except Exception as e:
                    if self.cmd == 'sr_scan' or self.cmd == 'sr_attach':
                        # Always raise, we don't want to remove VDIs
                        # from the XAPI database otherwise.
                        raise e
                    util.SMlog(
                        'Ignoring exception in LinstorSR.load: {}'.format(e)
                    )
                    util.SMlog(traceback.format_exc())

            return wrapped_method(self, *args, **kwargs)

        @functools.wraps(wrapped_method)
        def wrap(self, *args, **kwargs):
            # Fast path: initialization already done (or currently running
            # in this call stack) -- call the method directly.
            if self._init_status in \
                    (self.INIT_STATUS_OK, self.INIT_STATUS_IN_PROGRESS):
                return wrapped_method(self, *args, **kwargs)
            if self._init_status == self.INIT_STATUS_FAIL:
                util.SMlog(
                    'Can\'t call method {} because initialization failed'
                    .format(method)
                )
            else:
                try:
                    self._init_status = self.INIT_STATUS_IN_PROGRESS
                    return load(self, *args, **kwargs)
                except Exception:
                    if self._init_status != self.INIT_STATUS_OK:
                        self._init_status = self.INIT_STATUS_FAIL
                    raise

        return wrap

562 

563 @override 

564 def cleanup(self) -> None: 

565 if self._vdi_shared_time: 

566 self._shared_lock_vdi(self.srcmd.params['vdi_uuid'], locked=False) 

567 

    @override
    @_locked_load
    def create(self, uuid, size) -> None:
        """Create the LINSTOR SR on the whole pool.

        Validates redundancy/uniqueness constraints, prepares every host,
        creates the LINSTOR storage, then enables drbd-reactor. On failure
        after the storage was created, it is destroyed again (rollback).

        :raises xs_errors.XenError: 'LinstorSRCreate' on any precondition
            or creation failure.
        """
        util.SMlog('LinstorSR.create for {}'.format(self.uuid))

        host_adresses = util.get_host_addresses(self.session)
        if self._redundancy > len(host_adresses):
            raise xs_errors.XenError(
                'LinstorSRCreate',
                opterr='Redundancy greater than host count'
            )

        xenapi = self.session.xenapi
        srs = xenapi.SR.get_all_records_where(
            'field "type" = "{}"'.format(self.DRIVER_TYPE)
        )
        # Ignore our own (in-creation) SR record.
        srs = dict([e for e in srs.items() if e[1]['uuid'] != self.uuid])

        # The LVM group must not already be used by another LINSTOR SR.
        for sr in srs.values():
            for pbd in sr['PBDs']:
                device_config = xenapi.PBD.get_device_config(pbd)
                group_name = device_config.get('group-name')
                if group_name and group_name == self._group_name:
                    raise xs_errors.XenError(
                        'LinstorSRCreate',
                        opterr='group name must be unique, already used by PBD {}'.format(
                            xenapi.PBD.get_uuid(pbd)
                        )
                    )

        if srs:
            raise xs_errors.XenError(
                'LinstorSRCreate',
                opterr='LINSTOR SR must be unique in a pool'
            )

        online_hosts = util.get_online_hosts(self.session)
        if len(online_hosts) < len(host_adresses):
            raise xs_errors.XenError(
                'LinstorSRCreate',
                opterr='Not enough online hosts'
            )

        # Build the hostname -> IP map used to register LINSTOR nodes.
        ips = {}
        for host_ref in online_hosts:
            record = self.session.xenapi.host.get_record(host_ref)
            hostname = record['hostname']
            ips[hostname] = record['address']

        if len(ips) != len(online_hosts):
            raise xs_errors.XenError(
                'LinstorSRCreate',
                opterr='Multiple hosts with same hostname'
            )

        # Ensure ports are opened and LINSTOR satellites
        # are activated. In the same time the drbd-reactor instances
        # must be stopped.
        self._prepare_sr_on_all_hosts(self._group_name, enabled=True)

        # Create SR.
        # Throw if the SR already exists.
        try:
            self._linstor = LinstorVolumeManager.create_sr(
                self._group_name,
                ips,
                self._redundancy,
                thin_provisioning=self._provisioning == 'thin',
                auto_quorum=self._monitor_db_quorum,
                logger=util.SMlog
            )

            util.SMlog(
                "Finishing SR creation, enable drbd-reactor on all hosts..."
            )
            self._update_drbd_reactor_on_all_hosts(enabled=True)
        except Exception as e:
            if not self._linstor:
                # Creation itself failed: nothing to roll back.
                util.SMlog('Failed to create LINSTOR SR: {}'.format(e))
                raise xs_errors.XenError('LinstorSRCreate', opterr=str(e))

            # Creation succeeded but a later step failed: destroy the SR.
            try:
                self._linstor.destroy()
            except Exception as e2:
                util.SMlog(
                    'Failed to destroy LINSTOR SR after creation fail: {}'
                    .format(e2)
                )
            raise e

657 

    @override
    @_locked_load
    def delete(self, uuid) -> None:
        """Delete the LINSTOR SR.

        Stops drbd-reactor everywhere, then asks the host currently
        running the controller to destroy the storage. On failure the
        drbd-reactor services are restarted (best effort).

        :raises xs_errors.XenError: 'SRNotEmpty' when VDIs remain,
            'LinstorSRDelete' on any other failure.
        """
        util.SMlog('LinstorSR.delete for {}'.format(self.uuid))
        cleanup.gc_force(self.session, self.uuid)

        assert self._linstor
        if self.vdis or self._linstor._volumes:
            raise xs_errors.XenError('SRNotEmpty')

        node_name = get_controller_node_name()
        if not node_name:
            raise xs_errors.XenError(
                'LinstorSRDelete',
                opterr='Cannot get controller node name'
            )

        # Resolve the controller node name to a XAPI host ref.
        host_ref = None
        if node_name == 'localhost':
            host_ref = util.get_this_host_ref(self.session)
        else:
            for slave in util.get_all_slaves(self.session):
                r_name = self.session.xenapi.host.get_record(slave)['hostname']
                if r_name == node_name:
                    host_ref = slave
                    break

        if not host_ref:
            raise xs_errors.XenError(
                'LinstorSRDelete',
                opterr='Failed to find host with hostname: {}'.format(
                    node_name
                )
            )

        try:
            # Stop drbd-reactor first (controller host last, see helper).
            self._update_drbd_reactor_on_all_hosts(
                controller_node_name=node_name, enabled=False
            )

            args = {
                'groupName': self._group_name,
            }
            self._exec_manager_command(
                host_ref, 'destroy', args, 'LinstorSRDelete'
            )
        except Exception as e:
            # Best effort rollback: restart drbd-reactor everywhere.
            try:
                self._update_drbd_reactor_on_all_hosts(
                    controller_node_name=node_name, enabled=True
                )
            except Exception as e2:
                util.SMlog(
                    'Failed to restart drbd-reactor after destroy fail: {}'
                    .format(e2)
                )
            util.SMlog('Failed to delete LINSTOR SR: {}'.format(e))
            raise xs_errors.XenError(
                'LinstorSRDelete',
                opterr=str(e)
            )

        lock.Lock.cleanupAll(self.uuid)

721 

722 @override 

723 @_locked_load 

724 def update(self, uuid) -> None: 

725 util.SMlog('LinstorSR.update for {}'.format(self.uuid)) 

726 

727 # Well, how can we update a SR if it doesn't exist? :thinking: 

728 if not self._linstor: 

729 raise xs_errors.XenError( 

730 'SRUnavailable', 

731 opterr='no such volume group: {}'.format(self._group_name) 

732 ) 

733 

734 self._update_stats(0) 

735 

736 # Update the SR name and description only in LINSTOR metadata. 

737 xenapi = self.session.xenapi 

738 self._linstor.metadata = { 

739 NAME_LABEL_TAG: util.to_plain_string( 

740 xenapi.SR.get_name_label(self.sr_ref) 

741 ), 

742 NAME_DESCRIPTION_TAG: util.to_plain_string( 

743 xenapi.SR.get_name_description(self.sr_ref) 

744 ) 

745 } 

746 

747 @override 

748 @_locked_load 

749 def attach(self, uuid) -> None: 

750 util.SMlog('LinstorSR.attach for {}'.format(self.uuid)) 

751 

752 if not self._linstor: 

753 raise xs_errors.XenError( 

754 'SRUnavailable', 

755 opterr='no such group: {}'.format(self._group_name) 

756 ) 

757 

758 @override 

759 @_locked_load 

760 def detach(self, uuid) -> None: 

761 util.SMlog('LinstorSR.detach for {}'.format(self.uuid)) 

762 cleanup.abort(self.uuid) 

763 

764 @override 

765 @_locked_load 

766 def probe(self) -> str: 

767 util.SMlog('LinstorSR.probe for {}'.format(self.uuid)) 

768 # TODO 

769 return '' 

770 

    @override
    @_locked_load
    def scan(self, uuid) -> None:
        """Synchronize the XAPI database with the volumes present on the SR.

        No-op when a previous initialization failed (avoids wiping valid
        XAPI records based on a broken view of the storage).
        """
        if self._init_status == self.INIT_STATUS_FAIL:
            return

        util.SMlog('LinstorSR.scan for {}'.format(self.uuid))
        if not self._linstor:
            raise xs_errors.XenError(
                'SRUnavailable',
                opterr='no such volume group: {}'.format(self._group_name)
            )

        # Note: `scan` can be called outside this module, so ensure the VDIs
        # are loaded.
        self._load_vdis()
        self._update_physical_size()

        # Drop deleted VDIs before handing the list to the XAPI sync.
        for vdi_uuid in list(self.vdis.keys()):
            if self.vdis[vdi_uuid].deleted:
                del self.vdis[vdi_uuid]

        # Security to prevent VDIs from being forgotten if the controller
        # is started without a shared and mounted /var/lib/linstor path.
        try:
            self._linstor.get_database_path()
        except Exception as e:
            # Failed to get database path, ensure we don't have
            # VDIs in the XAPI database...
            if self.session.xenapi.SR.get_VDIs(
                self.session.xenapi.SR.get_by_uuid(self.uuid)
            ):
                raise xs_errors.XenError(
                    'SRUnavailable',
                    opterr='Database is not mounted or node name is invalid ({})'.format(e)
                )

        # Update the database before the restart of the GC to avoid
        # bad sync in the process if new VDIs have been introduced.
        super(LinstorSR, self).scan(self.uuid)
        self._kick_gc()

812 

813 def is_master(self): 

814 if not hasattr(self, '_is_master'): 

815 if 'SRmaster' not in self.dconf: 

816 self._is_master = self.session is not None and util.is_master(self.session) 

817 else: 

818 self._is_master = self.dconf['SRmaster'] == 'true' 

819 

820 return self._is_master 

821 

822 @override 

823 @_locked_load 

824 def vdi(self, uuid) -> VDI.VDI: 

825 return LinstorVDI(self, uuid) 

826 

    # To remove in python 3.10
    # See: https://stackoverflow.com/questions/12718187/python-version-3-9-calling-class-staticmethod-within-the-class-body
    # Before 3.10 a staticmethod object is not directly callable inside the
    # class body, so the decorator applied above must be re-wrapped here.
    _locked_load = staticmethod(_locked_load)

830 

831 # -------------------------------------------------------------------------- 

832 # Lock. 

833 # -------------------------------------------------------------------------- 

834 

    def _shared_lock_vdi(self, vdi_uuid, locked=True):
        """Ask the pool master to (un)lock `vdi_uuid` via the manager plugin.

        :param vdi_uuid: UUID of the VDI to lock or unlock.
        :param locked: True to acquire the shared lock, False to release it.
        """
        master = util.get_master_ref(self.session)

        command = 'lockVdi'
        args = {
            'groupName': self._group_name,
            'srUuid': self.uuid,
            'vdiUuid': vdi_uuid,
            'locked': str(locked)
        }

        # Note: We must avoid to unlock the volume if the timeout is reached
        # because during volume unlock, the SR lock is not used. Otherwise
        # we could destroy a valid lock acquired from another host...
        #
        # This code is not very clean, the ideal solution would be to acquire
        # the SR lock during volume unlock (like lock) but it's not easy
        # to implement without impacting performance.
        if not locked:
            elapsed_time = time.time() - self._vdi_shared_time
            # 0.7: safety margin so we never unlock after expiration.
            timeout = LinstorVolumeManager.LOCKED_EXPIRATION_DELAY * 0.7
            if elapsed_time >= timeout:
                util.SMlog(
                    'Avoid unlock call of {} because timeout has been reached'
                    .format(vdi_uuid)
                )
                return

        self._exec_manager_command(master, command, args, 'VDIUnavailable')

864 

865 # -------------------------------------------------------------------------- 

866 # Network. 

867 # -------------------------------------------------------------------------- 

868 

869 def _exec_manager_command(self, host_ref, command, args, error): 

870 host_rec = self.session.xenapi.host.get_record(host_ref) 

871 host_uuid = host_rec['uuid'] 

872 

873 try: 

874 ret = self.session.xenapi.host.call_plugin( 

875 host_ref, self.MANAGER_PLUGIN, command, args 

876 ) 

877 except Exception as e: 

878 util.SMlog( 

879 'call-plugin on {} ({}:{} with {}) raised'.format( 

880 host_uuid, self.MANAGER_PLUGIN, command, args 

881 ) 

882 ) 

883 raise e 

884 

885 util.SMlog( 

886 'call-plugin on {} ({}:{} with {}) returned: {}'.format( 

887 host_uuid, self.MANAGER_PLUGIN, command, args, ret 

888 ) 

889 ) 

890 if ret == 'False': 

891 raise xs_errors.XenError( 

892 error, 

893 opterr='Plugin {} failed'.format(self.MANAGER_PLUGIN) 

894 ) 

895 

896 def _prepare_sr(self, host, group_name, enabled): 

897 self._exec_manager_command( 

898 host, 

899 'prepareSr' if enabled else 'releaseSr', 

900 {'groupName': group_name}, 

901 'SRUnavailable' 

902 ) 

903 

904 def _prepare_sr_on_all_hosts(self, group_name, enabled): 

905 master = util.get_master_ref(self.session) 

906 self._prepare_sr(master, group_name, enabled) 

907 

908 for slave in util.get_all_slaves(self.session): 

909 self._prepare_sr(slave, group_name, enabled) 

910 

911 def _update_drbd_reactor(self, host, enabled): 

912 self._exec_manager_command( 

913 host, 

914 'updateDrbdReactor', 

915 {'enabled': str(enabled)}, 

916 'SRUnavailable' 

917 ) 

918 

    def _update_drbd_reactor_on_all_hosts(
        self, enabled, controller_node_name=None
    ):
        """Start or stop drbd-reactor on every pool host.

        Ordering matters: when enabling, the desired controller host is
        started first so the linstor-controller lands on it; when
        disabling, it is stopped last so no other node can take over.

        :param enabled: True to start the services, False to stop them.
        :param controller_node_name: hostname that runs (or should run)
            the controller; 'localhost' is resolved to the real hostname.
        """
        if controller_node_name == 'localhost':
            controller_node_name = self.session.xenapi.host.get_record(
                util.get_this_host_ref(self.session)
            )['hostname']
            assert controller_node_name
            assert controller_node_name != 'localhost'

        controller_host = None
        secondary_hosts = []

        # Partition pool hosts into the controller host and the others.
        hosts = self.session.xenapi.host.get_all_records()
        for host_ref, host_rec in hosts.items():
            hostname = host_rec['hostname']
            if controller_node_name == hostname:
                controller_host = host_ref
            else:
                secondary_hosts.append((host_ref, hostname))

        action_name = 'Starting' if enabled else 'Stopping'
        if controller_node_name and not controller_host:
            util.SMlog('Failed to find controller host: `{}`'.format(
                controller_node_name
            ))

        if enabled and controller_host:
            util.SMlog('{} drbd-reactor on controller host `{}`...'.format(
                action_name, controller_node_name
            ))
            # If enabled is true, we try to start the controller on the desired
            # node name first.
            self._update_drbd_reactor(controller_host, enabled)

        for host_ref, hostname in secondary_hosts:
            util.SMlog('{} drbd-reactor on host {}...'.format(
                action_name, hostname
            ))
            self._update_drbd_reactor(host_ref, enabled)

        if not enabled and controller_host:
            util.SMlog('{} drbd-reactor on controller host `{}`...'.format(
                action_name, controller_node_name
            ))
            # If enabled is false, we disable the drbd-reactor service of
            # the controller host last. Why? Otherwise the linstor-controller
            # of other nodes can be started, and we don't want that.
            self._update_drbd_reactor(controller_host, enabled)

968 

969 # -------------------------------------------------------------------------- 

970 # Metadata. 

971 # -------------------------------------------------------------------------- 

972 

973 def _synchronize_metadata_and_xapi(self): 

974 try: 

975 # First synch SR parameters. 

976 self.update(self.uuid) 

977 

978 # Now update the VDI information in the metadata if required. 

979 xenapi = self.session.xenapi 

980 volumes_metadata = self._linstor.get_volumes_with_metadata() 

981 for vdi_uuid, volume_metadata in volumes_metadata.items(): 

982 try: 

983 vdi_ref = xenapi.VDI.get_by_uuid(vdi_uuid) 

984 except Exception: 

985 # May be the VDI is not in XAPI yet dont bother. 

986 continue 

987 

988 label = util.to_plain_string( 

989 xenapi.VDI.get_name_label(vdi_ref) 

990 ) 

991 description = util.to_plain_string( 

992 xenapi.VDI.get_name_description(vdi_ref) 

993 ) 

994 

995 if ( 

996 volume_metadata.get(NAME_LABEL_TAG) != label or 

997 volume_metadata.get(NAME_DESCRIPTION_TAG) != description 

998 ): 

999 self._linstor.update_volume_metadata(vdi_uuid, { 

1000 NAME_LABEL_TAG: label, 

1001 NAME_DESCRIPTION_TAG: description 

1002 }) 

1003 except Exception as e: 

1004 raise xs_errors.XenError( 

1005 'MetadataError', 

1006 opterr='Error synching SR Metadata and XAPI: {}'.format(e) 

1007 ) 

1008 

1009 def _synchronize_metadata(self): 

1010 if not self.is_master(): 

1011 return 

1012 

1013 util.SMlog('Synchronize metadata...') 

1014 if self.cmd == 'sr_attach': 

1015 try: 

1016 util.SMlog( 

1017 'Synchronize SR metadata and the state on the storage.' 

1018 ) 

1019 self._synchronize_metadata_and_xapi() 

1020 except Exception as e: 

1021 util.SMlog('Failed to synchronize metadata: {}'.format(e)) 

1022 

1023 # -------------------------------------------------------------------------- 

1024 # Stats. 

1025 # -------------------------------------------------------------------------- 

1026 

1027 def _update_stats(self, virt_alloc_delta): 

1028 valloc = int(self.session.xenapi.SR.get_virtual_allocation( 

1029 self.sr_ref 

1030 )) 

1031 

1032 # Update size attributes of the SR parent class. 

1033 self.virtual_allocation = valloc + virt_alloc_delta 

1034 

1035 self._update_physical_size() 

1036 

1037 # Notify SR parent class. 

1038 self._db_update() 

1039 

1040 def _update_physical_size(self): 

1041 # We use the size of the smallest disk, this is an approximation that 

1042 # ensures the displayed physical size is reachable by the user. 

1043 (min_physical_size, pool_count) = self._linstor.get_min_physical_size() 

1044 self.physical_size = min_physical_size * pool_count // \ 

1045 self._linstor.redundancy 

1046 

1047 self.physical_utilisation = self._linstor.allocated_volume_size 

1048 

1049 # -------------------------------------------------------------------------- 

1050 # VDIs. 

1051 # -------------------------------------------------------------------------- 

1052 

1053 def _load_vdis(self): 

1054 if self._vdis_loaded: 

1055 return 

1056 

1057 assert self.is_master() 

1058 

1059 # We use a cache to avoid repeated JSON parsing. 

1060 # The performance gain is not big but we can still 

1061 # enjoy it with a few lines. 

1062 self._create_linstor_cache() 

1063 self._load_vdis_ex() 

1064 self._destroy_linstor_cache() 

1065 

1066 # We must mark VDIs as loaded only if the load is a success. 

1067 self._vdis_loaded = True 

1068 

1069 self._undo_all_journal_transactions() 

1070 

    def _load_vdis_ex(self):
        """Scan LINSTOR volumes, introduce missing VDIs into XAPI and build
        self.vdis, the snapshot relations, the geneology and the virtual
        allocation total.

        Relies on the caches created by _create_linstor_cache().
        """
        # 1. Get existing VDIs in XAPI.
        xenapi = self.session.xenapi
        xapi_vdi_uuids = set()
        for vdi in xenapi.SR.get_VDIs(self.sr_ref):
            xapi_vdi_uuids.add(xenapi.VDI.get_uuid(vdi))

        # 2. Get volumes info (from the pre-built caches).
        all_volume_info = self._all_volume_info_cache
        volumes_metadata = self._all_volume_metadata_cache

        # 3. Get CBT vdis.
        # See: https://support.citrix.com/article/CTX230619
        cbt_vdis = set()
        for volume_metadata in volumes_metadata.values():
            cbt_uuid = volume_metadata.get(CBTLOG_TAG)
            if cbt_uuid:
                cbt_vdis.add(cbt_uuid)

        introduce = False

        # Try to introduce VDIs only during scan/attach, and only when no
        # interrupted clone is pending (the journal must be replayed first).
        if self.cmd == 'sr_scan' or self.cmd == 'sr_attach':
            has_clone_entries = list(self._journaler.get_all(
                LinstorJournaler.CLONE
            ).items())

            if has_clone_entries:
                util.SMlog(
                    'Cannot introduce VDIs during scan because it exists '
                    'CLONE entries in journaler on SR {}'.format(self.uuid)
                )
            else:
                introduce = True

        # 4. Now check all volume info.
        vdi_to_snaps = {}
        for vdi_uuid, volume_info in all_volume_info.items():
            # Volumes being renamed by the GC are transient: skip them.
            if vdi_uuid.startswith(cleanup.SR.TMP_RENAME_PREFIX):
                continue

            # 4.a. Check if the VDI in LINSTOR is in XAPI VDIs.
            if vdi_uuid not in xapi_vdi_uuids:
                if not introduce:
                    continue

                # 'DELETED_' volumes are failed destroys left behind by
                # _undo_clone: never re-introduce them.
                if vdi_uuid.startswith('DELETED_'):
                    continue

                volume_metadata = volumes_metadata.get(vdi_uuid)
                if not volume_metadata:
                    util.SMlog(
                        'Skipping volume {} because no metadata could be found'
                        .format(vdi_uuid)
                    )
                    continue

                util.SMlog(
                    'Trying to introduce VDI {} as it is present in '
                    'LINSTOR and not in XAPI...'
                    .format(vdi_uuid)
                )

                # The device path must be resolvable, otherwise the VDI
                # cannot be used and is not introduced.
                try:
                    self._linstor.get_device_path(vdi_uuid)
                except Exception as e:
                    util.SMlog(
                        'Cannot introduce {}, unable to get path: {}'
                        .format(vdi_uuid, e)
                    )
                    continue

                name_label = volume_metadata.get(NAME_LABEL_TAG) or ''
                type = volume_metadata.get(TYPE_TAG) or 'user'
                vdi_type = volume_metadata.get(VDI_TYPE_TAG)

                if not vdi_type:
                    util.SMlog(
                        'Cannot introduce {} '.format(vdi_uuid) +
                        'without vdi_type'
                    )
                    continue

                sm_config = {
                    'vdi_type': vdi_type
                }

                if not VdiType.isCowImage(vdi_type):
                    # RAW volume: visibility comes from the metadata tag.
                    managed = not volume_metadata.get(HIDDEN_TAG)
                else:
                    # COW volume: read hidden/parent from the image header.
                    image_info = LinstorCowUtil(self.session, self._linstor, vdi_type).get_info(vdi_uuid)
                    managed = not image_info.hidden
                    if image_info.parentUuid:
                        sm_config['vhd-parent'] = image_info.parentUuid

                util.SMlog(
                    'Introducing VDI {} '.format(vdi_uuid) +
                    ' (name={}, virtual_size={}, allocated_size={})'.format(
                        name_label,
                        volume_info.virtual_size,
                        volume_info.allocated_size
                    )
                )

                vdi_ref = xenapi.VDI.db_introduce(
                    vdi_uuid,
                    name_label,
                    volume_metadata.get(NAME_DESCRIPTION_TAG) or '',
                    self.sr_ref,
                    type,
                    False,  # sharable
                    bool(volume_metadata.get(READ_ONLY_TAG)),
                    {},  # other_config
                    vdi_uuid,  # location
                    {},  # xenstore_data
                    sm_config,
                    managed,
                    str(volume_info.virtual_size),
                    str(volume_info.allocated_size)
                )

                is_a_snapshot = volume_metadata.get(IS_A_SNAPSHOT_TAG)
                xenapi.VDI.set_is_a_snapshot(vdi_ref, bool(is_a_snapshot))
                if is_a_snapshot:
                    xenapi.VDI.set_snapshot_time(
                        vdi_ref,
                        xmlrpc.client.DateTime(
                            volume_metadata[SNAPSHOT_TIME_TAG] or
                            '19700101T00:00:00Z'
                        )
                    )

                    # Remember snapshot->source relations; they are wired
                    # up in XAPI in step 5 below, once all VDIs exist.
                    snap_uuid = volume_metadata[SNAPSHOT_OF_TAG]
                    if snap_uuid in vdi_to_snaps:
                        vdi_to_snaps[snap_uuid].append(vdi_uuid)
                    else:
                        vdi_to_snaps[snap_uuid] = [vdi_uuid]

            # 4.b. Add the VDI in the list.
            vdi = self.vdi(vdi_uuid)
            self.vdis[vdi_uuid] = vdi

            if USE_KEY_HASH and VdiType.isCowImage(vdi.vdi_type):
                vdi.sm_config_override['key_hash'] = vdi.linstorcowutil.get_key_hash(vdi_uuid)

            # 4.c. Update CBT status of disks either just added
            # or already in XAPI.
            # NOTE(review): for VDIs already present in XAPI,
            # `volume_metadata` still holds the value bound by the loop in
            # step 3 (or by the introduce branch of a previous iteration),
            # not this VDI's metadata — confirm whether a
            # `volumes_metadata.get(vdi_uuid)` lookup is missing here.
            cbt_uuid = volume_metadata.get(CBTLOG_TAG)
            if cbt_uuid in cbt_vdis:
                vdi_ref = xenapi.VDI.get_by_uuid(vdi_uuid)
                xenapi.VDI.set_cbt_enabled(vdi_ref, True)
                # For existing VDIs, update local state too.
                # Scan in base class SR updates existing VDIs
                # again based on local states.
                self.vdis[vdi_uuid].cbt_enabled = True
                cbt_vdis.remove(cbt_uuid)

        # 5. Now set the snapshot statuses correctly in XAPI.
        for src_uuid in vdi_to_snaps:
            try:
                src_ref = xenapi.VDI.get_by_uuid(src_uuid)
            except Exception:
                # The source VDI no longer exists, continue.
                continue

            for snap_uuid in vdi_to_snaps[src_uuid]:
                try:
                    # This might fail in cases where its already set.
                    snap_ref = xenapi.VDI.get_by_uuid(snap_uuid)
                    xenapi.VDI.set_snapshot_of(snap_ref, src_ref)
                except Exception as e:
                    util.SMlog('Setting snapshot failed: {}'.format(e))

        # TODO: Check correctly how to use CBT.
        # Update cbt_enabled on the right VDI, check LVM/FileSR code.

        # 6. If we have items remaining in this list,
        # they are cbt_metadata VDI that XAPI doesn't know about.
        # Add them to self.vdis and they'll get added to the DB.
        for cbt_uuid in cbt_vdis:
            new_vdi = self.vdi(cbt_uuid)
            new_vdi.ty = 'cbt_metadata'
            new_vdi.cbt_enabled = True
            self.vdis[cbt_uuid] = new_vdi

        # 7. Update virtual allocation, build geneology and remove useless VDIs
        self.virtual_allocation = 0

        # 8. Build geneology (parent UUID -> list of child UUIDs).
        geneology = {}

        for vdi_uuid, vdi in self.vdis.items():
            if vdi.parent:
                if vdi.parent in self.vdis:
                    # A parent with children is never writable.
                    self.vdis[vdi.parent].read_only = True
                if vdi.parent in geneology:
                    geneology[vdi.parent].append(vdi_uuid)
                else:
                    geneology[vdi.parent] = [vdi_uuid]
            if not vdi.hidden:
                self.virtual_allocation += vdi.size

        # 9. Remove all hidden leaf nodes to avoid introducing records that
        # will be GC'ed.
        for vdi_uuid in list(self.vdis.keys()):
            if vdi_uuid not in geneology and self.vdis[vdi_uuid].hidden:
                util.SMlog(
                    'Scan found hidden leaf ({}), ignoring'.format(vdi_uuid)
                )
                del self.vdis[vdi_uuid]

1281 

1282 # -------------------------------------------------------------------------- 

1283 # Journals. 

1284 # -------------------------------------------------------------------------- 

1285 

    def _get_vdi_path_and_parent(self, vdi_uuid, volume_name):
        """Return (device_path, parent_uuid) for a volume.

        :param vdi_uuid: UUID of the VDI to inspect.
        :param volume_name: LINSTOR volume name used to build the path.
        :return: (path, None) for a RAW VDI, (path, parent_uuid) for a
            valid COW VDI, (None, None) when the path is missing, the
            image check fails or any error occurs.
        """
        try:
            device_path = self._linstor.build_device_path(volume_name)
            if not util.pathexists(device_path):
                return (None, None)

            # If it's a RAW VDI, there is no parent.
            volume_metadata = self._linstor.get_volume_metadata(vdi_uuid)
            vdi_type = volume_metadata[VDI_TYPE_TAG]
            if not VdiType.isCowImage(vdi_type):
                return (device_path, None)

            # Otherwise it's a COW and a parent can exist.
            linstorcowutil = LinstorCowUtil(self.session, self._linstor, vdi_type)
            if linstorcowutil.check(vdi_uuid) != CowUtil.CheckResult.Success:
                return (None, None)

            image_info = linstorcowutil.get_info(vdi_uuid)
            if image_info:
                return (device_path, image_info.parentUuid)
        except Exception as e:
            # Best effort: callers treat (None, None) as "leaf invalid".
            util.SMlog(
                'Failed to get VDI path and parent, ignoring: {}'
                .format(e)
            )
        return (None, None)

1312 

1313 def _undo_all_journal_transactions(self): 

1314 util.SMlog('Undoing all journal transactions...') 

1315 self.lock.acquire() 

1316 try: 

1317 self._handle_interrupted_inflate_ops() 

1318 self._handle_interrupted_clone_ops() 

1319 pass 

1320 finally: 

1321 self.lock.release() 

1322 

1323 def _handle_interrupted_inflate_ops(self): 

1324 transactions = self._journaler.get_all(LinstorJournaler.INFLATE) 

1325 for vdi_uuid, old_size in transactions.items(): 

1326 self._handle_interrupted_inflate(vdi_uuid, old_size) 

1327 self._journaler.remove(LinstorJournaler.INFLATE, vdi_uuid) 

1328 

1329 def _handle_interrupted_clone_ops(self): 

1330 transactions = self._journaler.get_all(LinstorJournaler.CLONE) 

1331 for vdi_uuid, old_size in transactions.items(): 

1332 self._handle_interrupted_clone(vdi_uuid, old_size) 

1333 self._journaler.remove(LinstorJournaler.CLONE, vdi_uuid) 

1334 

    def _handle_interrupted_inflate(self, vdi_uuid, old_size):
        """Undo an interrupted inflate by deflating the VDI back to old_size.

        :param vdi_uuid: UUID of the VDI to repair.
        :param old_size: size recorded in the INFLATE journal entry before
            the interrupted inflate started.
        """
        util.SMlog(
            '*** INTERRUPTED INFLATE OP: for {} ({})'
            .format(vdi_uuid, old_size)
        )

        vdi = self.vdis.get(vdi_uuid)
        if not vdi:
            util.SMlog('Cannot deflate missing VDI {}'.format(vdi_uuid))
            return

        # The cache must not be active here: get_volume_info below has to
        # return live data, not a stale cached value.
        assert not self._all_volume_info_cache
        volume_info = self._linstor.get_volume_info(vdi_uuid)

        current_size = volume_info.virtual_size
        assert current_size > 0
        # zeroize=True: wipe the space released by the deflate.
        vdi.linstorcowutil.force_deflate(vdi.path, old_size, current_size, zeroize=True)

1352 

    def _handle_interrupted_clone(
        self, vdi_uuid, clone_info, force_undo=False
    ):
        """Repair a clone operation that was interrupted mid-way.

        :param vdi_uuid: UUID of the original (cloned) VDI.
        :param clone_info: journal payload, '<base_uuid>_<snap_uuid>'.
        :param force_undo: revert unconditionally without validity checks.
        :raises util.SMException: when neither the base copy nor the
            original VDI can be found.
        """
        util.SMlog(
            '*** INTERRUPTED CLONE OP: for {} ({})'
            .format(vdi_uuid, clone_info)
        )

        base_uuid, snap_uuid = clone_info.split('_')

        # Use LINSTOR data because new VDIs may not be in the XAPI.
        volume_names = self._linstor.get_volumes_with_name()

        # Check if we don't have a base VDI. (If clone failed at startup.)
        if base_uuid not in volume_names:
            if vdi_uuid in volume_names:
                # Clone failed before the base copy was created: the
                # original volume is intact, nothing to undo.
                util.SMlog('*** INTERRUPTED CLONE OP: nothing to do')
                return
            raise util.SMException(
                'Base copy {} not present, but no original {} found'
                .format(base_uuid, vdi_uuid)
            )

        if force_undo:
            util.SMlog('Explicit revert')
            self._undo_clone(
                volume_names, vdi_uuid, base_uuid, snap_uuid
            )
            return

        # If VDI or snap uuid is missing...
        if vdi_uuid not in volume_names or \
                (snap_uuid and snap_uuid not in volume_names):
            util.SMlog('One or both leaves missing => revert')
            self._undo_clone(volume_names, vdi_uuid, base_uuid, snap_uuid)
            return

        vdi_path, vdi_parent_uuid = self._get_vdi_path_and_parent(
            vdi_uuid, volume_names[vdi_uuid]
        )
        snap_path, snap_parent_uuid = self._get_vdi_path_and_parent(
            snap_uuid, volume_names[snap_uuid]
        )

        if not vdi_path or (snap_uuid and not snap_path):
            util.SMlog('One or both leaves invalid (and path(s)) => revert')
            self._undo_clone(volume_names, vdi_uuid, base_uuid, snap_uuid)
            return

        # NOTE(review): even when both leaves are valid the clone is
        # reverted — rolling the clone forward is apparently not
        # implemented. Confirm this is intentional.
        util.SMlog('Leaves valid but => revert')
        self._undo_clone(volume_names, vdi_uuid, base_uuid, snap_uuid)

1404 

    def _undo_clone(self, volume_names, vdi_uuid, base_uuid, snap_uuid):
        """Roll back an interrupted clone: destroy the leaves and restore
        the base copy under the original VDI UUID.

        :param volume_names: mapping of VDI UUID -> LINSTOR volume name.
        :param vdi_uuid: UUID of the original (cloned) VDI.
        :param base_uuid: UUID of the read-only base copy.
        :param snap_uuid: UUID of the snapshot leaf (may be empty).
        """
        base_path = self._linstor.build_device_path(volume_names[base_uuid])
        base_metadata = self._linstor.get_volume_metadata(base_uuid)
        base_type = base_metadata[VDI_TYPE_TAG]

        if not util.pathexists(base_path):
            util.SMlog('Base not found! Exit...')
            util.SMlog('*** INTERRUPTED CLONE OP: rollback fail')
            return

        linstorcowutil = LinstorCowUtil(self.session, self._linstor, base_type)

        # Un-hide the parent.
        # NOTE(review): this first call clears READ_ONLY_TAG, not
        # HIDDEN_TAG — confirm the tag matches the intent of the comment.
        self._linstor.update_volume_metadata(base_uuid, {READ_ONLY_TAG: False})
        if VdiType.isCowImage(base_type):
            image_info = linstorcowutil.get_info(base_uuid, False)
            if image_info.hidden:
                linstorcowutil.set_hidden(base_path, False)
        elif base_metadata.get(HIDDEN_TAG):
            self._linstor.update_volume_metadata(
                base_uuid, {HIDDEN_TAG: False}
            )

        # Remove the child nodes.
        if snap_uuid and snap_uuid in volume_names:
            util.SMlog('Destroying snap {}...'.format(snap_uuid))

            try:
                self._linstor.destroy_volume(snap_uuid)
            except Exception as e:
                # Best effort: a leftover snap does not block the rename of
                # the base copy below (different UUID).
                util.SMlog(
                    'Cannot destroy snap {} during undo clone: {}'
                    .format(snap_uuid, e)
                )

        if vdi_uuid in volume_names:
            try:
                util.SMlog('Destroying {}...'.format(vdi_uuid))
                self._linstor.destroy_volume(vdi_uuid)
            except Exception as e:
                util.SMlog(
                    'Cannot destroy VDI {} during undo clone: {}'
                    .format(vdi_uuid, e)
                )
                # We can get an exception like this:
                # "Shutdown of the DRBD resource 'XXX failed", so the
                # volume info remains... The problem is we can't rename
                # properly the base VDI below this line, so we must change the
                # UUID of this bad VDI before.
                self._linstor.update_volume_uuid(
                    vdi_uuid, 'DELETED_' + vdi_uuid, force=True
                )

        # Rename! (The base copy takes over the original VDI UUID.)
        self._linstor.update_volume_uuid(base_uuid, vdi_uuid)

        # Inflate to the right size.
        if VdiType.isCowImage(base_type):
            vdi = self.vdi(vdi_uuid)
            linstorcowutil = LinstorCowUtil(self.session, self._linstor, vdi.vdi_type)
            volume_size = linstorcowutil.compute_volume_size(vdi.size)
            linstorcowutil.inflate(
                self._journaler, vdi_uuid, vdi.path,
                volume_size, vdi.capacity
            )
            self.vdis[vdi_uuid] = vdi

        # At this stage, tapdisk and SM vdi will be in paused state. Remove
        # flag to facilitate vm deactivate.
        vdi_ref = self.session.xenapi.VDI.get_by_uuid(vdi_uuid)
        self.session.xenapi.VDI.remove_from_sm_config(vdi_ref, 'paused')

        util.SMlog('*** INTERRUPTED CLONE OP: rollback success')

1478 

1479 # -------------------------------------------------------------------------- 

1480 # Cache. 

1481 # -------------------------------------------------------------------------- 

1482 

1483 def _create_linstor_cache(self): 

1484 reconnect = False 

1485 

1486 def create_cache(): 

1487 nonlocal reconnect 

1488 try: 

1489 if reconnect: 

1490 self._reconnect() 

1491 return self._linstor.get_volumes_with_info() 

1492 except Exception as e: 

1493 reconnect = True 

1494 raise e 

1495 

1496 self._all_volume_metadata_cache = \ 

1497 self._linstor.get_volumes_with_metadata() 

1498 self._all_volume_info_cache = util.retry( 

1499 create_cache, 

1500 maxretry=10, 

1501 period=3 

1502 ) 

1503 

1504 def _destroy_linstor_cache(self): 

1505 self._all_volume_info_cache = None 

1506 self._all_volume_metadata_cache = None 

1507 

1508 # -------------------------------------------------------------------------- 

1509 # Misc. 

1510 # -------------------------------------------------------------------------- 

1511 

1512 def _reconnect(self): 

1513 controller_uri = get_controller_uri() 

1514 

1515 self._journaler = LinstorJournaler( 

1516 controller_uri, self._group_name, logger=util.SMlog 

1517 ) 

1518 

1519 # Try to open SR if exists. 

1520 # We can repair only if we are on the master AND if 

1521 # we are trying to execute an exclusive operation. 

1522 # Otherwise we could try to delete a VDI being created or 

1523 # during a snapshot. An exclusive op is the guarantee that 

1524 # the SR is locked. 

1525 self._linstor = LinstorVolumeManager( 

1526 controller_uri, 

1527 self._group_name, 

1528 repair=( 

1529 self.is_master() and 

1530 self.srcmd.cmd in self.ops_exclusive 

1531 ), 

1532 logger=util.SMlog 

1533 ) 

1534 

1535 def _ensure_space_available(self, amount_needed): 

1536 space_available = self._linstor.max_volume_size_allowed 

1537 if (space_available < amount_needed): 

1538 util.SMlog( 

1539 'Not enough space! Free space: {}, need: {}'.format( 

1540 space_available, amount_needed 

1541 ) 

1542 ) 

1543 raise xs_errors.XenError('SRNoSpace') 

1544 

1545 def _kick_gc(self): 

1546 util.SMlog('Kicking GC') 

1547 cleanup.start_gc_service(self.uuid) 

1548 

1549# ============================================================================== 

1550# LinstorSr VDI 

1551# ============================================================================== 

1552 

1553 

1554class LinstorVDI(VDI.VDI): 

1555 # -------------------------------------------------------------------------- 

1556 # VDI methods. 

1557 # -------------------------------------------------------------------------- 

1558 

    @override
    def load(self, vdi_uuid) -> None:
        """Load VDI state from LINSTOR, or prepare a pending create/delete.

        Sets self.path/self.hidden/... from the storage; when the volume
        does not exist, flags a deletion (vdi_delete) or prepares a
        creation (vdi_create) instead of failing.

        :raises xs_errors.XenError: VDIUnavailable on any other failure.
        """
        self._lock = self.sr.lock
        self._exists = True
        self._linstor = self.sr._linstor

        # Update hidden parent property.
        self.hidden = False

        def raise_bad_load(e):
            # Wrap any load failure into a VDIUnavailable XenError.
            util.SMlog(
                'Got exception in LinstorVDI.load: {}'.format(e)
            )
            util.SMlog(traceback.format_exc())
            raise xs_errors.XenError(
                'VDIUnavailable',
                opterr='Could not load {} because: {}'.format(self.uuid, e)
            )

        # Try to load VDI.
        try:
            if (
                self.sr.srcmd.cmd == 'vdi_attach_from_config' or
                self.sr.srcmd.cmd == 'vdi_detach_from_config'
            ):
                # attach/detach from config: the device path comes from the
                # config blob, not from a LINSTOR lookup.
                self._set_type(VdiType.RAW)
                self.path = self.sr.srcmd.params['vdi_path']
            else:
                self._determine_type_and_path()
                self._load_this()

            util.SMlog('VDI {} loaded! (path={}, hidden={})'.format(
                self.uuid, self.path, self.hidden
            ))
        except LinstorVolumeManagerError as e:
            # 1. It may be a VDI deletion.
            if e.code == LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS:
                if self.sr.srcmd.cmd == 'vdi_delete':
                    self.deleted = True
                    return

            # 2. Or maybe a creation.
            if self.sr.srcmd.cmd == 'vdi_create':
                self._key_hash = None  # Only used in create.

                self._exists = False
                vdi_sm_config = self.sr.srcmd.params.get('vdi_sm_config')
                if vdi_sm_config:
                    # 'image-format' is the current key; 'type' is the
                    # legacy spelling.
                    image_format = vdi_sm_config.get('image-format') or vdi_sm_config.get('type')
                    if image_format:
                        try:
                            self._set_type(CREATE_PARAM_TYPES[image_format])
                        except:
                            raise xs_errors.XenError('VDICreate', opterr='bad image format')

                    # NOTE(review): with this nesting, a create without
                    # sm-config selects no default vdi_type — confirm
                    # whether the two blocks below should run
                    # unconditionally (outside the `if vdi_sm_config:`).
                    if not self.vdi_type:
                        self._set_type(getVdiTypeFromImageFormat(self.sr.preferred_image_formats[0]))

                    if VdiType.isCowImage(self.vdi_type):
                        self._key_hash = vdi_sm_config.get('key_hash')

                # For the moment we don't have a path.
                self._update_device_name(None)
                return
            raise_bad_load(e)
        except Exception as e:
            raise_bad_load(e)

1626 

    @override
    def create(self, sr_uuid, vdi_uuid, size) -> str:
        """Create a new LINSTOR-backed VDI and introduce it into XAPI.

        :param sr_uuid: SR UUID (unused; SR state comes from self.sr).
        :param vdi_uuid: VDI UUID (unused; self.uuid is authoritative).
        :param size: requested virtual size in bytes; rounded by cowutil.
        :return: XML-RPC VDI params string.
        :raises xs_errors.XenError: VDIExists / VDICreate on failure.
        """
        # Usage example:
        # xe vdi-create sr-uuid=39a5826b-5a90-73eb-dd09-51e3a116f937
        # name-label="linstor-vdi-1" virtual-size=4096MiB sm-config:type=vhd

        # 1. Check if we are on the master and if the VDI doesn't exist.
        util.SMlog('LinstorVDI.create for {}'.format(self.uuid))
        if self._exists:
            raise xs_errors.XenError('VDIExists')

        assert self.uuid
        assert self.ty
        assert self.vdi_type

        # 2. Compute size and check space available.
        size = self.linstorcowutil.cowutil.validateAndRoundImageSize(int(size))
        volume_size = self.linstorcowutil.compute_volume_size(size)
        util.SMlog(
            'LinstorVDI.create: type={}, cow-size={}, volume-size={}'
            .format(self.vdi_type, size, volume_size)
        )
        self.sr._ensure_space_available(volume_size)

        # 3. Set sm_config attribute of VDI parent class.
        self.sm_config = self.sr.srcmd.params['vdi_sm_config']

        # 4. Create!
        failed = False
        try:
            # HA/redo-log volumes get fixed names and HA placement.
            volume_name = None
            if self.ty == 'ha_statefile':
                volume_name = HA_VOLUME_NAME
            elif self.ty == 'redo_log':
                volume_name = REDO_LOG_VOLUME_NAME

            # Created non-persistent first: only marked persistent at the
            # very end, so a crash mid-create leaves no permanent volume.
            self._linstor.create_volume(
                self.uuid,
                volume_size,
                persistent=False,
                volume_name=volume_name,
                high_availability=volume_name is not None
            )
            volume_info = self._linstor.get_volume_info(self.uuid)

            self._update_device_name(volume_info.name)

            if not VdiType.isCowImage(self.vdi_type):
                self.size = volume_info.virtual_size
            else:
                self.linstorcowutil.create(
                    self.path, size, False, self.linstorcowutil.cowutil.getDefaultPreallocationSizeVirt()
                )
                self.size = self.linstorcowutil.get_size_virt(self.uuid)

            if self._key_hash:
                self.linstorcowutil.set_key(self.path, self._key_hash)

            # Because cowutil commands modify the volume data,
            # we must retrieve a new time the utilization size.
            volume_info = self._linstor.get_volume_info(self.uuid)

            volume_metadata = {
                NAME_LABEL_TAG: util.to_plain_string(self.label),
                NAME_DESCRIPTION_TAG: util.to_plain_string(self.description),
                IS_A_SNAPSHOT_TAG: False,
                SNAPSHOT_OF_TAG: '',
                SNAPSHOT_TIME_TAG: '',
                TYPE_TAG: self.ty,
                VDI_TYPE_TAG: self.vdi_type,
                READ_ONLY_TAG: bool(self.read_only),
                METADATA_OF_POOL_TAG: ''
            }
            self._linstor.set_volume_metadata(self.uuid, volume_metadata)

            # Set the open timeout to 1min to reduce CPU usage
            # in http-disk-server when a secondary server tries to open
            # an already opened volume.
            # NOTE(review): the value passed is 600 while the comment says
            # 1min — confirm the unit expected by set_auto_promote_timeout.
            if self.ty == 'ha_statefile' or self.ty == 'redo_log':
                self._linstor.set_auto_promote_timeout(self.uuid, 600)

            self._linstor.mark_volume_as_persistent(self.uuid)
        except util.CommandException as e:
            failed = True
            raise xs_errors.XenError(
                'VDICreate', opterr='error {}'.format(e.code)
            )
        except Exception as e:
            failed = True
            raise xs_errors.XenError('VDICreate', opterr='error {}'.format(e))
        finally:
            if failed:
                # Best-effort cleanup of the half-created volume; the
                # original VDICreate error still propagates.
                util.SMlog('Unable to create VDI {}'.format(self.uuid))
                try:
                    self._linstor.destroy_volume(self.uuid)
                except Exception as e:
                    util.SMlog(
                        'Ignoring exception after fail in LinstorVDI.create: '
                        '{}'.format(e)
                    )

        self.utilisation = volume_info.allocated_size
        self.sm_config['vdi_type'] = self.vdi_type
        self.sm_config['image-format'] = getImageStringFromVdiType(self.vdi_type)

        self.ref = self._db_introduce()
        self.sr._update_stats(self.size)

        return VDI.VDI.get_params(self)

1736 

1737 @override 

1738 def delete(self, sr_uuid, vdi_uuid, data_only=False) -> None: 

1739 util.SMlog('LinstorVDI.delete for {}'.format(self.uuid)) 

1740 if self.attached: 

1741 raise xs_errors.XenError('VDIInUse') 

1742 

1743 if self.deleted: 

1744 return super(LinstorVDI, self).delete( 

1745 sr_uuid, vdi_uuid, data_only 

1746 ) 

1747 

1748 vdi_ref = self.sr.srcmd.params['vdi_ref'] 

1749 if not self.session.xenapi.VDI.get_managed(vdi_ref): 

1750 raise xs_errors.XenError( 

1751 'VDIDelete', 

1752 opterr='Deleting non-leaf node not permitted' 

1753 ) 

1754 

1755 try: 

1756 # Remove from XAPI and delete from LINSTOR. 

1757 self._linstor.destroy_volume(self.uuid) 

1758 if not data_only: 

1759 self._db_forget() 

1760 

1761 self.sr.lock.cleanupAll(vdi_uuid) 

1762 except Exception as e: 

1763 util.SMlog( 

1764 'Failed to remove the volume (maybe is leaf coalescing) ' 

1765 'for {} err: {}'.format(self.uuid, e) 

1766 ) 

1767 

1768 try: 

1769 raise xs_errors.XenError('VDIDelete', opterr=str(e)) 

1770 except LinstorVolumeManagerError as e: 

1771 if e.code != LinstorVolumeManagerError.ERR_VOLUME_DESTROY: 

1772 raise xs_errors.XenError('VDIDelete', opterr=str(e)) 

1773 

1774 return 

1775 

1776 if self.uuid in self.sr.vdis: 

1777 del self.sr.vdis[self.uuid] 

1778 

1779 # TODO: Check size after delete. 

1780 self.sr._update_stats(-self.size) 

1781 self.sr._kick_gc() 

1782 return super(LinstorVDI, self).delete(sr_uuid, vdi_uuid, data_only) 

1783 

    @override
    def attach(self, sr_uuid, vdi_uuid) -> str:
        """Attach the VDI, inflating it first when thin provisioning
        requires more space for a writable COW image.

        :return: XML-RPC attach params string.
        :raises xs_errors.XenError: VDIUnavailable when journal entries are
            pending or the thin-prepare step fails.
        """
        util.SMlog('LinstorVDI.attach for {}'.format(self.uuid))
        attach_from_config = self.sr.srcmd.cmd == 'vdi_attach_from_config'
        # Refuse the attach while journal entries exist: an sr_scan must
        # run first to auto-repair the interrupted operation. The check is
        # skipped for attach-from-config of this very VDI (HA path).
        if (
            not attach_from_config or
            self.sr.srcmd.params['vdi_uuid'] != self.uuid
        ) and self.sr._journaler.has_entries(self.uuid):
            raise xs_errors.XenError(
                'VDIUnavailable',
                opterr='Interrupted operation detected on this VDI, '
                'scan SR first to trigger auto-repair'
            )

        # Default to writable when no args are given.
        writable = 'args' not in self.sr.srcmd.params or \
            self.sr.srcmd.params['args'][0] == 'true'

        if not attach_from_config or self.sr.is_master():
            # We need to inflate the volume if we don't have enough place
            # to mount the COW image. I.e. the volume capacity must be greater
            # than the COW size + bitmap size.
            need_inflate = True
            if (
                not VdiType.isCowImage(self.vdi_type) or
                not writable or
                self.capacity >= self.linstorcowutil.compute_volume_size(self.size)
            ):
                need_inflate = False

            if need_inflate:
                try:
                    self._prepare_thin(True)
                except Exception as e:
                    raise xs_errors.XenError(
                        'VDIUnavailable',
                        opterr='Failed to attach VDI during "prepare thin": {}'
                        .format(e)
                    )

        if not hasattr(self, 'xenstore_data'):
            self.xenstore_data = {}
        self.xenstore_data['storage-type'] = LinstorSR.DRIVER_TYPE

        # HA/redo-log volumes attached from config may go through the
        # HTTP NBD server path instead of a direct DRBD device.
        if (
            USE_HTTP_NBD_SERVERS and
            attach_from_config and
            self.path.startswith('/dev/http-nbd/')
        ):
            return self._attach_using_http_nbd()

        # Ensure we have a path...
        self.linstorcowutil.create_chain_paths(self.uuid, readonly=not writable)

        self.attached = True
        return VDI.VDI.attach(self, self.sr.uuid, self.uuid)

1839 

    @override
    def detach(self, sr_uuid, vdi_uuid) -> None:
        """Detach the VDI, deflating when needed and dropping diskless
        DRBD resources on slaves.

        :raises xs_errors.XenError: VDIUnavailable when the thin-prepare
            (deflate) step fails.
        """
        util.SMlog('LinstorVDI.detach for {}'.format(self.uuid))
        detach_from_config = self.sr.srcmd.cmd == 'vdi_detach_from_config'
        self.attached = False

        if detach_from_config and self.path.startswith('/dev/http-nbd/'):
            return self._detach_using_http_nbd()

        # RAW volumes are never inflated/deflated: nothing else to do.
        if not VdiType.isCowImage(self.vdi_type):
            return

        # The VDI is already deflated if the COW image size + metadata is
        # equal to the LINSTOR volume size.
        volume_size = self.linstorcowutil.compute_volume_size(self.size)
        already_deflated = self.capacity <= volume_size

        if already_deflated:
            util.SMlog(
                'VDI {} already deflated (old volume size={}, volume size={})'
                .format(self.uuid, self.capacity, volume_size)
            )

        need_deflate = True
        if already_deflated:
            need_deflate = False
        elif self.sr._provisioning == 'thick':
            # Thick provisioning keeps volumes inflated... except
            # snapshots, which are always deflated.
            need_deflate = False

            vdi_ref = self.sr.srcmd.params['vdi_ref']
            if self.session.xenapi.VDI.get_is_a_snapshot(vdi_ref):
                need_deflate = True

        if need_deflate:
            try:
                self._prepare_thin(False)
            except Exception as e:
                raise xs_errors.XenError(
                    'VDIUnavailable',
                    opterr='Failed to detach VDI during "prepare thin": {}'
                    .format(e)
                )

        # We remove only on slaves because the volume can be used by the GC.
        if self.sr.is_master():
            return

        # Walk up the parent chain, dropping each diskless DRBD resource
        # on this host; stop at the first chain lookup failure.
        while vdi_uuid:
            try:
                path = self._linstor.build_device_path(self._linstor.get_volume_name(vdi_uuid))
                parent_vdi_uuid = self.sr.linstorcowutil.get_info(vdi_uuid).parentUuid
            except Exception:
                break

            if util.pathexists(path):
                try:
                    self._linstor.remove_volume_if_diskless(vdi_uuid)
                except Exception as e:
                    # Ensure we can always detach properly.
                    # I don't want to corrupt the XAPI info.
                    util.SMlog('Failed to clean VDI {} during detach: {}'.format(vdi_uuid, e))
            vdi_uuid = parent_vdi_uuid

1902 

    @override
    def resize(self, sr_uuid, vdi_uuid, size) -> str:
        """
        Grow this VDI to at least `size` bytes (shrinking is refused).

        Must run on the master. For RAW volumes the LINSTOR volume is
        resized directly; for COW images the volume is inflated (with a
        journal entry) and the virtual size is bumped in the image header.
        XAPI's virtual size / physical utilisation fields are refreshed at
        the end.

        :param sr_uuid: UUID of the SR (unused, part of the SMAPI contract).
        :param vdi_uuid: UUID of the VDI (unused, `self.uuid` is used).
        :param size: requested new virtual size in bytes.
        :return: the VDI params (XMLRPC string) after the resize.
        :raises xs_errors.XenError: on slave call, hidden VDI or shrink.
        """
        util.SMlog('LinstorVDI.resize for {}'.format(self.uuid))
        if not self.sr.is_master():
            raise xs_errors.XenError(
                'VDISize',
                opterr='resize on slave not allowed'
            )

        if self.hidden:
            raise xs_errors.XenError('VDIUnavailable', opterr='hidden VDI')

        # Compute the virtual COW and DRBD volume size.
        size = self.linstorcowutil.cowutil.validateAndRoundImageSize(int(size))
        volume_size = self.linstorcowutil.compute_volume_size(size)
        util.SMlog(
            'LinstorVDI.resize: type={}, cow-size={}, volume-size={}'
            .format(self.vdi_type, size, volume_size)
        )

        if size < self.size:
            util.SMlog(
                'vdi_resize: shrinking not supported: '
                '(current size: {}, new size: {})'.format(self.size, size)
            )
            raise xs_errors.XenError('VDISize', opterr='shrinking not allowed')

        if size == self.size:
            # No-op resize: just return the current params.
            return VDI.VDI.get_params(self)

        if not VdiType.isCowImage(self.vdi_type):
            old_volume_size = self.size
            new_volume_size = LinstorVolumeManager.round_up_volume_size(size)
        else:
            old_volume_size = self.utilisation
            if self.sr._provisioning == 'thin':
                # VDI is currently deflated, so keep it deflated.
                new_volume_size = old_volume_size
            else:
                new_volume_size = self.linstorcowutil.compute_volume_size(size)
        assert new_volume_size >= old_volume_size

        space_needed = new_volume_size - old_volume_size
        self.sr._ensure_space_available(space_needed)

        old_size = self.size
        if not VdiType.isCowImage(self.vdi_type):
            self._linstor.resize(self.uuid, new_volume_size)
        else:
            if new_volume_size != old_volume_size:
                # Inflate is journaled so an interrupted resize can be
                # replayed/undone.
                self.sr.linstorcowutil.inflate(
                    self.sr._journaler, self.uuid, self.path,
                    new_volume_size, old_volume_size
                )
            self.sr.linstorcowutil.set_size_virt_fast(self.path, size)

        # Reload size attributes.
        self._load_this()

        vdi_ref = self.sr.srcmd.params['vdi_ref']
        self.session.xenapi.VDI.set_virtual_size(vdi_ref, str(self.size))
        self.session.xenapi.VDI.set_physical_utilisation(
            vdi_ref, str(self.utilisation)
        )
        self.sr._update_stats(self.size - old_size)
        return VDI.VDI.get_params(self)

1969 

1970 @override 

1971 def clone(self, sr_uuid, vdi_uuid) -> str: 

1972 return self._do_snapshot(sr_uuid, vdi_uuid, VDI.SNAPSHOT_DOUBLE) 

1973 

    @override
    def compose(self, sr_uuid, vdi1, vdi2) -> None:
        """
        Re-parent this COW VDI (vdi2) onto vdi1 and hide the new parent.

        tapdisk must be paused around the parent change, and is refreshed
        afterwards so it picks up the new chain.

        :param sr_uuid: UUID of the SR (part of the SMAPI contract).
        :param vdi1: UUID of the new parent VDI.
        :param vdi2: UUID of the child (this VDI).
        :raises xs_errors.XenError: 'Unimplemented' for non-COW VDIs.
        :raises util.SMException: if tapdisk cannot be paused/refreshed.
        """
        util.SMlog('VDI.compose for {} -> {}'.format(vdi2, vdi1))
        if not VdiType.isCowImage(self.vdi_type):
            raise xs_errors.XenError('Unimplemented')

        parent_uuid = vdi1
        parent_path = self._linstor.get_device_path(parent_uuid)

        # We must pause tapdisk to correctly change the parent. Otherwise we
        # have a readonly error.
        # See: https://github.com/xapi-project/xen-api/blob/b3169a16d36dae0654881b336801910811a399d9/ocaml/xapi/storage_migrate.ml#L928-L929
        # and: https://github.com/xapi-project/xen-api/blob/b3169a16d36dae0654881b336801910811a399d9/ocaml/xapi/storage_migrate.ml#L775

        if not blktap2.VDI.tap_pause(self.session, self.sr.uuid, self.uuid):
            raise util.SMException('Failed to pause VDI {}'.format(self.uuid))
        try:
            self.sr.linstorcowutil.set_parent(self.path, parent_path, False)
            self.sr.linstorcowutil.set_hidden(parent_path)
            # The new parent must no longer be user-visible.
            self.sr.session.xenapi.VDI.set_managed(
                self.sr.srcmd.params['args'][0], False
            )
        finally:
            # Always unpause, even if the re-parent failed.
            blktap2.VDI.tap_unpause(self.session, self.sr.uuid, self.uuid)

        if not blktap2.VDI.tap_refresh(self.session, self.sr.uuid, self.uuid):
            raise util.SMException(
                'Failed to refresh VDI {}'.format(self.uuid)
            )

        util.SMlog('Compose done')

2005 

2006 @override 

2007 def generate_config(self, sr_uuid, vdi_uuid) -> str: 

2008 """ 

2009 Generate the XML config required to attach and activate 

2010 a VDI for use when XAPI is not running. Attach and 

2011 activation is handled by vdi_attach_from_config below. 

2012 """ 

2013 

2014 util.SMlog('LinstorVDI.generate_config for {}'.format(self.uuid)) 

2015 

2016 resp = {} 

2017 resp['device_config'] = self.sr.dconf 

2018 resp['sr_uuid'] = sr_uuid 

2019 resp['vdi_uuid'] = self.uuid 

2020 resp['sr_sm_config'] = self.sr.sm_config 

2021 resp['command'] = 'vdi_attach_from_config' 

2022 

2023 # By default, we generate a normal config. 

2024 # But if the disk is persistent, we must use a HTTP/NBD 

2025 # server to ensure we can always write or read data. 

2026 # Why? DRBD is unsafe when used with more than 4 hosts: 

2027 # We are limited to use 1 diskless and 3 full. 

2028 # We can't increase this limitation, so we use a NBD/HTTP device 

2029 # instead. 

2030 volume_name = self._linstor.get_volume_name(self.uuid) 

2031 if not USE_HTTP_NBD_SERVERS or volume_name not in [ 

2032 HA_VOLUME_NAME, REDO_LOG_VOLUME_NAME 

2033 ]: 

2034 if not self.path or not util.pathexists(self.path): 

2035 available = False 

2036 # Try to refresh symlink path... 

2037 try: 

2038 self.path = self._linstor.get_device_path(vdi_uuid) 

2039 available = util.pathexists(self.path) 

2040 except Exception: 

2041 pass 

2042 if not available: 

2043 raise xs_errors.XenError('VDIUnavailable') 

2044 

2045 resp['vdi_path'] = self.path 

2046 else: 

2047 # Axiom: DRBD device is present on at least one host. 

2048 resp['vdi_path'] = '/dev/http-nbd/' + volume_name 

2049 

2050 config = xmlrpc.client.dumps(tuple([resp]), 'vdi_attach_from_config') 

2051 return xmlrpc.client.dumps((config,), "", True) 

2052 

2053 @override 

2054 def attach_from_config(self, sr_uuid, vdi_uuid) -> str: 

2055 """ 

2056 Attach and activate a VDI using config generated by 

2057 vdi_generate_config above. This is used for cases such as 

2058 the HA state-file and the redo-log. 

2059 """ 

2060 

2061 util.SMlog('LinstorVDI.attach_from_config for {}'.format(vdi_uuid)) 

2062 

2063 try: 

2064 if not util.pathexists(self.sr.path): 

2065 self.sr.attach(sr_uuid) 

2066 

2067 if not DRIVER_CONFIG['ATTACH_FROM_CONFIG_WITH_TAPDISK']: 

2068 return self.attach(sr_uuid, vdi_uuid) 

2069 except Exception: 

2070 util.logException('LinstorVDI.attach_from_config') 

2071 raise xs_errors.XenError( 

2072 'SRUnavailable', 

2073 opterr='Unable to attach from config' 

2074 ) 

2075 return '' 

2076 

2077 def reset_leaf(self, sr_uuid, vdi_uuid): 

2078 if not VdiType.isCowImage(self.vdi_type): 

2079 raise xs_errors.XenError('Unimplemented') 

2080 

2081 if not self.linstorcowutil.has_parent(self.uuid): 

2082 raise util.SMException( 

2083 'ERROR: VDI {} has no parent, will not reset contents' 

2084 .format(self.uuid) 

2085 ) 

2086 

2087 self.linstorcowutil.kill_data(self.path) 

2088 

    def _load_this(self):
        """
        Refresh this VDI's size/visibility attributes from LINSTOR.

        Metadata and volume info come from the SR-level caches when
        populated, otherwise directly from the LINSTOR volume manager.
        Updates: utilisation, capacity, hidden, size, parent, managed,
        label, description and sm_config_override.
        """
        volume_metadata = None
        if self.sr._all_volume_metadata_cache:
            volume_metadata = self.sr._all_volume_metadata_cache.get(self.uuid)
        if volume_metadata is None:
            # Cache miss (or no cache): query LINSTOR directly.
            volume_metadata = self._linstor.get_volume_metadata(self.uuid)

        volume_info = None
        if self.sr._all_volume_info_cache:
            volume_info = self.sr._all_volume_info_cache.get(self.uuid)
        if volume_info is None:
            volume_info = self._linstor.get_volume_info(self.uuid)

        # Contains the max physical size used on a disk.
        # When LINSTOR LVM driver is used, the size should be similar to
        # virtual size (i.e. the LINSTOR max volume size).
        # When LINSTOR Thin LVM driver is used, the used physical size should
        # be lower than virtual size at creation.
        # The physical size increases after each write in a new block.
        self.utilisation = volume_info.allocated_size
        self.capacity = volume_info.virtual_size

        if not VdiType.isCowImage(self.vdi_type):
            # RAW volume: hidden flag lives in LINSTOR metadata, no parent.
            self.hidden = int(volume_metadata.get(HIDDEN_TAG) or 0)
            self.size = volume_info.virtual_size
            self.parent = ''
        else:
            # COW image: hidden/size/parent come from the image header.
            image_info = self.sr.linstorcowutil.get_info(self.uuid)
            self.hidden = image_info.hidden
            self.size = image_info.sizeVirt
            self.parent = image_info.parentUuid

        if self.hidden:
            self.managed = False

        self.label = volume_metadata.get(NAME_LABEL_TAG) or ''
        self.description = volume_metadata.get(NAME_DESCRIPTION_TAG) or ''

        # Update sm_config_override of VDI parent class.
        self.sm_config_override = {'vhd-parent': self.parent or None}

2129 

2130 def _mark_hidden(self, hidden=True): 

2131 if self.hidden == hidden: 

2132 return 

2133 

2134 if VdiType.isCowImage(self.vdi_type): 

2135 self.linstorcowutil.set_hidden(self.path, hidden) 

2136 else: 

2137 self._linstor.update_volume_metadata(self.uuid, { 

2138 HIDDEN_TAG: hidden 

2139 }) 

2140 self.hidden = hidden 

2141 

2142 @override 

2143 def update(self, sr_uuid, vdi_uuid) -> None: 

2144 xenapi = self.session.xenapi 

2145 vdi_ref = xenapi.VDI.get_by_uuid(self.uuid) 

2146 

2147 volume_metadata = { 

2148 NAME_LABEL_TAG: util.to_plain_string( 

2149 xenapi.VDI.get_name_label(vdi_ref) 

2150 ), 

2151 NAME_DESCRIPTION_TAG: util.to_plain_string( 

2152 xenapi.VDI.get_name_description(vdi_ref) 

2153 ) 

2154 } 

2155 

2156 try: 

2157 self._linstor.update_volume_metadata(self.uuid, volume_metadata) 

2158 except LinstorVolumeManagerError as e: 

2159 if e.code == LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

2160 raise xs_errors.XenError( 

2161 'VDIUnavailable', 

2162 opterr='LINSTOR volume {} not found'.format(self.uuid) 

2163 ) 

2164 raise xs_errors.XenError('VDIUnavailable', opterr=str(e)) 

2165 

2166 # -------------------------------------------------------------------------- 

2167 # Thin provisioning. 

2168 # -------------------------------------------------------------------------- 

2169 

    def _prepare_thin(self, attach):
        """
        Inflate (attach) or deflate (detach) a thin-provisioned VDI.

        On the master the thin attach/detach helpers are called directly;
        on a slave the operation is delegated to the master through a
        manager command. Size attributes and SR physical utilisation are
        refreshed afterwards.

        :param attach: True to prepare for attach, False for detach.
        """
        if self.sr.is_master():
            if attach:
                attach_thin(
                    self.session, self.sr._journaler, self._linstor,
                    self.sr.uuid, self.uuid
                )
            else:
                detach_thin(
                    self.session, self._linstor, self.sr.uuid, self.uuid
                )
        else:
            fn = 'attach' if attach else 'detach'

            master = util.get_master_ref(self.session)

            args = {
                'groupName': self.sr._group_name,
                'srUuid': self.sr.uuid,
                'vdiUuid': self.uuid
            }

            try:
                self.sr._exec_manager_command(master, fn, args, 'VDIUnavailable')
            except Exception:
                # Best effort on detach: a failed deflate must never block
                # the detach itself. Attach failures stay fatal.
                if fn != 'detach':
                    raise

        # Reload size attrs after inflate or deflate!
        self._load_this()
        self.sr._update_physical_size()

        vdi_ref = self.sr.srcmd.params['vdi_ref']
        self.session.xenapi.VDI.set_physical_utilisation(
            vdi_ref, str(self.utilisation)
        )

        self.session.xenapi.SR.set_physical_utilisation(
            self.sr.sr_ref, str(self.sr.physical_utilisation)
        )

2210 

2211 # -------------------------------------------------------------------------- 

2212 # Generic helpers. 

2213 # -------------------------------------------------------------------------- 

2214 

2215 def _set_type(self, vdi_type: str) -> None: 

2216 self.vdi_type = vdi_type 

2217 self.linstorcowutil = LinstorCowUtil(self.session, self.sr._linstor_proxy, self.vdi_type) 

2218 

    def _determine_type_and_path(self):
        """
        Determine whether this is a RAW or a COW VDI.

        First tries the XAPI VDI record's sm_config; if unavailable
        (e.g. a fresh VDI created via snapshot), falls back to the
        LINSTOR volume metadata. Updates vdi_type, sm_config_override
        and the device path.

        :raises xs_errors.XenError: if no vdi_type can be determined.
        """

        # 1. Check vdi_ref and vdi_type in config.
        try:
            vdi_ref = self.session.xenapi.VDI.get_by_uuid(self.uuid)
            if vdi_ref:
                sm_config = self.session.xenapi.VDI.get_sm_config(vdi_ref)
                vdi_type = sm_config.get('vdi_type')
                if vdi_type:
                    # Update parent fields.
                    self._set_type(vdi_type)
                    self.sm_config_override = sm_config
                    self._update_device_name(
                        self._linstor.get_volume_name(self.uuid)
                    )
                    return
        except Exception:
            # XAPI lookup failed (e.g. no record yet): fall through to
            # the LINSTOR metadata path below.
            pass

        # 2. Otherwise use the LINSTOR volume manager directly.
        # It's probably a new VDI created via snapshot.
        volume_metadata = self._linstor.get_volume_metadata(self.uuid)
        self._set_type(volume_metadata.get(VDI_TYPE_TAG))
        if not self.vdi_type:
            raise xs_errors.XenError(
                'VDIUnavailable',
                opterr='failed to get vdi_type in metadata'
            )
        self._update_device_name(self._linstor.get_volume_name(self.uuid))

2251 

2252 def _update_device_name(self, device_name): 

2253 self._device_name = device_name 

2254 

2255 # Mark path of VDI parent class. 

2256 if device_name: 

2257 self.path = self._linstor.build_device_path(self._device_name) 

2258 else: 

2259 self.path = None 

2260 

    def _create_snapshot(self, snap_vdi_type, snap_uuid, snap_of_uuid=None):
        """
        Snapshot self and return the snapshot VDI object.

        :param snap_vdi_type: VDI type of the new snapshot volume.
        :param snap_uuid: UUID to assign to the new volume.
        :param snap_of_uuid: UUID of the snapshotted VDI; set for a
            user-visible snapshot, None for the new active leaf.
        :return: the new LinstorVDI snapshot object.
        :raises xs_errors.XenError: 'VDISnapshot' if the new VDI is missing.
        """

        # 1. Create a new LINSTOR volume with the same size than self.
        # Created non-persistent first; only marked persistent at the end
        # once fully initialized.
        snap_path = self._linstor.shallow_clone_volume(
            self.uuid, snap_uuid, persistent=False
        )

        # 2. Write the snapshot content.
        is_raw = (self.vdi_type == VdiType.RAW)
        self.linstorcowutil.snapshot(
            snap_path, self.path, is_raw, max(self.size, self.linstorcowutil.cowutil.getDefaultPreallocationSizeVirt())
        )

        # 3. Get snapshot parent.
        snap_parent = self.linstorcowutil.get_parent(snap_uuid)

        # 4. Update metadata.
        util.SMlog('Set VDI {} metadata of snapshot'.format(snap_uuid))
        volume_metadata = {
            NAME_LABEL_TAG: util.to_plain_string(self.label),
            NAME_DESCRIPTION_TAG: util.to_plain_string(self.description),
            IS_A_SNAPSHOT_TAG: bool(snap_of_uuid),
            SNAPSHOT_OF_TAG: snap_of_uuid,
            SNAPSHOT_TIME_TAG: '',
            TYPE_TAG: self.ty,
            VDI_TYPE_TAG: snap_vdi_type,
            READ_ONLY_TAG: False,
            METADATA_OF_POOL_TAG: ''
        }
        self._linstor.set_volume_metadata(snap_uuid, volume_metadata)

        # 5. Set size.
        snap_vdi = LinstorVDI(self.sr, snap_uuid)
        if not snap_vdi._exists:
            raise xs_errors.XenError('VDISnapshot')

        volume_info = self._linstor.get_volume_info(snap_uuid)

        snap_vdi.size = self.linstorcowutil.get_size_virt(snap_uuid)
        snap_vdi.utilisation = volume_info.allocated_size

        # 6. Update sm config.
        snap_vdi.sm_config = {}
        snap_vdi.sm_config['vdi_type'] = snap_vdi.vdi_type
        if snap_parent:
            snap_vdi.sm_config['vhd-parent'] = snap_parent
            snap_vdi.parent = snap_parent

        snap_vdi.label = self.label
        snap_vdi.description = self.description

        # Initialization complete: keep the volume across restarts.
        self._linstor.mark_volume_as_persistent(snap_uuid)

        return snap_vdi

2318 

2319 # -------------------------------------------------------------------------- 

2320 # Implement specific SR methods. 

2321 # -------------------------------------------------------------------------- 

2322 

2323 @override 

2324 def _rename(self, oldpath, newpath) -> None: 

2325 # TODO: I'm not sure... Used by CBT. 

2326 volume_uuid = self._linstor.get_volume_uuid_from_device_path(oldpath) 

2327 self._linstor.update_volume_name(volume_uuid, newpath) 

2328 

    @override
    def _do_snapshot(self, sr_uuid, vdi_uuid, snapType,
                     cloneOp=False, secondary=None, cbtlog=None) -> str:
        """
        Pause tapdisk, run the snapshot and unpause.

        When a CBT log is provided, the consistency state (tapdisk not
        running => consistent) is captured first and forwarded to
        `_snapshot`.

        :param snapType: VDI.SNAPSHOT_SINGLE/DOUBLE/INTERNAL.
        :return: params string of the resulting user-visible VDI.
        :raises xs_errors.XenError: 'Unimplemented' for non-COW VDIs.
        :raises util.SMException: if tapdisk cannot be paused.
        """
        # If cbt enabled, save file consistency state.
        if cbtlog is not None:
            if blktap2.VDI.tap_status(self.session, vdi_uuid):
                consistency_state = False
            else:
                consistency_state = True
            util.SMlog(
                'Saving log consistency state of {} for vdi: {}'
                .format(consistency_state, vdi_uuid)
            )
        else:
            consistency_state = None

        if not VdiType.isCowImage(self.vdi_type):
            raise xs_errors.XenError('Unimplemented')

        if not blktap2.VDI.tap_pause(self.session, sr_uuid, vdi_uuid):
            raise util.SMException('Failed to pause VDI {}'.format(vdi_uuid))
        try:
            return self._snapshot(snapType, cbtlog, consistency_state)
        finally:
            # Always unpause, even when the snapshot fails.
            self.disable_leaf_on_secondary(vdi_uuid, secondary=secondary)
            blktap2.VDI.tap_unpause(self.session, sr_uuid, vdi_uuid, secondary)

2355 

2356 def _snapshot(self, snap_type, cbtlog=None, cbt_consistency=None): 

2357 util.SMlog( 

2358 'LinstorVDI._snapshot for {} (type {})' 

2359 .format(self.uuid, snap_type) 

2360 ) 

2361 

2362 # 1. Checks... 

2363 if self.hidden: 

2364 raise xs_errors.XenError('VDIClone', opterr='hidden VDI') 

2365 

2366 snap_vdi_type = self.sr._get_snap_vdi_type(self.vdi_type, self.size) 

2367 

2368 depth = self.linstorcowutil.get_depth(self.uuid) 

2369 if depth == -1: 

2370 raise xs_errors.XenError( 

2371 'VDIUnavailable', 

2372 opterr='failed to get COW depth' 

2373 ) 

2374 elif depth >= self.linstorcowutil.cowutil.getMaxChainLength(): 

2375 raise xs_errors.XenError('SnapshotChainTooLong') 

2376 

2377 # Ensure we have a valid path if we don't have a local diskful. 

2378 self.linstorcowutil.create_chain_paths(self.uuid, readonly=True) 

2379 

2380 volume_path = self.path 

2381 if not util.pathexists(volume_path): 

2382 raise xs_errors.XenError( 

2383 'EIO', 

2384 opterr='IO error checking path {}'.format(volume_path) 

2385 ) 

2386 

2387 # 2. Create base and snap uuid (if required) and a journal entry. 

2388 base_uuid = util.gen_uuid() 

2389 snap_uuid = None 

2390 

2391 if snap_type == VDI.SNAPSHOT_DOUBLE: 

2392 snap_uuid = util.gen_uuid() 

2393 

2394 clone_info = '{}_{}'.format(base_uuid, snap_uuid) 

2395 

2396 active_uuid = self.uuid 

2397 self.sr._journaler.create( 

2398 LinstorJournaler.CLONE, active_uuid, clone_info 

2399 ) 

2400 

2401 try: 

2402 # 3. Self becomes the new base. 

2403 # The device path remains the same. 

2404 self._linstor.update_volume_uuid(self.uuid, base_uuid) 

2405 self.uuid = base_uuid 

2406 self.location = self.uuid 

2407 self.read_only = True 

2408 self.managed = False 

2409 

2410 # 4. Create snapshots (new active and snap). 

2411 active_vdi = self._create_snapshot(snap_vdi_type, active_uuid) 

2412 

2413 snap_vdi = None 

2414 if snap_type == VDI.SNAPSHOT_DOUBLE: 

2415 snap_vdi = self._create_snapshot(snap_vdi_type, snap_uuid, active_uuid) 

2416 

2417 self.label = 'base copy' 

2418 self.description = '' 

2419 

2420 # 5. Mark the base VDI as hidden so that it does not show up 

2421 # in subsequent scans. 

2422 self._mark_hidden() 

2423 self._linstor.update_volume_metadata( 

2424 self.uuid, {READ_ONLY_TAG: True} 

2425 ) 

2426 

2427 # 6. We must update the new active VDI with the "paused" and 

2428 # "host_" properties. Why? Because the original VDI has been 

2429 # paused and we we must unpause it after the snapshot. 

2430 # See: `tap_unpause` in `blktap2.py`. 

2431 vdi_ref = self.session.xenapi.VDI.get_by_uuid(active_uuid) 

2432 sm_config = self.session.xenapi.VDI.get_sm_config(vdi_ref) 

2433 for key in [x for x in sm_config.keys() if x == 'paused' or x.startswith('host_')]: 

2434 active_vdi.sm_config[key] = sm_config[key] 

2435 

2436 # 7. Verify parent locator field of both children and 

2437 # delete base if unused. 

2438 introduce_parent = True 

2439 try: 

2440 snap_parent = None 

2441 if snap_vdi: 

2442 snap_parent = snap_vdi.parent 

2443 

2444 if active_vdi.parent != self.uuid and ( 

2445 snap_type == VDI.SNAPSHOT_SINGLE or 

2446 snap_type == VDI.SNAPSHOT_INTERNAL or 

2447 snap_parent != self.uuid 

2448 ): 

2449 util.SMlog( 

2450 'Destroy unused base volume: {} (path={})' 

2451 .format(self.uuid, self.path) 

2452 ) 

2453 introduce_parent = False 

2454 self._linstor.destroy_volume(self.uuid) 

2455 except Exception as e: 

2456 util.SMlog('Ignoring exception: {}'.format(e)) 

2457 pass 

2458 

2459 # 8. Introduce the new VDI records. 

2460 if snap_vdi: 

2461 # If the parent is encrypted set the key_hash for the 

2462 # new snapshot disk. 

2463 vdi_ref = self.sr.srcmd.params['vdi_ref'] 

2464 sm_config = self.session.xenapi.VDI.get_sm_config(vdi_ref) 

2465 # TODO: Maybe remove key_hash support. 

2466 if 'key_hash' in sm_config: 

2467 snap_vdi.sm_config['key_hash'] = sm_config['key_hash'] 

2468 # If we have CBT enabled on the VDI, 

2469 # set CBT status for the new snapshot disk. 

2470 if cbtlog: 

2471 snap_vdi.cbt_enabled = True 

2472 

2473 if snap_vdi: 

2474 snap_vdi_ref = snap_vdi._db_introduce() 

2475 util.SMlog( 

2476 'vdi_clone: introduced VDI: {} ({})' 

2477 .format(snap_vdi_ref, snap_vdi.uuid) 

2478 ) 

2479 if introduce_parent: 

2480 base_vdi_ref = self._db_introduce() 

2481 self.session.xenapi.VDI.set_managed(base_vdi_ref, False) 

2482 util.SMlog( 

2483 'vdi_clone: introduced VDI: {} ({})' 

2484 .format(base_vdi_ref, self.uuid) 

2485 ) 

2486 self._linstor.update_volume_metadata(self.uuid, { 

2487 NAME_LABEL_TAG: util.to_plain_string(self.label), 

2488 NAME_DESCRIPTION_TAG: util.to_plain_string( 

2489 self.description 

2490 ), 

2491 READ_ONLY_TAG: True, 

2492 METADATA_OF_POOL_TAG: '' 

2493 }) 

2494 

2495 # 9. Update cbt files if user created snapshot (SNAPSHOT_DOUBLE) 

2496 if snap_type == VDI.SNAPSHOT_DOUBLE and cbtlog: 

2497 try: 

2498 self._cbt_snapshot(snap_uuid, cbt_consistency) 

2499 except Exception: 

2500 # CBT operation failed. 

2501 # TODO: Implement me. 

2502 raise 

2503 

2504 if snap_type != VDI.SNAPSHOT_INTERNAL: 

2505 self.sr._update_stats(self.size) 

2506 

2507 # 10. Return info on the new user-visible leaf VDI. 

2508 ret_vdi = snap_vdi 

2509 if not ret_vdi: 

2510 ret_vdi = self 

2511 if not ret_vdi: 

2512 ret_vdi = active_vdi 

2513 

2514 vdi_ref = self.sr.srcmd.params['vdi_ref'] 

2515 self.session.xenapi.VDI.set_sm_config( 

2516 vdi_ref, active_vdi.sm_config 

2517 ) 

2518 except Exception: 

2519 util.logException('Failed to snapshot!') 

2520 try: 

2521 self.sr._handle_interrupted_clone( 

2522 active_uuid, clone_info, force_undo=True 

2523 ) 

2524 self.sr._journaler.remove(LinstorJournaler.CLONE, active_uuid) 

2525 except Exception as clean_error: 

2526 util.SMlog( 

2527 'WARNING: Failed to clean up failed snapshot: {}' 

2528 .format(clean_error) 

2529 ) 

2530 raise xs_errors.XenError('VDIClone', opterr=str(e)) 

2531 

2532 self.sr._journaler.remove(LinstorJournaler.CLONE, active_uuid) 

2533 

2534 return ret_vdi.get_params() 

2535 

    @staticmethod
    def _start_persistent_http_server(volume_name):
        """
        Start the HTTP disk server exposing the local DRBD device of
        `volume_name` (HA statefile on port 8076, otherwise 8077).

        The server is spawned in its own process group (so it can be
        killed with its children later), its PID is written to
        /run/http-server-<volume_name>.pid, and startup is confirmed by
        waiting for its "Server ready!" output line. On any failure the
        PID file is removed, the process group is killed and
        VDIUnavailable is raised.
        """
        pid_path = None
        http_server = None

        try:
            if volume_name == HA_VOLUME_NAME:
                port = '8076'
            else:
                port = '8077'

            try:
                # Use a timeout call because XAPI may be unusable on startup
                # or if the host has been ejected. So in this case the call can
                # block indefinitely.
                session = util.timeout_call(5, util.get_localAPI_session)
                host_ip = util.get_this_host_address(session)
            except:
                # Fallback using the XHA file if session not available.
                host_ip, _ = get_ips_from_xha_config_file()
                if not host_ip:
                    raise Exception(
                        'Cannot start persistent HTTP server: no XAPI session, nor XHA config file'
                    )

            arguments = [
                'http-disk-server',
                '--disk',
                '/dev/drbd/by-res/{}/0'.format(volume_name),
                '--ip',
                host_ip,
                '--port',
                port
            ]

            util.SMlog('Starting {} on port {}...'.format(arguments[0], port))
            http_server = subprocess.Popen(
                [FORK_LOG_DAEMON] + arguments,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True,
                # Ensure we use another group id to kill this process without
                # touch the current one.
                preexec_fn=os.setsid
            )

            pid_path = '/run/http-server-{}.pid'.format(volume_name)
            with open(pid_path, 'w') as pid_file:
                pid_file.write(str(http_server.pid))

            reg_server_ready = re.compile("Server ready!$")
            def is_ready():
                # Scan server output until it reports readiness or exits.
                while http_server.poll() is None:
                    line = http_server.stdout.readline()
                    if reg_server_ready.search(line):
                        return True
                return False
            try:
                if not util.timeout_call(10, is_ready):
                    raise Exception('Failed to wait HTTP server startup, bad output')
            except util.TimeoutException:
                raise Exception('Failed to wait for HTTP server startup during given delay')
        except Exception as e:
            # Cleanup: remove the PID file and kill the whole process group.
            if pid_path:
                try:
                    os.remove(pid_path)
                except Exception:
                    pass

            if http_server:
                # Kill process and children in this case...
                try:
                    os.killpg(os.getpgid(http_server.pid), signal.SIGTERM)
                except:
                    pass

            raise xs_errors.XenError(
                'VDIUnavailable',
                opterr='Failed to start http-server: {}'.format(e)
            )

2616 

    def _start_persistent_nbd_server(self, volume_name):
        """
        Start the local NBD proxy for `volume_name` and symlink the
        resulting /dev/nbdX device to `self.path`.

        The proxy connects to the HTTP disk servers of all hosts; its PID
        is written to /run/nbd-server-<volume_name>.pid and the attached
        NBD device is parsed from its output. On any failure the PID
        file, device entry and process group are cleaned up and
        VDIUnavailable is raised.
        """
        pid_path = None
        nbd_path = None
        nbd_server = None

        try:
            # We use a precomputed device size.
            # So if the XAPI is modified, we must update these values!
            if volume_name == HA_VOLUME_NAME:
                # See: https://github.com/xapi-project/xen-api/blob/703479fa448a8d7141954bb6e8964d8e25c4ac2e/ocaml/xapi/xha_statefile.ml#L32-L37
                port = '8076'
                device_size = 4 * 1024 * 1024
            else:
                # See: https://github.com/xapi-project/xen-api/blob/703479fa448a8d7141954bb6e8964d8e25c4ac2e/ocaml/database/redo_log.ml#L41-L44
                port = '8077'
                device_size = 256 * 1024 * 1024

            try:
                # XAPI may be unusable on startup: use a timeout, then fall
                # back to the XHA config file.
                session = util.timeout_call(5, util.get_localAPI_session)
                ips = util.get_host_addresses(session)
            except Exception as e:
                _, ips = get_ips_from_xha_config_file()
                if not ips:
                    raise Exception(
                        'Cannot start persistent NBD server: no XAPI session, nor XHA config file ({})'.format(e)
                    )
                ips = ips.values()

            arguments = [
                'nbd-http-server',
                '--socket-path',
                '/run/{}.socket'.format(volume_name),
                '--nbd-name',
                volume_name,
                '--urls',
                ','.join(['http://' + ip + ':' + port for ip in ips]),
                '--device-size',
                str(device_size)
            ]

            util.SMlog('Starting {} using port {}...'.format(arguments[0], port))
            nbd_server = subprocess.Popen(
                [FORK_LOG_DAEMON] + arguments,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True,
                # Ensure we use another group id to kill this process without
                # touch the current one.
                preexec_fn=os.setsid
            )

            pid_path = '/run/nbd-server-{}.pid'.format(volume_name)
            with open(pid_path, 'w') as pid_file:
                pid_file.write(str(nbd_server.pid))

            reg_nbd_path = re.compile("NBD `(/dev/nbd[0-9]+)` is now attached.$")
            def get_nbd_path():
                # Scan server output for the attached NBD device path.
                while nbd_server.poll() is None:
                    line = nbd_server.stdout.readline()
                    match = reg_nbd_path.search(line)
                    if match:
                        return match.group(1)
            # Use a timeout to never block the smapi if there is a problem.
            try:
                nbd_path = util.timeout_call(10, get_nbd_path)
                if nbd_path is None:
                    raise Exception('Empty NBD path (NBD server is probably dead)')
            except util.TimeoutException:
                raise Exception('Unable to read NBD path')

            util.SMlog('Create symlink: {} -> {}'.format(self.path, nbd_path))
            os.symlink(nbd_path, self.path)
        except Exception as e:
            # Cleanup: PID file, device entry and the whole process group.
            if pid_path:
                try:
                    os.remove(pid_path)
                except Exception:
                    pass

            if nbd_path:
                try:
                    os.remove(nbd_path)
                except Exception:
                    pass

            if nbd_server:
                # Kill process and children in this case...
                try:
                    os.killpg(os.getpgid(nbd_server.pid), signal.SIGTERM)
                except:
                    pass

            raise xs_errors.XenError(
                'VDIUnavailable',
                opterr='Failed to start nbd-server: {}'.format(e)
            )

2713 

2714 @classmethod 

2715 def _kill_persistent_server(self, type, volume_name, sig): 

2716 try: 

2717 path = '/run/{}-server-{}.pid'.format(type, volume_name) 

2718 if not os.path.exists(path): 

2719 return 

2720 

2721 pid = None 

2722 with open(path, 'r') as pid_file: 

2723 try: 

2724 pid = int(pid_file.read()) 

2725 except Exception: 

2726 pass 

2727 

2728 if pid is not None and util.check_pid_exists(pid): 

2729 util.SMlog('Kill {} server {} (pid={})'.format(type, path, pid)) 

2730 try: 

2731 os.killpg(os.getpgid(pid), sig) 

2732 except Exception as e: 

2733 util.SMlog('Failed to kill {} server: {}'.format(type, e)) 

2734 

2735 os.remove(path) 

2736 except: 

2737 pass 

2738 

2739 @classmethod 

2740 def _kill_persistent_http_server(self, volume_name, sig=signal.SIGTERM): 

2741 return self._kill_persistent_server('nbd', volume_name, sig) 

2742 

2743 @classmethod 

2744 def _kill_persistent_nbd_server(self, volume_name, sig=signal.SIGTERM): 

2745 return self._kill_persistent_server('http', volume_name, sig) 

2746 

2747 def _check_http_nbd_volume_name(self): 

2748 volume_name = self.path[14:] 

2749 if volume_name not in [ 

2750 HA_VOLUME_NAME, REDO_LOG_VOLUME_NAME 

2751 ]: 

2752 raise xs_errors.XenError( 

2753 'VDIUnavailable', 

2754 opterr='Unsupported path: {}'.format(self.path) 

2755 ) 

2756 return volume_name 

2757 

    def _attach_using_http_nbd(self):
        """
        Attach a persistent volume (HA statefile / redo log) through an
        HTTP disk server plus a local NBD proxy, symlinked under
        /dev/http-nbd/.

        Any pre-existing NBD/HTTP servers for the volume are killed first.
        The HTTP server is only started where a DRBD device is available
        (master, or slave holding a diskful); the NBD proxy is started on
        every host.

        :return: the generic VDI attach result.
        :raises xs_errors.XenError: when no device path can be obtained.
        """
        volume_name = self._check_http_nbd_volume_name()

        # Ensure there is no NBD and HTTP server running.
        self._kill_persistent_nbd_server(volume_name)
        self._kill_persistent_http_server(volume_name)

        # 0. Fetch drbd path.
        must_get_device_path = True
        if not self.sr.is_master():
            # We are on a slave, we must try to find a diskful locally.
            try:
                volume_info = self._linstor.get_volume_info(self.uuid)
            except Exception as e:
                raise xs_errors.XenError(
                    'VDIUnavailable',
                    opterr='Cannot get volume info of {}: {}'
                    .format(self.uuid, e)
                )

            hostname = socket.gethostname()
            must_get_device_path = hostname in volume_info.diskful

        drbd_path = None
        if must_get_device_path or self.sr.is_master():
            # If we are master, we must ensure we have a diskless
            # or diskful available to init HA.
            # It also avoid this error in xensource.log
            # (/usr/libexec/xapi/cluster-stack/xhad/ha_set_pool_state):
            # init exited with code 8 [stdout = ''; stderr = 'SF: failed to write in State-File \x10 (fd 4208696). (sys 28)\x0A']
            # init returned MTC_EXIT_CAN_NOT_ACCESS_STATEFILE (State-File is inaccessible)
            available = False
            try:
                drbd_path = self._linstor.get_device_path(self.uuid)
                available = util.pathexists(drbd_path)
            except Exception:
                pass

            if not available:
                raise xs_errors.XenError(
                    'VDIUnavailable',
                    opterr='Cannot get device path of {}'.format(self.uuid)
                )

        # 1. Prepare http-nbd folder.
        try:
            if not os.path.exists('/dev/http-nbd/'):
                os.makedirs('/dev/http-nbd/')
            elif os.path.islink(self.path):
                # Remove a stale symlink from a previous attach.
                os.remove(self.path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise xs_errors.XenError(
                    'VDIUnavailable',
                    opterr='Cannot prepare http-nbd: {}'.format(e)
                )

        # 2. Start HTTP service if we have a diskful or if we are master.
        http_service = None
        if drbd_path:
            assert(drbd_path in (
                '/dev/drbd/by-res/{}/0'.format(HA_VOLUME_NAME),
                '/dev/drbd/by-res/{}/0'.format(REDO_LOG_VOLUME_NAME)
            ))
            self._start_persistent_http_server(volume_name)

        # 3. Start NBD server in all cases.
        try:
            self._start_persistent_nbd_server(volume_name)
        except Exception as e:
            # Roll back the HTTP server started above.
            if drbd_path:
                self._kill_persistent_http_server(volume_name)
            raise

        self.attached = True
        return VDI.VDI.attach(self, self.sr.uuid, self.uuid)

2834 

2835 def _detach_using_http_nbd(self): 

2836 volume_name = self._check_http_nbd_volume_name() 

2837 self._kill_persistent_nbd_server(volume_name) 

2838 self._kill_persistent_http_server(volume_name) 

2839 

2840# ------------------------------------------------------------------------------ 

2841 

2842 

if __name__ == '__main__':
    # Executed as an SM backend command.
    def run():
        SRCommand.run(LinstorSR, DRIVER_INFO)

    if not TRACE_PERFS:
        run()
    else:
        # Wrap the command execution in a profiler when tracing is enabled.
        util.make_profile('LinstorSR', run)
else:
    # Imported as a module: register the driver with the SR framework.
    SR.registerSR(LinstorSR)