
#!/usr/bin/python3
#
# Copyright (C) Citrix Systems Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; version 2.1 only.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Script to coalesce and garbage collect COW-based SRs in the background
#


from sm_typing import Any, Optional, List, override

import os
import os.path
import sys
import time
import signal
import subprocess
import getopt
import datetime
import traceback
import base64
import zlib
import errno
import stat

import XenAPI  # pylint: disable=import-error
import util
import lvutil
import lvmcache
import journaler
import fjournaler
import lock
import blktap2
import xs_errors
from refcounter import RefCounter
from ipc import IPCFlag
from lvmanager import LVActivator
from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG
from functools import reduce
from time import monotonic as _time

from constants import NS_PREFIX_LVM, VG_LOCATION, VG_PREFIX
from cowutil import CowImageInfo, CowUtil, getCowUtil
from lvmcowutil import LV_PREFIX, LvmCowUtil
from vditype import VdiType, VdiTypeExtension, VDI_COW_TYPES, VDI_TYPE_TO_EXTENSION


try:
    from linstorcowutil import LinstorCowUtil
    from linstorjournaler import LinstorJournaler
    from linstorvolumemanager import get_controller_uri
    from linstorvolumemanager import LinstorVolumeManager
    from linstorvolumemanager import LinstorVolumeManagerError
    from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX

    LINSTOR_AVAILABLE = True
except ImportError:
    LINSTOR_AVAILABLE = False


# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not
# possible due to lvhd_stop_using_() not working correctly. However, we leave
# this option available through the explicit LEAFCLSC_FORCE flag in the VDI
# record for use by the offline tool (which makes the operation safe by pausing
# the VM first)
AUTO_ONLINE_LEAF_COALESCE_ENABLED = True

FLAG_TYPE_ABORT = "abort"  # flag to request aborting of GC/coalesce

# process "lock", used simply as an indicator that a process already exists
# that is doing GC/coalesce on this SR (such a process holds the lock, and we
# check for that fact by trying the lock)
lockGCRunning = None

# process "lock" to indicate that the GC process has been activated but may
# not yet be running; stops a second process from being started
LOCK_TYPE_GC_ACTIVE = "gc_active"
lockGCActive = None

# Default coalesce error rate limit, in messages per minute. A zero value
# disables throttling, and a negative value disables error reporting.
DEFAULT_COALESCE_ERR_RATE = 1.0 / 60

COALESCE_LAST_ERR_TAG = 'last-coalesce-error'
COALESCE_ERR_RATE_TAG = 'coalesce-error-rate'
VAR_RUN = "/var/run/"
SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log"

N_RUNNING_AVERAGE = 10

NON_PERSISTENT_DIR = '/run/nonpersistent/sm'

# Signal handler
SIGTERM = False


class AbortException(util.SMException):
    pass


class CancelException(util.SMException):
    pass


def receiveSignal(signalNumber, frame):
    global SIGTERM

    util.SMlog("GC: received SIGTERM")
    SIGTERM = True
    return
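
# Note: receiveSignal only records that SIGTERM arrived; long-running loops
# (e.g. Util.runAbortable below) poll the SIGTERM flag and abort cleanly.
# Registration is assumed to happen in this script's startup code (outside
# this excerpt), along the lines of:
#
#   signal.signal(signal.SIGTERM, receiveSignal)
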

################################################################################
#
#  Util
#
class Util:
    RET_RC = 1
    RET_STDOUT = 2
    RET_STDERR = 4

    UUID_LEN = 36

    PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024}

    @staticmethod
    def log(text) -> None:
        util.SMlog(text, ident="SMGC")

    @staticmethod
    def logException(tag):
        info = sys.exc_info()
        if info[0] == SystemExit:
            # this should not be happening when catching "Exception", but it is
            sys.exit(0)
        tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2]))
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
        Util.log("         ***********************")
        Util.log("         *  E X C E P T I O N  *")
        Util.log("         ***********************")
        Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1]))
        Util.log(tb)
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")


    @staticmethod
    def doexec(args, expectedRC, inputtext=None, ret=None, log=True):
        "Execute a subprocess, then return its return code, stdout, stderr"
        proc = subprocess.Popen(args,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                close_fds=True)
        (stdout, stderr) = proc.communicate(inputtext)
        stdout = str(stdout)
        stderr = str(stderr)
        rc = proc.returncode
        if log:
            Util.log("`%s`: %s" % (args, rc))
        if not isinstance(expectedRC, list):
            expectedRC = [expectedRC]
        if rc not in expectedRC:
            reason = stderr.strip()
            if stdout.strip():
                reason = "%s (stdout: %s)" % (reason, stdout.strip())
            Util.log("Failed: %s" % reason)
            raise util.CommandException(rc, args, reason)

        if ret == Util.RET_RC:
            return rc
        if ret == Util.RET_STDERR:
            return stderr
        return stdout


    @staticmethod
    def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut, prefSig=signal.SIGKILL):
        """Execute func in a separate process and kill it if abortTest signals
        so"""
        abortSignaled = abortTest()  # check now before we clear resultFlag
        resultFlag = IPCFlag(ns)
        resultFlag.clearAll()
        pid = os.fork()
        if pid:
            startTime = _time()
            try:
                while True:
                    if resultFlag.test("success"):
                        Util.log("  Child process completed successfully")
                        resultFlag.clear("success")
                        return
                    if resultFlag.test("failure"):
                        resultFlag.clear("failure")
                        raise util.SMException("Child process exited with error")
                    if abortTest() or abortSignaled or SIGTERM:
                        os.killpg(pid, prefSig)
                        raise AbortException("Aborting due to signal")
                    if timeOut and _time() - startTime > timeOut:
                        os.killpg(pid, prefSig)
                        resultFlag.clearAll()
                        raise util.SMException("Timed out")
                    time.sleep(pollInterval)
            finally:
                wait_pid = 0
                rc = -1
                count = 0
                while wait_pid == 0 and count < 10:
                    wait_pid, rc = os.waitpid(pid, os.WNOHANG)
                    if wait_pid == 0:
                        time.sleep(2)
                        count += 1

                if wait_pid == 0:
                    Util.log("runAbortable: wait for process completion timed out")
        else:
            os.setpgrp()
            try:
                if func() == ret:
                    resultFlag.set("success")
                else:
                    resultFlag.set("failure")
            except Exception as e:
                Util.log("Child process failed with : (%s)" % e)
                resultFlag.set("failure")
                Util.logException("This exception has occurred")
            os._exit(0)
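
    # A minimal usage sketch for runAbortable (illustrative only; it mirrors
    # the call sites later in this file): run a function until it returns
    # `ret`, polling the SR's abort flag, with no timeout:
    #
    #   Util.runAbortable(lambda: vdi.coalesce(), None, sr.uuid,
    #                     lambda: IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT),
    #                     VDI.POLL_INTERVAL, 0)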


    @staticmethod
    def num2str(number):
        for prefix in ("G", "M", "K"):
            if number >= Util.PREFIX[prefix]:
                return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix)
        return "%s" % number
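
    # For example (illustrative): num2str(3 * 1024 * 1024) returns "3.000M",
    # while values below 1K fall through and are returned unscaled.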


    @staticmethod
    def numBits(val):
        count = 0
        while val:
            count += val & 1
            val = val >> 1
        return count


    @staticmethod
    def countBits(bitmap1, bitmap2):
        """return bit count in the bitmap produced by ORing the two bitmaps"""
        len1 = len(bitmap1)
        len2 = len(bitmap2)
        lenLong = len1
        lenShort = len2
        bitmapLong = bitmap1
        if len2 > len1:
            lenLong = len2
            lenShort = len1
            bitmapLong = bitmap2

        count = 0
        for i in range(lenShort):
            val = bitmap1[i] | bitmap2[i]
            count += Util.numBits(val)

        # tail of the longer bitmap; starting at lenShort (rather than at the
        # loop variable left over from above) also handles an empty bitmap
        for i in range(lenShort, lenLong):
            val = bitmapLong[i]
            count += Util.numBits(val)
        return count
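
    # Worked example (illustrative): countBits(b'\x0f', b'\xf0\x01') ORs the
    # overlapping byte (0x0f | 0xf0 = 0xff, 8 bits) and then counts the tail
    # of the longer bitmap (0x01, 1 bit), giving 9.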


    @staticmethod
    def getThisScript():
        thisScript = util.get_real_path(__file__)
        if thisScript.endswith(".pyc"):
            thisScript = thisScript[:-1]
        return thisScript



################################################################################
#
#  XAPI
#
class XAPI:
    USER = "root"
    PLUGIN_ON_SLAVE = "on-slave"

    CONFIG_SM = 0
    CONFIG_OTHER = 1
    CONFIG_ON_BOOT = 2
    CONFIG_ALLOW_CACHING = 3

    CONFIG_NAME = {
        CONFIG_SM: "sm-config",
        CONFIG_OTHER: "other-config",
        CONFIG_ON_BOOT: "on-boot",
        CONFIG_ALLOW_CACHING: "allow_caching"
    }

    class LookupError(util.SMException):
        pass

    @staticmethod
    def getSession():
        session = XenAPI.xapi_local()
        session.xenapi.login_with_password(XAPI.USER, '', '', 'SM')
        return session

    def __init__(self, session, srUuid):
        self.sessionPrivate = False
        self.session = session
        if self.session is None:
            self.session = self.getSession()
            self.sessionPrivate = True
        self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid)
        self.srRecord = self.session.xenapi.SR.get_record(self._srRef)
        self.hostUuid = util.get_this_host()
        self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid)
        self.task = None
        self.task_progress = {"coalescable": 0, "done": 0}


    def __del__(self):
        if self.sessionPrivate:
            self.session.xenapi.session.logout()

    @property
    def srRef(self):
        return self._srRef

    def isPluggedHere(self):
        pbds = self.getAttachedPBDs()
        for pbdRec in pbds:
            if pbdRec["host"] == self._hostRef:
                return True
        return False

    def poolOK(self):
        host_recs = self.session.xenapi.host.get_all_records()
        for host_ref, host_rec in host_recs.items():
            if not host_rec["enabled"]:
                Util.log("Host %s not enabled" % host_rec["uuid"])
                return False
        return True

    def isMaster(self):
        if self.srRecord["shared"]:
            pool = list(self.session.xenapi.pool.get_all_records().values())[0]
            return pool["master"] == self._hostRef
        else:
            pbds = self.getAttachedPBDs()
            if len(pbds) < 1:
                raise util.SMException("Local SR not attached")
            elif len(pbds) > 1:
                raise util.SMException("Local SR multiply attached")
            return pbds[0]["host"] == self._hostRef

    def getAttachedPBDs(self):
        """Return PBD records for all PBDs of this SR that are currently
        attached"""
        attachedPBDs = []
        pbds = self.session.xenapi.PBD.get_all_records()
        for pbdRec in pbds.values():
            if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]:
                attachedPBDs.append(pbdRec)
        return attachedPBDs

    def getOnlineHosts(self):
        return util.get_online_hosts(self.session)

    def ensureInactive(self, hostRef, args):
        text = self.session.xenapi.host.call_plugin(
            hostRef, self.PLUGIN_ON_SLAVE, "multi", args)
        Util.log("call-plugin returned: '%s'" % text)

    def getRecordHost(self, hostRef):
        return self.session.xenapi.host.get_record(hostRef)

    def _getRefVDI(self, uuid):
        return self.session.xenapi.VDI.get_by_uuid(uuid)

    def getRefVDI(self, vdi):
        return self._getRefVDI(vdi.uuid)

    def getRecordVDI(self, uuid):
        try:
            ref = self._getRefVDI(uuid)
            return self.session.xenapi.VDI.get_record(ref)
        except XenAPI.Failure:
            return None

    def singleSnapshotVDI(self, vdi):
        return self.session.xenapi.VDI.snapshot(vdi.getRef(),
                                                {"type": "internal"})

    def forgetVDI(self, srUuid, vdiUuid):
        """Forget the VDI, but handle the case where the VDI has already been
        forgotten (i.e. ignore errors)"""
        try:
            vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid)
            self.session.xenapi.VDI.forget(vdiRef)
        except XenAPI.Failure:
            pass

    def getConfigVDI(self, vdi, key):
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef())
        elif kind == self.CONFIG_OTHER:
            cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef())
        elif kind == self.CONFIG_ON_BOOT:
            cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef())
        elif kind == self.CONFIG_ALLOW_CACHING:
            cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef())
        else:
            assert(False)
        Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg)))
        return cfg

    def removeFromConfigVDI(self, vdi, key):
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key)
        else:
            assert(False)

    def addToConfigVDI(self, vdi, key, val):
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val)
        else:
            assert(False)

    def isSnapshot(self, vdi):
        return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef())

    def markCacheSRsDirty(self):
        sr_refs = self.session.xenapi.SR.get_all_records_where(
            'field "local_cache_enabled" = "true"')
        for sr_ref in sr_refs:
            Util.log("Marking SR %s dirty" % sr_ref)
            util.set_dirty(self.session, sr_ref)

    def srUpdate(self):
        Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"])
        abortFlag = IPCFlag(self.srRecord["uuid"])
        task = self.session.xenapi.Async.SR.update(self._srRef)
        cancelTask = True
        try:
            for i in range(60):
                status = self.session.xenapi.task.get_status(task)
                if not status == "pending":
                    Util.log("SR.update_asynch status changed to [%s]" % status)
                    cancelTask = False
                    return
                if abortFlag.test(FLAG_TYPE_ABORT):
                    Util.log("Abort signalled during srUpdate, cancelling task...")
                    try:
                        self.session.xenapi.task.cancel(task)
                        cancelTask = False
                        Util.log("Task cancelled")
                    except:
                        pass
                    return
                time.sleep(1)
        finally:
            if cancelTask:
                self.session.xenapi.task.cancel(task)
            self.session.xenapi.task.destroy(task)
        Util.log("Asynch srUpdate still running, but timeout exceeded.")

    def update_task(self):
        self.session.xenapi.task.set_other_config(
            self.task,
            {
                "applies_to": self._srRef
            })
        total = self.task_progress['coalescable'] + self.task_progress['done']
        if (total > 0):
            self.session.xenapi.task.set_progress(
                self.task, float(self.task_progress['done']) / total)

    def create_task(self, label, description):
        self.task = self.session.xenapi.task.create(label, description)
        self.update_task()

    def update_task_progress(self, key, value):
        self.task_progress[key] = value
        if self.task:
            self.update_task()

    def set_task_status(self, status):
        if self.task:
            self.session.xenapi.task.set_status(self.task, status)


################################################################################
#
#  VDI
#
class VDI(object):
    """Object representing a VDI of a COW-based SR"""

    POLL_INTERVAL = 1
    POLL_TIMEOUT = 30
    DEVICE_MAJOR = 202

    # config keys & values
    DB_VDI_PARENT = "vhd-parent"
    DB_VDI_TYPE = "vdi_type"
    DB_VDI_BLOCKS = "vhd-blocks"
    DB_VDI_PAUSED = "paused"
    DB_VDI_RELINKING = "relinking"
    DB_VDI_ACTIVATING = "activating"
    DB_GC = "gc"
    DB_COALESCE = "coalesce"
    DB_LEAFCLSC = "leaf-coalesce"  # config key
    DB_GC_NO_SPACE = "gc_no_space"
    LEAFCLSC_DISABLED = "false"  # set by user; means do not leaf-coalesce
    LEAFCLSC_FORCE = "force"  # set by user; means skip snap-coalesce
    LEAFCLSC_OFFLINE = "offline"  # set here for informational purposes: means
                                  # no space to snap-coalesce or unable to keep
                                  # up with VDI. This is not used by the SM, it
                                  # might be used by external components.
    DB_ONBOOT = "on-boot"
    ONBOOT_RESET = "reset"
    DB_ALLOW_CACHING = "allow_caching"

    CONFIG_TYPE = {
        DB_VDI_PARENT: XAPI.CONFIG_SM,
        DB_VDI_TYPE: XAPI.CONFIG_SM,
        DB_VDI_BLOCKS: XAPI.CONFIG_SM,
        DB_VDI_PAUSED: XAPI.CONFIG_SM,
        DB_VDI_RELINKING: XAPI.CONFIG_SM,
        DB_VDI_ACTIVATING: XAPI.CONFIG_SM,
        DB_GC: XAPI.CONFIG_OTHER,
        DB_COALESCE: XAPI.CONFIG_OTHER,
        DB_LEAFCLSC: XAPI.CONFIG_OTHER,
        DB_ONBOOT: XAPI.CONFIG_ON_BOOT,
        DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING,
        DB_GC_NO_SPACE: XAPI.CONFIG_SM
    }

    LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024  # bytes
    LIVE_LEAF_COALESCE_TIMEOUT = 10  # seconds
    TIMEOUT_SAFETY_MARGIN = 0.5  # extra margin when calculating
                                 # feasibility of leaf coalesce

    JRN_RELINK = "relink"  # journal entry type for relinking children
    JRN_COALESCE = "coalesce"  # to communicate which VDI is being coalesced
    JRN_LEAF = "leaf"  # used in coalesce-leaf

    STR_TREE_INDENT = 4


    def __init__(self, sr, uuid, vdi_type):
        self.sr = sr
        self.scanError = True
        self.uuid = uuid
        self.vdi_type = vdi_type
        self.fileName = ""
        self.parentUuid = ""
        self.sizeVirt = -1
        self._sizePhys = -1
        self._sizeAllocated = -1
        self._hidden = False
        self.parent = None
        self.children = []
        self._vdiRef = None
        self.cowutil = getCowUtil(vdi_type)
        self._clearRef()

    @staticmethod
    def extractUuid(path):
        raise NotImplementedError("Implement in sub class")

    def load(self, info=None) -> None:
        """Load VDI info"""
        pass

    def getDriverName(self) -> str:
        return self.vdi_type

    def getRef(self):
        if self._vdiRef is None:
            self._vdiRef = self.sr.xapi.getRefVDI(self)
        return self._vdiRef

    def getConfig(self, key, default=None):
        config = self.sr.xapi.getConfigVDI(self, key)
        if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING:
            val = config
        else:
            val = config.get(key)
        if val:
            return val
        return default

    def setConfig(self, key, val):
        self.sr.xapi.removeFromConfigVDI(self, key)
        self.sr.xapi.addToConfigVDI(self, key, val)
        Util.log("Set %s = %s for %s" % (key, val, self))

    def delConfig(self, key):
        self.sr.xapi.removeFromConfigVDI(self, key)
        Util.log("Removed %s from %s" % (key, self))

    def ensureUnpaused(self):
        if self.getConfig(self.DB_VDI_PAUSED) == "true":
            Util.log("Unpausing VDI %s" % self)
            self.unpause()

    def pause(self, failfast=False) -> None:
        if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid,
                                     self.uuid, failfast):
            raise util.SMException("Failed to pause VDI %s" % self)

    def _report_tapdisk_unpause_error(self):
        try:
            xapi = self.sr.xapi.session.xenapi
            sr_ref = xapi.SR.get_by_uuid(self.sr.uuid)
            msg_name = "failed to unpause tapdisk"
            msg_body = "Failed to unpause tapdisk for VDI %s, " \
                       "VMs using this tapdisk have lost access " \
                       "to the corresponding disk(s)" % self.uuid
            xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body)
        except Exception as e:
            util.SMlog("failed to generate message: %s" % e)

    def unpause(self):
        if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid,
                                       self.uuid):
            self._report_tapdisk_unpause_error()
            raise util.SMException("Failed to unpause VDI %s" % self)

    def refresh(self, ignoreNonexistent=True):
        """Pause-unpause in one step"""
        self.sr.lock()
        try:
            try:
                if not blktap2.VDI.tap_refresh(self.sr.xapi.session,
                                               self.sr.uuid, self.uuid):
                    self._report_tapdisk_unpause_error()
                    raise util.SMException("Failed to refresh %s" % self)
            except XenAPI.Failure as e:
                if util.isInvalidVDI(e) and ignoreNonexistent:
                    Util.log("VDI %s not found, ignoring" % self)
                    return
                raise
        finally:
            self.sr.unlock()

    def isSnapshot(self):
        return self.sr.xapi.isSnapshot(self)

    def isAttachedRW(self):
        return util.is_attached_rw(
            self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef()))

    def getVDIBlocks(self):
        val = self.updateBlockInfo()
        bitmap = zlib.decompress(base64.b64decode(val))
        return bitmap
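
    # Illustrative note on the encoding: the value stored under DB_VDI_BLOCKS
    # by updateBlockInfo() below is base64 text, and getVDIBlocks() reverses
    # it with a base64 decode followed by zlib decompression; this implies
    # the bitmap coming out of _queryCowBlocks() is already zlib-compressed.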


    def isCoalesceable(self):
        """A VDI is coalesceable if it has no siblings and is not a leaf"""
        return not self.scanError and \
                self.parent and \
                len(self.parent.children) == 1 and \
                self.isHidden() and \
                len(self.children) > 0

    def isLeafCoalesceable(self):
        """A VDI is leaf-coalesceable if it has no siblings and is a leaf"""
        return not self.scanError and \
                self.parent and \
                len(self.parent.children) == 1 and \
                not self.isHidden() and \
                len(self.children) == 0

    def canLiveCoalesce(self, speed):
        """Can we stop-and-leaf-coalesce this VDI? The VDI must be
        isLeafCoalesceable() already"""
        feasibleSize = False
        allowedDownTime = \
            self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT
        allocated_size = self.getAllocatedSize()
        if speed:
            feasibleSize = \
                allocated_size // speed < allowedDownTime
        else:
            feasibleSize = \
                allocated_size < self.LIVE_LEAF_COALESCE_MAX_SIZE

        return (feasibleSize or
                self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE)
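
    # Worked example (using the constants above): allowedDownTime is
    # 0.5 * 10 = 5 seconds, so with a measured coalesce speed of, say,
    # 100 MiB/s, any leaf with at most ~500 MiB allocated qualifies; with no
    # speed data, only leaves under LIVE_LEAF_COALESCE_MAX_SIZE (20 MiB) do.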


    def getAllPrunable(self):
        if len(self.children) == 0:  # base case
            # it is possible to have a hidden leaf that was recently coalesced
            # onto its parent, its children already relinked but not yet
            # reloaded - in which case it may not be garbage collected yet:
            # some tapdisks could still be using the file.
            if self.sr.journaler.get(self.JRN_RELINK, self.uuid):
                return []
            if not self.scanError and self.isHidden():
                return [self]
            return []

        thisPrunable = True
        vdiList = []
        for child in self.children:
            childList = child.getAllPrunable()
            vdiList.extend(childList)
            if child not in childList:
                thisPrunable = False

        # We can destroy the current VDI if all of its children are hidden,
        # BUT the current VDI must be hidden too to do that!
        # Example in this case (after a failed live leaf coalesce):
        #
        # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees):
        # SMGC: [32436]         b5458d61(1.000G/4.127M)
        # SMGC: [32436]         *OLD_b545(1.000G/4.129M)
        #
        # OLD_b545 is hidden and must be removed, but b5458d61 must not be.
        # Normally we are not in this function when the delete action is
        # executed but in `_liveLeafCoalesce`.

        if not self.scanError and not self.isHidden() and thisPrunable:
            vdiList.append(self)
        return vdiList

    def getSizePhys(self) -> int:
        return self._sizePhys

    def getAllocatedSize(self) -> int:
        return self._sizeAllocated

    def getTreeRoot(self):
        "Get the root of the tree that self belongs to"
        root = self
        while root.parent:
            root = root.parent
        return root

    def getTreeHeight(self):
        "Get the height of the subtree rooted at self"
        if len(self.children) == 0:
            return 1

        maxChildHeight = 0
        for child in self.children:
            childHeight = child.getTreeHeight()
            if childHeight > maxChildHeight:
                maxChildHeight = childHeight

        return maxChildHeight + 1

    def getAllLeaves(self) -> List["VDI"]:
        "Get all leaf nodes in the subtree rooted at self"
        if len(self.children) == 0:
            return [self]

        leaves = []
        for child in self.children:
            leaves.extend(child.getAllLeaves())
        return leaves

    def updateBlockInfo(self) -> Optional[str]:
        val = base64.b64encode(self._queryCowBlocks()).decode()
        try:
            self.setConfig(VDI.DB_VDI_BLOCKS, val)
        except Exception:
            if self.vdi_type != VdiType.QCOW2:
                raise
            # Sometimes with QCOW2 the allocation table is too big to be
            # stored in XAPI. In that case we do not store it and write
            # `skipped` instead so that hasWork is happy (and the GC doesn't
            # loop indefinitely).
            self.setConfig(VDI.DB_VDI_BLOCKS, "skipped")

        return val

    def rename(self, uuid) -> None:
        "Rename the VDI file"
        assert(not self.sr.vdis.get(uuid))
        self._clearRef()
        oldUuid = self.uuid
        self.uuid = uuid
        self.children = []
        # updating the children themselves is the responsibility of the caller
        del self.sr.vdis[oldUuid]
        self.sr.vdis[self.uuid] = self

    def delete(self) -> None:
        "Physically delete the VDI"
        lock.Lock.cleanup(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
        lock.Lock.cleanupAll(self.uuid)
        self._clear()

    def getParent(self) -> str:
        return self.cowutil.getParent(self.path, lambda x: x.strip())

    def repair(self, parent) -> None:
        self.cowutil.repair(parent)

    @override
    def __str__(self) -> str:
        strHidden = ""
        if self.isHidden():
            strHidden = "*"
        strSizeVirt = "?"
        if self.sizeVirt > 0:
            strSizeVirt = Util.num2str(self.sizeVirt)
        strSizePhys = "?"
        if self._sizePhys > 0:
            strSizePhys = "/%s" % Util.num2str(self._sizePhys)
        strSizeAllocated = "?"
        if self._sizeAllocated >= 0:
            strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated)
        strType = "[{}]".format(self.vdi_type)

        return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt,
                                   strSizePhys, strSizeAllocated, strType)

    def validate(self, fast=False) -> None:
        if self.cowutil.check(self.path, fast=fast) != CowUtil.CheckResult.Success:
            raise util.SMException("COW image %s corrupted" % self)

    def _clear(self):
        self.uuid = ""
        self.path = ""
        self.parentUuid = ""
        self.parent = None
        self._clearRef()

    def _clearRef(self):
        self._vdiRef = None

    @staticmethod
    def _cancel_exception(sig, frame):
        raise CancelException()

    def _call_plugin_coalesce(self, hostRef):
        signal.signal(signal.SIGTERM, self._cancel_exception)
        args = {"path": self.path, "vdi_type": self.vdi_type}
        util.SMlog("DAMS: Calling remote coalesce with: {}".format(args))
        try:
            ret = self.sr.xapi.session.xenapi.host.call_plugin(
                hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_tapdisk", args)
            util.SMlog("DAMS: Remote coalesce returned {}".format(ret))
        except CancelException:
            util.SMlog(f"DAMS: Cancelling online coalesce following signal {args}")
            self.sr.xapi.session.xenapi.host.call_plugin(
                hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_cancel", args)
        except Exception:
            raise

    def _doCoalesceOnHost(self, hostRef):
        self.validate()
        self.parent.validate(True)
        self.parent._increaseSizeVirt(self.sizeVirt)
        self.sr._updateSlavesOnResize(self.parent)
        # TODO: We might need to make the LV RW on the slave directly for
        # coalesce? Children and parent need to be RW for QCOW2 coalesce,
        # otherwise tapdisk (libqcow) will crash trying to access them.

        def abortTest():
            file = self.sr._gc_running_file(self)
            try:
                with open(file, "r") as f:
                    if not f.read():
                        util.SMlog("DAMS: abortTest: Cancelling coalesce")
                        return True
            except OSError as e:
                if e.errno == errno.ENOENT:
                    util.SMlog("File {} does not exist".format(file))
                else:
                    util.SMlog("IOError: {}".format(e))
                return True
            return False

        Util.runAbortable(lambda: self._call_plugin_coalesce(hostRef),
                          None, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0,
                          prefSig=signal.SIGTERM)

        self.parent.validate(True)
        #self._verifyContents(0)
        self.parent.updateBlockInfo()

    def _isOpenOnHosts(self) -> Optional[str]:
        for pbdRecord in self.sr.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            args = {"path": self.path}
            is_openers = util.strtobool(self.sr.xapi.session.xenapi.host.call_plugin(
                hostRef, XAPI.PLUGIN_ON_SLAVE, "is_openers", args))
            if is_openers:
                return hostRef
        return None

    def _doCoalesce(self) -> None:
        """Coalesce self onto parent. Only perform the actual coalescing of
        an image, but not the subsequent relinking. We'll do that as the next
        step, after reloading the entire SR in case things have changed while
        we were coalescing"""
        self.validate()
        self.parent.validate(True)
        self.parent._increaseSizeVirt(self.sizeVirt)
        self.sr._updateSlavesOnResize(self.parent)
        self._coalesceCowImage(0)
        self.parent.validate(True)
        #self._verifyContents(0)
        self.parent.updateBlockInfo()

    def _verifyContents(self, timeOut):
        Util.log("  Coalesce verification on %s" % self)
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        Util.runAbortable(lambda: self._runTapdiskDiff(), True,
                          self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        Util.log("  Coalesce verification succeeded")

    def _runTapdiskDiff(self):
        cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \
            (self.getDriverName(), self.path,
             self.parent.getDriverName(), self.parent.path)
        Util.doexec(cmd, 0)
        return True

    @staticmethod
    def _reportCoalesceError(vdi, ce):
        """Reports a coalesce error to XenCenter.

        vdi: the VDI object on which the coalesce error occurred
        ce: the CommandException that was raised"""

        msg_name = os.strerror(ce.code)
        if ce.code == errno.ENOSPC:
            # TODO We could add more information here, e.g. exactly how much
            # space is required for the particular coalesce, as well as actions
            # to be taken by the user and consequences of not taking these
            # actions.
            msg_body = 'Run out of space while coalescing.'
        elif ce.code == errno.EIO:
            msg_body = 'I/O error while coalescing.'
        else:
            msg_body = ''
        util.SMlog('Coalesce failed on SR %s: %s (%s)'
                   % (vdi.sr.uuid, msg_name, msg_body))

        # Create a XenCenter message, but don't spam.
        xapi = vdi.sr.xapi.session.xenapi
        sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid)
        oth_cfg = xapi.SR.get_other_config(sr_ref)
        if COALESCE_ERR_RATE_TAG in oth_cfg:
            coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG])
        else:
            coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE

        xcmsg = False
        if coalesce_err_rate == 0:
            xcmsg = True
        elif coalesce_err_rate > 0:
            now = datetime.datetime.now()
            sm_cfg = xapi.SR.get_sm_config(sr_ref)
            if COALESCE_LAST_ERR_TAG in sm_cfg:
                # seconds per message (minimum distance in time between two
                # messages in seconds)
                spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60)
                last = datetime.datetime.fromtimestamp(
                    float(sm_cfg[COALESCE_LAST_ERR_TAG]))
                if now - last >= spm:
                    xapi.SR.remove_from_sm_config(sr_ref,
                                                  COALESCE_LAST_ERR_TAG)
                    xcmsg = True
            else:
                xcmsg = True
            if xcmsg:
                xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG,
                                         str(now.strftime('%s')))
        if xcmsg:
            xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body)
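
    # With the defaults above, coalesce_err_rate = 1/60 messages per minute,
    # so the minimum spacing between two XenCenter messages works out to
    # (1 / (1/60)) * 60 = 3600 seconds: at most one coalesce-error message
    # per hour per SR.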


    def coalesce(self) -> int:
        return self.cowutil.coalesce(self.path)

    @staticmethod
    def _doCoalesceCowImage(vdi: "VDI"):
        try:
            startTime = time.time()
            allocated_size = vdi.getAllocatedSize()
            coalesced_size = vdi.coalesce()
            endTime = time.time()
            vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size)
        except util.CommandException as ce:
            # We use try/except for the following piece of code because it runs
            # in a separate process context and errors will not be caught and
            # reported by anyone.
            try:
                # Report coalesce errors back to user via XC
                VDI._reportCoalesceError(vdi, ce)
            except Exception as e:
                util.SMlog('failed to create XenCenter message: %s' % e)
            raise ce
        except:
            raise

    def _vdi_is_raw(self, vdi_path):
        """
        Given a path to a VDI, determine if it is raw
        """
        uuid = self.extractUuid(vdi_path)
        return self.sr.vdis[uuid].vdi_type == VdiType.RAW

    def _coalesceCowImage(self, timeOut):
        Util.log("  Running COW coalesce on %s" % self)

        def abortTest():
            if self.cowutil.isCoalesceableOnRemote():
                file = self.sr._gc_running_file(self)
                try:
                    with open(file, "r") as f:
                        if not f.read():
                            return True
                except OSError as e:
                    if e.errno == errno.ENOENT:
                        util.SMlog("File {} does not exist".format(file))
                    else:
                        util.SMlog("IOError: {}".format(e))
                    return True
            return IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)

        try:
            util.fistpoint.activate_custom_fn(
                "cleanup_coalesceVHD_inject_failure",
                util.inject_failure)
            Util.runAbortable(lambda: VDI._doCoalesceCowImage(self), None,
                              self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        except:
            # An exception at this phase could indicate a failure in the COW
            # coalesce, or a kill of the COW coalesce by runAbortable due to
            # timeOut. Try a repair and reraise the exception.
            parent = ""
            try:
                parent = self.getParent()
                if not self._vdi_is_raw(parent):
                    # Repair error is logged and ignored. Error reraised later
                    util.SMlog('Coalesce failed on %s, attempting repair on '
                               'parent %s' % (self.uuid, parent))
                    self.repair(parent)
            except Exception as e:
                util.SMlog('(error ignored) Failed to repair parent %s '
                           'after failed coalesce on %s, err: %s' %
                           (parent, self.path, e))
            raise

        util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid)

    def _relinkSkip(self) -> None:
        """Relink children of this VDI to point to the parent of this VDI"""
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("  Relinking %s from %s to %s" %
                     (child, self, self.parent))
            util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid)
            child._setParent(self.parent)
        self.children = []

    def _reloadChildren(self, vdiSkip):
        """Pause & unpause all VDIs in the subtree to cause blktap to reload
        the COW image metadata for this file in any online VDI"""
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if child == vdiSkip:
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("  Reloading VDI %s" % child)
            child._reload()

    def _reload(self):
        """Pause & unpause to cause blktap to reload the image metadata"""
        for child in self.children:
            child._reload()

        # only leaves can be attached
        if len(self.children) == 0:
            try:
                self.delConfig(VDI.DB_VDI_RELINKING)
            except XenAPI.Failure as e:
                if not util.isInvalidVDI(e):
                    raise
            self.refresh()

    def _tagChildrenForRelink(self):
        if len(self.children) == 0:
            retries = 0
            try:
                while retries < 15:
                    retries += 1
                    if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None:
                        Util.log("VDI %s is activating, wait to relink" %
                                 self.uuid)
                    else:
                        self.setConfig(VDI.DB_VDI_RELINKING, "True")

                        if self.getConfig(VDI.DB_VDI_ACTIVATING):
                            self.delConfig(VDI.DB_VDI_RELINKING)
                            Util.log("VDI %s started activating while tagging" %
                                     self.uuid)
                        else:
                            return
                    time.sleep(2)

                raise util.SMException("Failed to tag vdi %s for relink" % self)
            except XenAPI.Failure as e:
                if not util.isInvalidVDI(e):
                    raise

        for child in self.children:
            child._tagChildrenForRelink()
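
    # Note on the retry loop above: with 15 attempts spaced 2 seconds apart,
    # a leaf gets roughly 30 seconds to finish activating before tagging for
    # relink is abandoned and an SMException is raised.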


    def _loadInfoParent(self):
        ret = self.cowutil.getParent(self.path, LvmCowUtil.extractUuid)
        if ret:
            self.parentUuid = ret

    def _setParent(self, parent) -> None:
        self.cowutil.setParent(self.path, parent.path, False)
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
            Util.log("Updated the vhd-parent field for child %s with %s" %
                     (self.uuid, self.parentUuid))
        except:
            Util.log("Failed to update %s with vhd-parent field %s" %
                     (self.uuid, self.parentUuid))

    def isHidden(self) -> bool:
        if self._hidden is None:
            self._loadInfoHidden()
        return self._hidden

    def _loadInfoHidden(self) -> None:
        hidden = self.cowutil.getHidden(self.path)
        self._hidden = (hidden != 0)

    def _setHidden(self, hidden=True) -> None:
        self._hidden = None
        self.cowutil.setHidden(self.path, hidden)
        self._hidden = hidden

    def _increaseSizeVirt(self, size, atomic=True) -> None:
        """Ensure the virtual size of 'self' is at least 'size'. Note that
        resizing a COW image must always be done offline and atomically: the
        file must not be open by anyone and no concurrent operations may take
        place. Thus we use the Agent API call for performing paused atomic
        operations. If the caller is already in the atomic context, it must
        call with atomic=False"""
        if self.sizeVirt >= size:
            return
        Util.log("  Expanding COW image virt size for VDI %s: %s -> %s" %
                 (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

        msize = self.cowutil.getMaxResizeSize(self.path)
        if (size <= msize):
            self.cowutil.setSizeVirtFast(self.path, size)
        else:
            if atomic:
                vdiList = self._getAllSubtree()
                self.sr.lock()
                try:
                    self.sr.pauseVDIs(vdiList)
                    try:
                        self._setSizeVirt(size)
                    finally:
                        self.sr.unpauseVDIs(vdiList)
                finally:
                    self.sr.unlock()
            else:
                self._setSizeVirt(size)

        self.sizeVirt = self.cowutil.getSizeVirt(self.path)
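
    # Resize strategy recap (illustrative): if the target size fits within
    # getMaxResizeSize(), the cheap in-place setSizeVirtFast() path is taken;
    # otherwise every VDI in the subtree is paused around _setSizeVirt(),
    # since the slow resize is only safe while the images are offline.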


    def _setSizeVirt(self, size) -> None:
        """WARNING: do not call this method directly unless all VDIs in the
        subtree are guaranteed to be unplugged (and remain so for the duration
        of the operation): this operation is only safe for offline COW images"""
        jFile = os.path.join(self.sr.path, self.uuid)
        self.cowutil.setSizeVirt(self.path, size, jFile)

    def _queryCowBlocks(self) -> bytes:
        return self.cowutil.getBlockBitmap(self.path)

    def _getCoalescedSizeData(self):
        """Get the data size of the resulting image if we coalesce self onto
        parent. We calculate the actual size by using the image block allocation
        information (as opposed to just adding up the two image sizes to get an
        upper bound)"""
        # make sure we don't use stale BAT info from vdi_rec since the child
        # was writable all this time
        self.delConfig(VDI.DB_VDI_BLOCKS)
        blocksChild = self.getVDIBlocks()
        blocksParent = self.parent.getVDIBlocks()
        numBlocks = Util.countBits(blocksChild, blocksParent)
        Util.log("Num combined blocks = %d" % numBlocks)
        sizeData = numBlocks * self.cowutil.getBlockSize(self.path)
        assert(sizeData <= self.sizeVirt)
        return sizeData
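
    # Worked example (hypothetical numbers; cowutil.getBlockSize is the
    # authority): with a 2 MiB block size and countBits() reporting 1000
    # blocks allocated in the union of the child and parent bitmaps,
    # sizeData = 1000 * 2 MiB, roughly 1.95 GiB. The extra space needed is
    # that figure plus bitmap/empty-image overheads, minus the parent's
    # current physical size (see _calcExtraSpaceForCoalescing below).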


    def _calcExtraSpaceForCoalescing(self) -> int:
        sizeData = self._getCoalescedSizeData()
        sizeCoalesced = sizeData + self.cowutil.calcOverheadBitmap(sizeData) + \
            self.cowutil.calcOverheadEmpty(self.sizeVirt)
        Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
        return sizeCoalesced - self.parent.getSizePhys()

    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        """How much extra space in the SR will be required to
        [live-]leaf-coalesce this VDI"""
        # the space requirements are the same as for inline coalesce
        return self._calcExtraSpaceForCoalescing()

    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        """How much extra space in the SR will be required to
        snapshot-coalesce this VDI"""
        return self._calcExtraSpaceForCoalescing() + \
            self.cowutil.calcOverheadEmpty(self.sizeVirt)  # extra snap leaf

    def _getAllSubtree(self):
        """Get self and all VDIs in the subtree of self as a flat list"""
        vdiList = [self]
        for child in self.children:
            vdiList.extend(child._getAllSubtree())
        return vdiList



class FileVDI(VDI):
    """Object representing a VDI in a file-based SR (EXT or NFS)"""

    @override
    @staticmethod
    def extractUuid(path):
        fileName = os.path.basename(path)
        return os.path.splitext(fileName)[0]

    def __init__(self, sr, uuid, vdi_type):
        VDI.__init__(self, sr, uuid, vdi_type)
        self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])

    @override
    def load(self, info=None) -> None:
        if not info:
            if not util.pathexists(self.path):
                raise util.SMException("%s not found" % self.path)
            try:
                info = self.cowutil.getInfo(self.path, self.extractUuid)
            except util.SMException:
                Util.log("  [VDI %s: failed to read COW image metadata]" % self.uuid)
                return
        self.parent = None
        self.children = []
        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        self._sizePhys = info.sizePhys
        self._sizeAllocated = info.sizeAllocated
        self._hidden = info.hidden
        self.scanError = False
        self.path = os.path.join(self.sr.path, "%s%s" %
                                 (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type]))

    @override
    def rename(self, uuid) -> None:
        oldPath = self.path
        VDI.rename(self, uuid)
        self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])
        self.path = os.path.join(self.sr.path, self.fileName)
        assert(not util.pathexists(self.path))
        Util.log("Renaming %s -> %s" % (oldPath, self.path))
        os.rename(oldPath, self.path)

    @override
    def delete(self) -> None:
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" %
                                   self.uuid)
        try:
            self.sr.lock()
            try:
                os.unlink(self.path)
                self.sr.forgetVDI(self.uuid)
            finally:
                self.sr.unlock()
        except OSError:
            raise util.SMException("os.unlink(%s) failed" % self.path)
        VDI.delete(self)

    @override
    def getAllocatedSize(self) -> int:
        if self._sizeAllocated == -1:
            self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)
        return self._sizeAllocated



1306class LVMVDI(VDI): 

1307 """Object representing a VDI in an LVM SR""" 

1308 

1309 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent 

1310 

1311 @override 

1312 def load(self, info=None) -> None: 

1313 # `info` is always set. `None` default value is only here to match parent method. 

1314 assert info, "No info given to LVMVDI.load" 

1315 self.parent = None 

1316 self.children = [] 

1317 self._sizePhys = -1 

1318 self._sizeAllocated = -1 

1319 self.scanError = info.scanError 

1320 self.sizeLV = info.sizeLV 

1321 self.sizeVirt = info.sizeVirt 

1322 self.fileName = info.lvName 

1323 self.lvActive = info.lvActive 

1324 self.lvOpen = info.lvOpen 

1325 self.lvReadonly = info.lvReadonly 

1326 self._hidden = info.hidden 

1327 self.parentUuid = info.parentUuid 

1328 self.path = os.path.join(self.sr.path, self.fileName) 

1329 self.lvmcowutil = LvmCowUtil(self.cowutil) 

1330 

1331 @override 

1332 @staticmethod 

1333 def extractUuid(path): 

1334 return LvmCowUtil.extractUuid(path) 

1335 

1336 def inflate(self, size): 

1337 """inflate the LV containing the COW image to 'size'""" 

1338 if not VdiType.isCowImage(self.vdi_type): 

1339 return 

1340 self._activate() 

1341 self.sr.lock() 

1342 try: 

1343 self.lvmcowutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size) 

1344 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid) 

1345 finally: 

1346 self.sr.unlock() 

1347 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1348 self._sizePhys = -1 

1349 self._sizeAllocated = -1 

1350 

1351 def deflate(self): 

1352 """deflate the LV containing the image to minimum""" 

1353 if not VdiType.isCowImage(self.vdi_type): 

1354 return 

1355 self._activate() 

1356 self.sr.lock() 

1357 try: 

1358 self.lvmcowutil.deflate(self.sr.lvmCache, self.fileName, self.getSizePhys()) 

1359 finally: 

1360 self.sr.unlock() 

1361 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1362 self._sizePhys = -1 

1363 self._sizeAllocated = -1 

1364 

1365 def inflateFully(self): 

1366 self.inflate(self.lvmcowutil.calcVolumeSize(self.sizeVirt)) 

1367 

1368 def inflateParentForCoalesce(self): 

1369 """Inflate the parent only as much as needed for the purposes of 

1370 coalescing""" 

1371 if not VdiType.isCowImage(self.parent.vdi_type): 

1372 return 

1373 inc = self._calcExtraSpaceForCoalescing() 

1374 if inc > 0: 

1375 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid) 

1376 self.parent.inflate(self.parent.sizeLV + inc) 

1377 

1378 @override 

1379 def updateBlockInfo(self) -> Optional[str]: 

1380 if VdiType.isCowImage(self.vdi_type): 

1381 return VDI.updateBlockInfo(self) 

1382 return None 

1383 

1384 @override 

1385 def rename(self, uuid) -> None: 

1386 oldUuid = self.uuid 

1387 oldLVName = self.fileName 

1388 VDI.rename(self, uuid) 

1389 self.fileName = LV_PREFIX[self.vdi_type] + self.uuid 

1390 self.path = os.path.join(self.sr.path, self.fileName) 

1391 assert(not self.sr.lvmCache.checkLV(self.fileName)) 

1392 

1393 self.sr.lvmCache.rename(oldLVName, self.fileName) 

1394 if self.sr.lvActivator.get(oldUuid, False): 

1395 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False) 

1396 

1397 ns = NS_PREFIX_LVM + self.sr.uuid 

1398 (cnt, bcnt) = RefCounter.check(oldUuid, ns) 

1399 RefCounter.set(self.uuid, cnt, bcnt, ns) 

1400 RefCounter.reset(oldUuid, ns) 

1401 

1402 @override 

1403 def delete(self) -> None: 

1404 if len(self.children) > 0: 

1405 raise util.SMException("VDI %s has children, can't delete" % \ 

1406 self.uuid) 

1407 self.sr.lock() 

1408 try: 

1409 self.sr.lvmCache.remove(self.fileName) 

1410 self.sr.forgetVDI(self.uuid) 

1411 finally: 

1412 self.sr.unlock() 

1413 RefCounter.reset(self.uuid, NS_PREFIX_LVM + self.sr.uuid) 

1414 VDI.delete(self) 

1415 

1416 @override 

1417 def getSizePhys(self) -> int: 

1418 if self._sizePhys == -1: 

1419 self._loadInfoSizePhys() 

1420 return self._sizePhys 

1421 

1422 def _loadInfoSizePhys(self): 

1423 """Get the physical utilization of the COW image file. We do it individually 

1424 (and not using the COW batch scanner) as an optimization: this info is 

1425 relatively expensive and we need it only for VDI's involved in 

1426 coalescing.""" 

1427 if not VdiType.isCowImage(self.vdi_type): 

1428 return 

1429 self._activate() 

1430 self._sizePhys = self.cowutil.getSizePhys(self.path) 

1431 if self._sizePhys <= 0: 

1432 raise util.SMException("phys size of %s = %d" % \ 

1433 (self, self._sizePhys)) 

1434 

1435 @override 

1436 def getAllocatedSize(self) -> int: 

1437 if self._sizeAllocated == -1: 

1438 self._loadInfoSizeAllocated() 

1439 return self._sizeAllocated 

1440 

1441 def _loadInfoSizeAllocated(self): 

1442 """ 

1443 Get the allocated size of the COW volume. 

1444 """ 

1445 if not VdiType.isCowImage(self.vdi_type): 

1446 return 

1447 self._activate() 

1448 self._sizeAllocated = self.cowutil.getAllocatedSize(self.path) 

1449 

1450 @override 

1451 def _loadInfoHidden(self) -> None: 

1452 if not VdiType.isCowImage(self.vdi_type): 

1453 self._hidden = self.sr.lvmCache.getHidden(self.fileName) 

1454 else: 

1455 VDI._loadInfoHidden(self) 

1456 

1457 @override 

1458 def _setHidden(self, hidden=True) -> None: 

1459 if not VdiType.isCowImage(self.vdi_type): 

1460 self._hidden = None 

1461 self.sr.lvmCache.setHidden(self.fileName, hidden) 

1462 self._hidden = hidden 

1463 else: 

1464 VDI._setHidden(self, hidden) 

1465 

1466 @override 

1467 def __str__(self) -> str: 

1468 strType = self.vdi_type 

1469 if self.vdi_type == VdiType.RAW: 

1470 strType = "RAW" 

1471 strHidden = "" 

1472 if self.isHidden(): 

1473 strHidden = "*" 

1474 strSizePhys = "" 

1475 if self._sizePhys > 0: 

1476 strSizePhys = Util.num2str(self._sizePhys) 

1477 strSizeAllocated = "" 

1478 if self._sizeAllocated >= 0: 

1479 strSizeAllocated = Util.num2str(self._sizeAllocated) 

1480 strActive = "n" 

1481 if self.lvActive: 

1482 strActive = "a" 

1483 if self.lvOpen: 

1484 strActive += "o" 

1485 return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType, 

1486 Util.num2str(self.sizeVirt), strSizePhys, strSizeAllocated, 

1487 Util.num2str(self.sizeLV), strActive) 

1488 

1489 @override 

1490 def validate(self, fast=False) -> None: 

1491 if VdiType.isCowImage(self.vdi_type): 

1492 VDI.validate(self, fast) 

1493 

1494 def _setChainRw(self) -> List[str]: 

1495 """ 

1496 Set the readonly LV and children writable. 

1497 It's needed because the coalesce can be done by tapdisk directly 

1498 and it will need to write parent information for children. 

1499 The VDI we want to coalesce into it's parent need to be writable for libqcow coalesce part. 

1500 Return a list of the LV that were previously readonly to be made RO again after the coalesce. 

1501 """ 

1502 was_ro = [] 

1503 if self.lvReadonly: 

1504 self.sr.lvmCache.setReadonly(self.fileName, False) 

1505 was_ro.append(self.fileName) 

1506 

1507 for child in self.children: 

1508 if child.lvReadonly: 

1509 self.sr.lvmCache.setReadonly(child.fileName, False) 

1510 was_ro.append(child.fileName) 

1511 

1512 return was_ro 

1513 

1514 def _setChainRo(self, was_ro: List[str]) -> None: 

1515 """Set the list of LV in parameters to readonly""" 

1516 for lvName in was_ro: 

1517 self.sr.lvmCache.setReadonly(lvName, True) 

1518 

1519 @override 

1520 def _doCoalesce(self) -> None: 

1521 """LVMVDI parents must first be activated, inflated, and made writable""" 

1522 was_ro = [] 

1523 try: 

1524 self._activateChain() 

1525 self.sr.lvmCache.setReadonly(self.parent.fileName, False) 

1526 self.parent.validate() 

1527 self.inflateParentForCoalesce() 

1528 was_ro = self._setChainRw() 

1529 VDI._doCoalesce(self) 

1530 finally: 

1531 self.parent._loadInfoSizePhys() 

1532 self.parent.deflate() 

1533 self.sr.lvmCache.setReadonly(self.parent.fileName, True) 

1534 self._setChainRo(was_ro) 

1535 

1536 @override 

1537 def _setParent(self, parent) -> None: 

1538 self._activate() 

1539 if self.lvReadonly: 

1540 self.sr.lvmCache.setReadonly(self.fileName, False) 

1541 

1542 try: 

1543 self.cowutil.setParent(self.path, parent.path, parent.vdi_type == VdiType.RAW) 

1544 finally: 

1545 if self.lvReadonly: 

1546 self.sr.lvmCache.setReadonly(self.fileName, True) 

1547 self._deactivate() 

1548 self.parent = parent 

1549 self.parentUuid = parent.uuid 

1550 parent.children.append(self) 

1551 try: 

1552 self.setConfig(self.DB_VDI_PARENT, self.parentUuid) 

1553 Util.log("Updated the VDI-parent field for child %s with %s" % \ 

1554 (self.uuid, self.parentUuid)) 

1555 except: 

1556 Util.log("Failed to update the VDI-parent with %s for child %s" % \ 

1557 (self.parentUuid, self.uuid)) 

1558 

1559 def _activate(self): 

1560 self.sr.lvActivator.activate(self.uuid, self.fileName, False) 

1561 

1562 def _activateChain(self): 

1563 vdi = self 

1564 while vdi: 

1565 vdi._activate() 

1566 vdi = vdi.parent 

1567 

1568 def _deactivate(self): 

1569 self.sr.lvActivator.deactivate(self.uuid, False) 

1570 

1571 @override 

1572 def _increaseSizeVirt(self, size, atomic=True) -> None: 

1573 "ensure the virtual size of 'self' is at least 'size'" 

1574 self._activate() 

1575 if VdiType.isCowImage(self.vdi_type): 

1576 VDI._increaseSizeVirt(self, size, atomic) 

1577 return 

1578 

1579 # raw VDI case 

1580 offset = self.sizeLV 

1581 if self.sizeVirt < size: 

1582 oldSize = self.sizeLV 

1583 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size) 

1584 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV)) 

1585 self.sr.lvmCache.setSize(self.fileName, self.sizeLV) 

1586 offset = oldSize 

1587 unfinishedZero = False 

1588 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid) 

1589 if jval: 

1590 unfinishedZero = True 

1591 offset = int(jval) 

1592 length = self.sizeLV - offset 

1593 if not length: 

1594 return 

1595 

1596 if unfinishedZero: 

1597 Util.log(" ==> Redoing unfinished zeroing out") 

1598 else: 

1599 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \ 

1600 str(offset)) 

1601 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1602 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1603 func = lambda: util.zeroOut(self.path, offset, length) 

1604 Util.runAbortable(func, True, self.sr.uuid, abortTest, 

1605 VDI.POLL_INTERVAL, 0) 

1606 self.sr.journaler.remove(self.JRN_ZERO, self.uuid) 

1607 
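# Worked example (illustrative): growing a raw LV from 4 GiB to 6 GiB must
# zero the new 2 GiB tail. The starting offset (4 GiB = 4294967296) is first
# recorded in a JRN_ZERO journal entry; if the process crashes mid-zeroing,
# the next run finds the entry and simply redoes the zeroing from that
# offset, so a partially-zeroed extension is never mistaken for a valid one.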

1608 @override 

1609 def _setSizeVirt(self, size) -> None: 

1610 """WARNING: do not call this method directly unless all VDIs in the 

1611 subtree are guaranteed to be unplugged (and remain so for the duration 

1612 of the operation): this operation is only safe for offline COW images.""" 

1613 self._activate() 

1614 jFile = self.lvmcowutil.createResizeJournal(self.sr.lvmCache, self.uuid) 

1615 try: 

1616 self.lvmcowutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size, jFile) 

1617 finally: 

1618 self.lvmcowutil.destroyResizeJournal(self.sr.lvmCache, self.uuid) 

1619 

1620 @override 

1621 def _queryCowBlocks(self) -> bytes: 

1622 self._activate() 

1623 return VDI._queryCowBlocks(self) 

1624 

1625 @override 

1626 def _calcExtraSpaceForCoalescing(self) -> int: 

1627 if not VdiType.isCowImage(self.parent.vdi_type): 

1628 return 0 # raw parents are never deflated in the first place 

1629 sizeCoalesced = self.lvmcowutil.calcVolumeSize(self._getCoalescedSizeData()) 

1630 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1631 return sizeCoalesced - self.parent.sizeLV 

1632 
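# Worked example (illustrative): if the fully-coalesced data would need a
# 10 GiB LV and the parent LV currently occupies 8 GiB, the coalesce needs
# 10 - 8 = 2 GiB of free space in the VG before it can start; a raw parent
# needs 0 extra because it is never deflated.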

1633 @override 

1634 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1635 """How much extra space in the SR will be required to 

1636 [live-]leaf-coalesce this VDI""" 

1637 # we can deflate the leaf to minimize the space requirements 

1638 deflateDiff = self.sizeLV - lvutil.calcSizeLV(self.getSizePhys()) 

1639 return self._calcExtraSpaceForCoalescing() - deflateDiff 

1640 

1641 @override 

1642 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1643 return self._calcExtraSpaceForCoalescing() + \ 

1644 lvutil.calcSizeLV(self.getSizePhys()) 

1645 

1646 

1647class LinstorVDI(VDI): 

1648 """Object representing a VDI in a LINSTOR SR""" 

1649 

1650 VOLUME_LOCK_TIMEOUT = 30 

1651 

1652 @override 

1653 def load(self, info=None) -> None: 

1654 self.parentUuid = info.parentUuid if info else None  # info may be None and is then fetched below

1655 self.scanError = True 

1656 self.parent = None 

1657 self.children = [] 

1658 

1659 self.fileName = self.sr._linstor.get_volume_name(self.uuid) 

1660 self.path = self.sr._linstor.build_device_path(self.fileName) 

1661 self.linstorcowutil = LinstorCowUtil(self.sr.xapi.session, self.sr._linstor, info.vdiType if info else self.vdi_type)  # fall back to our own type when no info was passed

1662 

1663 if not info: 

1664 try: 

1665 info = self.linstorcowutil.get_info(self.uuid) 

1666 except util.SMException: 

1667 Util.log( 

1668 ' [VDI {}: failed to read COW image metadata]'.format(self.uuid) 

1669 ) 

1670 return 

1671 

1672 self.parentUuid = info.parentUuid 

1673 self.sizeVirt = info.sizeVirt 

1674 self._sizePhys = -1 

1675 self._sizeAllocated = -1 

1676 self.drbd_size = -1 

1677 self._hidden = info.hidden 

1678 self.scanError = False 

1679 

1680 @override 

1681 def getSizePhys(self, fetch=False) -> int: 

1682 if self._sizePhys < 0 or fetch: 

1683 self._sizePhys = self.linstorcowutil.get_size_phys(self.uuid) 

1684 return self._sizePhys 

1685 

1686 def getDrbdSize(self, fetch=False): 

1687 if self.drbd_size < 0 or fetch: 

1688 self.drbd_size = self.linstorcowutil.get_drbd_size(self.uuid) 

1689 return self.drbd_size 

1690 

1691 @override 

1692 def getAllocatedSize(self) -> int: 

1693 if self._sizeAllocated == -1: 

1694 if VdiType.isCowImage(self.vdi_type): 

1695 self._sizeAllocated = self.linstorcowutil.get_allocated_size(self.uuid) 

1696 return self._sizeAllocated 

1697 

1698 def inflate(self, size): 

1699 if not VdiType.isCowImage(self.vdi_type): 

1700 return 

1701 self.sr.lock() 

1702 try: 

1703 # Ensure we use the real DRBD size and not the cached one. 

1704 # Why? Because this attribute can be changed if volume is resized by user. 

1705 self.drbd_size = self.getDrbdSize(fetch=True) 

1706 self.linstorcowutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size) 

1707 finally: 

1708 self.sr.unlock() 

1709 self.drbd_size = -1 

1710 self._sizePhys = -1 

1711 self._sizeAllocated = -1 

1712 

1713 def deflate(self): 

1714 if not VdiType.isCowImage(self.vdi_type): 

1715 return 

1716 self.sr.lock() 

1717 try: 

1718 # Ensure we use the real sizes and not the cached info. 

1719 self.drbd_size = self.getDrbdSize(fetch=True) 

1720 self._sizePhys = self.getSizePhys(fetch=True) 

1721 self.linstorcowutil.force_deflate(self.path, self._sizePhys, self.drbd_size, zeroize=False) 

1722 finally: 

1723 self.sr.unlock() 

1724 self.drbd_size = -1 

1725 self._sizePhys = -1 

1726 self._sizeAllocated = -1 

1727 

1728 def inflateFully(self): 

1729 if VdiType.isCowImage(self.vdi_type): 

1730 self.inflate(self.linstorcowutil.compute_volume_size(self.sizeVirt)) 

1731 

1732 @override 

1733 def rename(self, uuid) -> None: 

1734 Util.log('Renaming {} -> {} (path={})'.format( 

1735 self.uuid, uuid, self.path 

1736 )) 

1737 self.sr._linstor.update_volume_uuid(self.uuid, uuid) 

1738 VDI.rename(self, uuid) 

1739 

1740 @override 

1741 def delete(self) -> None: 

1742 if len(self.children) > 0: 

1743 raise util.SMException( 

1744 'VDI {} has children, can\'t delete'.format(self.uuid) 

1745 ) 

1746 self.sr.lock() 

1747 try: 

1748 self.sr._linstor.destroy_volume(self.uuid) 

1749 self.sr.forgetVDI(self.uuid) 

1750 finally: 

1751 self.sr.unlock() 

1752 VDI.delete(self) 

1753 

1754 @override 

1755 def validate(self, fast=False) -> None: 

1756 if VdiType.isCowImage(self.vdi_type) and self.linstorcowutil.check(self.uuid, fast=fast) != CowUtil.CheckResult.Success: 

1757 raise util.SMException('COW image {} corrupted'.format(self)) 

1758 

1759 @override 

1760 def pause(self, failfast=False) -> None: 

1761 self.sr._linstor.ensure_volume_is_not_locked( 

1762 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1763 ) 

1764 return super(LinstorVDI, self).pause(failfast) 

1765 

1766 @override 

1767 def coalesce(self) -> int: 

1768 # Note: We raise `SMException` here to skip the current coalesce in case of failure. 

1769 # With any other exception the subsequent coalesce calls would not be executed.

1770 return self.linstorcowutil.force_coalesce(self.path) 

1771 

1772 @override 

1773 def getParent(self) -> str: 

1774 return self.linstorcowutil.get_parent( 

1775 self.sr._linstor.get_volume_uuid_from_device_path(self.path) 

1776 ) 

1777 

1778 @override 

1779 def repair(self, parent_uuid) -> None: 

1780 self.linstorcowutil.force_repair( 

1781 self.sr._linstor.get_device_path(parent_uuid) 

1782 ) 

1783 

1784 @override 

1785 def _relinkSkip(self) -> None: 

1786 abortFlag = IPCFlag(self.sr.uuid) 

1787 for child in self.children: 

1788 if abortFlag.test(FLAG_TYPE_ABORT): 

1789 raise AbortException('Aborting due to signal') 

1790 Util.log( 

1791 ' Relinking {} from {} to {}'.format( 

1792 child, self, self.parent 

1793 ) 

1794 ) 

1795 

1796 session = child.sr.xapi.session 

1797 sr_uuid = child.sr.uuid 

1798 vdi_uuid = child.uuid 

1799 try: 

1800 self.sr._linstor.ensure_volume_is_not_locked( 

1801 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1802 ) 

1803 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) 

1804 child._setParent(self.parent) 

1805 finally: 

1806 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) 

1807 self.children = [] 

1808 

1809 @override 

1810 def _setParent(self, parent) -> None: 

1811 self.sr._linstor.get_device_path(self.uuid) 

1812 self.linstorcowutil.force_parent(self.path, parent.path) 

1813 self.parent = parent 

1814 self.parentUuid = parent.uuid 

1815 parent.children.append(self) 

1816 try: 

1817 self.setConfig(self.DB_VDI_PARENT, self.parentUuid) 

1818 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1819 (self.uuid, self.parentUuid)) 

1820 except: 

1821 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1822 (self.uuid, self.parentUuid)) 

1823 

1824 @override 

1825 def _doCoalesce(self) -> None: 

1826 try: 

1827 self._activateChain() 

1828 self.parent.validate() 

1829 self._inflateParentForCoalesce() 

1830 VDI._doCoalesce(self) 

1831 finally: 

1832 self.parent.deflate() 

1833 

1834 def _activateChain(self): 

1835 vdi = self 

1836 while vdi: 

1837 try: 

1838 self.sr._linstor.get_device_path(vdi.uuid)  # result unused: just verify the device path resolves

1839 except Exception as e: 

1840 # Use SMException to skip this coalesce only.

1841 # Any other exception would stop the GC entirely...

1842 raise util.SMException(str(e)) 

1843 vdi = vdi.parent 

1844 

1845 @override 

1846 def _setHidden(self, hidden=True) -> None: 

1847 HIDDEN_TAG = 'hidden' 

1848 

1849 if not VdiType.isCowImage(self.vdi_type): 

1850 self._hidden = None 

1851 self.sr._linstor.update_volume_metadata(self.uuid, { 

1852 HIDDEN_TAG: hidden 

1853 }) 

1854 self._hidden = hidden 

1855 else: 

1856 VDI._setHidden(self, hidden) 

1857 

1858 @override 

1859 def _increaseSizeVirt(self, size, atomic=True): 

1860 if self.vdi_type == VdiType.RAW: 

1861 offset = self.drbd_size 

1862 if self.sizeVirt < size: 

1863 oldSize = self.drbd_size 

1864 self.drbd_size = LinstorVolumeManager.round_up_volume_size(size) 

1865 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size)) 

1866 self.sr._linstor.resize_volume(self.uuid, self.drbd_size) 

1867 offset = oldSize 

1868 unfinishedZero = False 

1869 jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid) 

1870 if jval: 

1871 unfinishedZero = True 

1872 offset = int(jval) 

1873 length = self.drbd_size - offset 

1874 if not length: 

1875 return 

1876 

1877 if unfinishedZero: 

1878 Util.log(" ==> Redoing unfinished zeroing out") 

1879 else: 

1880 self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset)) 

1881 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1882 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1883 func = lambda: util.zeroOut(self.path, offset, length) 

1884 Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0) 

1885 self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid) 

1886 return 

1887 

1888 if self.sizeVirt >= size: 

1889 return 

1890 Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \ 

1891 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1892 

1893 msize = self.linstorcowutil.get_max_resize_size(self.uuid) * 1024 * 1024 

1894 if (size <= msize): 

1895 self.linstorcowutil.set_size_virt_fast(self.path, size) 

1896 else: 

1897 if atomic: 

1898 vdiList = self._getAllSubtree() 

1899 self.sr.lock() 

1900 try: 

1901 self.sr.pauseVDIs(vdiList) 

1902 try: 

1903 self._setSizeVirt(size) 

1904 finally: 

1905 self.sr.unpauseVDIs(vdiList) 

1906 finally: 

1907 self.sr.unlock() 

1908 else: 

1909 self._setSizeVirt(size) 

1910 

1911 self.sizeVirt = self.linstorcowutil.get_size_virt(self.uuid) 

1912 

1913 @override 

1914 def _setSizeVirt(self, size) -> None: 

1915 jfile = self.uuid + '-jvhd' 

1916 self.sr._linstor.create_volume( 

1917 jfile, self.cowutil.getResizeJournalSize(), persistent=False, volume_name=jfile 

1918 ) 

1919 try: 

1920 self.inflate(self.linstorcowutil.compute_volume_size(size)) 

1921 self.linstorcowutil.set_size_virt(self.path, size, jfile) 

1922 finally: 

1923 try: 

1924 self.sr._linstor.destroy_volume(jfile) 

1925 except Exception: 

1926 # We can ignore it, in any case this volume is not persistent. 

1927 pass 

1928 

1929 @override 

1930 def _queryCowBlocks(self) -> bytes: 

1931 return self.linstorcowutil.get_block_bitmap(self.uuid) 

1932 

1933 def _inflateParentForCoalesce(self): 

1934 if not VdiType.isCowImage(self.parent.vdi_type): 

1935 return 

1936 inc = self._calcExtraSpaceForCoalescing() 

1937 if inc > 0: 

1938 self.parent.inflate(self.parent.getDrbdSize() + inc) 

1939 

1940 @override 

1941 def _calcExtraSpaceForCoalescing(self) -> int: 

1942 if not VdiType.isCowImage(self.parent.vdi_type): 

1943 return 0 

1944 size_coalesced = self.linstorcowutil.compute_volume_size(self._getCoalescedSizeData()) 

1945 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced)) 

1946 return size_coalesced - self.parent.getDrbdSize() 

1947 

1948 @override 

1949 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1950 assert self.getDrbdSize() > 0 

1951 assert self.getSizePhys() > 0 

1952 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizePhys()) 

1953 assert deflate_diff >= 0 

1954 return self._calcExtraSpaceForCoalescing() - deflate_diff 

1955 

1956 @override 

1957 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1958 assert self.getSizePhys() > 0 

1959 return self._calcExtraSpaceForCoalescing() + \ 

1960 LinstorVolumeManager.round_up_volume_size(self.getSizePhys()) 

1961 

1962################################################################################ 

1963# 

1964# SR 

1965# 

1966class SR(object): 

1967 class LogFilter: 

1968 def __init__(self, sr): 

1969 self.sr = sr 

1970 self.stateLogged = False 

1971 self.prevState = {} 

1972 self.currState = {} 

1973 

1974 def logState(self): 

1975 changes = "" 

1976 self.currState.clear() 

1977 for vdi in self.sr.vdiTrees: 

1978 self.currState[vdi.uuid] = self._getTreeStr(vdi) 

1979 if not self.prevState.get(vdi.uuid) or \ 

1980 self.prevState[vdi.uuid] != self.currState[vdi.uuid]: 

1981 changes += self.currState[vdi.uuid] 

1982 

1983 for uuid in self.prevState: 

1984 if not self.currState.get(uuid): 

1985 changes += "Tree %s gone\n" % uuid 

1986 

1987 result = "SR %s (%d VDIs in %d COW trees): " % \ 

1988 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees)) 

1989 

1990 if len(changes) > 0: 

1991 if self.stateLogged: 

1992 result += "showing only COW trees that changed:" 

1993 result += "\n%s" % changes 

1994 else: 

1995 result += "no changes" 

1996 

1997 for line in result.split("\n"): 

1998 Util.log("%s" % line) 

1999 self.prevState.clear() 

2000 for key, val in self.currState.items(): 

2001 self.prevState[key] = val 

2002 self.stateLogged = True 

2003 

2004 def logNewVDI(self, uuid): 

2005 if self.stateLogged: 

2006 Util.log("Found new VDI when scanning: %s" % uuid) 

2007 

2008 def _getTreeStr(self, vdi, indent=8): 

2009 treeStr = "%s%s\n" % (" " * indent, vdi) 

2010 for child in vdi.children: 

2011 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT) 

2012 return treeStr 

2013 
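# Example (illustrative): for a parent VDI P with one child C, logState()
# renders one line per node via _getTreeStr(), each child indented by
# STR_TREE_INDENT further than its parent:
#
#         P
#             C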

2014 TYPE_FILE = "file" 

2015 TYPE_LVHD = "lvhd" 

2016 TYPE_LINSTOR = "linstor" 

2017 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR] 

2018 

2019 LOCK_RETRY_INTERVAL = 3 

2020 LOCK_RETRY_ATTEMPTS = 20 

2021 LOCK_RETRY_ATTEMPTS_LOCK = 100 

2022 

2023 SCAN_RETRY_ATTEMPTS = 3 

2024 

2025 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM) 

2026 TMP_RENAME_PREFIX = "OLD_" 

2027 

2028 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline" 

2029 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override" 

2030 

2031 @staticmethod 

2032 def getInstance(uuid, xapiSession, createLock=True, force=False): 

2033 xapi = XAPI(xapiSession, uuid) 

2034 type = normalizeType(xapi.srRecord["type"]) 

2035 if type == SR.TYPE_FILE: 

2036 return FileSR(uuid, xapi, createLock, force) 

2037 elif type == SR.TYPE_LVHD: 

2038 return LVMSR(uuid, xapi, createLock, force) 

2039 elif type == SR.TYPE_LINSTOR: 

2040 return LinstorSR(uuid, xapi, createLock, force) 

2041 raise util.SMException("SR type %s not recognized" % type) 

2042 
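# Usage sketch (illustrative; `session` and `sr_uuid` are caller-supplied):
# the factory reads the SR type from the XAPI record and picks the matching
# subclass:
#
#     sr = SR.getInstance(sr_uuid, session)
#     sr.scanLocked()
#     if sr.hasWork():
#         sr.garbageCollect(dryRun=True)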

2043 def __init__(self, uuid, xapi, createLock, force): 

2044 self.logFilter = self.LogFilter(self) 

2045 self.uuid = uuid 

2046 self.path = "" 

2047 self.name = "" 

2048 self.vdis = {} 

2049 self.vdiTrees = [] 

2050 self.journaler = None 

2051 self.xapi = xapi 

2052 self._locked = 0 

2053 self._srLock = None 

2054 if createLock:

2055 self._srLock = lock.Lock(lock.LOCK_TYPE_SR, self.uuid) 

2056 else: 

2057 Util.log("Requested no SR locking") 

2058 self.name = self.xapi.srRecord["name_label"] 

2059 self._failedCoalesceTargets = [] 

2060 

2061 if not self.xapi.isPluggedHere(): 

2062 if force:

2063 Util.log("SR %s not attached on this host, ignoring" % uuid) 

2064 else: 

2065 if not self.wait_for_plug(): 

2066 raise util.SMException("SR %s not attached on this host" % uuid) 

2067 

2068 if force:

2069 Util.log("Not checking if we are Master (SR %s)" % uuid) 

2070 elif not self.xapi.isMaster():

2071 raise util.SMException("This host is NOT master, will not run") 

2072 

2073 self.no_space_candidates = {} 

2074 

2075 def msg_cleared(self, xapi_session, msg_ref): 

2076 try: 

2077 msg = xapi_session.xenapi.message.get_record(msg_ref) 

2078 except XenAPI.Failure: 

2079 return True 

2080 

2081 return msg is None 

2082 

2083 def check_no_space_candidates(self): 

2084 xapi_session = self.xapi.getSession() 

2085 

2086 msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE) 

2087 if self.no_space_candidates: 

2088 if msg_id is None or self.msg_cleared(xapi_session, msg_id): 

2089 util.SMlog("Could not coalesce due to a lack of space " 

2090 f"in SR {self.uuid}") 

2091 msg_body = ("Unable to perform data coalesce due to a lack " 

2092 f"of space in SR {self.uuid}") 

2093 msg_id = xapi_session.xenapi.message.create( 

2094 'SM_GC_NO_SPACE', 

2095 3, 

2096 "SR", 

2097 self.uuid, 

2098 msg_body) 

2099 xapi_session.xenapi.SR.remove_from_sm_config( 

2100 self.xapi.srRef, VDI.DB_GC_NO_SPACE) 

2101 xapi_session.xenapi.SR.add_to_sm_config( 

2102 self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id) 

2103 

2104 for candidate in self.no_space_candidates.values(): 

2105 candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id) 

2106 elif msg_id is not None: 

2107 # Everything was coalescable, remove the message 

2108 xapi_session.xenapi.message.destroy(msg_id) 

2109 

2110 def clear_no_space_msg(self, vdi): 

2111 msg_id = None 

2112 try: 

2113 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE) 

2114 except XenAPI.Failure: 

2115 pass 

2116 

2117 self.no_space_candidates.pop(vdi.uuid, None) 

2118 if msg_id is not None:

2119 vdi.delConfig(VDI.DB_GC_NO_SPACE) 

2120 

2121 

2122 def wait_for_plug(self): 

2123 for _ in range(1, 10): 

2124 time.sleep(2) 

2125 if self.xapi.isPluggedHere(): 

2126 return True 

2127 return False 

2128 

2129 def gcEnabled(self, refresh=True): 

2130 if refresh: 

2131 self.xapi.srRecord = \ 

2132 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

2133 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

2134 Util.log("GC is disabled for this SR, abort") 

2135 return False 

2136 return True 

2137 

2138 def scan(self, force=False) -> None: 

2139 """Scan the SR and load VDI info for each VDI. If called repeatedly, 

2140 update VDI objects if they already exist""" 

2141 pass 

2142 

2143 def scanLocked(self, force=False): 

2144 self.lock() 

2145 try: 

2146 self.scan(force) 

2147 finally: 

2148 self.unlock() 

2149 

2150 def getVDI(self, uuid): 

2151 return self.vdis.get(uuid) 

2152 

2153 def hasWork(self): 

2154 if len(self.findGarbage()) > 0: 

2155 return True 

2156 if self.findCoalesceable(): 

2157 return True 

2158 if self.findLeafCoalesceable(): 

2159 return True 

2160 if self.needUpdateBlockInfo(): 

2161 return True 

2162 return False 

2163 

2164 def findCoalesceable(self): 

2165 """Find a coalesceable VDI. Return a vdi that should be coalesced 

2166 (choosing one among all coalesceable candidates according to some 

2167 criteria) or None if there is no VDI that could be coalesced""" 

2168 

2169 candidates = [] 

2170 

2171 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE) 

2172 if srSwitch == "false": 

2173 Util.log("Coalesce disabled for this SR") 

2174 return candidates 

2175 

2176 # finish any VDI for which a relink journal entry exists first 

2177 journals = self.journaler.getAll(VDI.JRN_RELINK) 

2178 for uuid in journals: 

2179 vdi = self.getVDI(uuid) 

2180 if vdi and vdi not in self._failedCoalesceTargets: 

2181 return vdi 

2182 

2183 for vdi in self.vdis.values(): 

2184 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets: 

2185 candidates.append(vdi) 

2186 Util.log("%s is coalescable" % vdi.uuid) 

2187 

2188 self.xapi.update_task_progress("coalescable", len(candidates)) 

2189 

2190 # pick one in the tallest tree 

2191 treeHeight = dict() 

2192 for c in candidates: 

2193 height = c.getTreeRoot().getTreeHeight() 

2194 if treeHeight.get(height): 

2195 treeHeight[height].append(c) 

2196 else: 

2197 treeHeight[height] = [c] 

2198 

2199 freeSpace = self.getFreeSpace() 

2200 heights = list(treeHeight.keys()) 

2201 heights.sort(reverse=True) 

2202 for h in heights: 

2203 for c in treeHeight[h]: 

2204 spaceNeeded = c._calcExtraSpaceForCoalescing() 

2205 if spaceNeeded <= freeSpace: 

2206 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h)) 

2207 self.clear_no_space_msg(c) 

2208 return c 

2209 else: 

2210 self.no_space_candidates[c.uuid] = c 

2211 Util.log("No space to coalesce %s (free space: %d)" % \ 

2212 (c, freeSpace)) 

2213 return None 

2214 
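# Selection example (illustrative): with coalesceable VDIs in trees of height
# 4 and height 2, the height-4 candidates are considered first (the deepest
# chains gain the most from coalescing); within a height, the first candidate
# whose _calcExtraSpaceForCoalescing() fits into the SR's free space is
# returned, and the rest are remembered as no-space candidates.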

2215 def getSwitch(self, key): 

2216 return self.xapi.srRecord["other_config"].get(key) 

2217 

2218 def forbiddenBySwitch(self, switch, condition, fail_msg): 

2219 srSwitch = self.getSwitch(switch) 

2220 ret = False 

2221 if srSwitch: 

2222 ret = srSwitch == condition 

2223 

2224 if ret: 

2225 Util.log(fail_msg) 

2226 

2227 return ret 

2228 

2229 def leafCoalesceForbidden(self): 

2230 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

2231 "false", 

2232 "Coalesce disabled for this SR") or 

2233 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

2234 VDI.LEAFCLSC_DISABLED, 

2235 "Leaf-coalesce disabled for this SR")) 

2236 
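# Example (illustrative): both checks read the SR's other_config, e.g.
#
#     self.forbiddenBySwitch(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED,
#                            "Leaf-coalesce disabled for this SR")
#
# returns True (and logs the message) only when the DB_LEAFCLSC key is set
# and equals LEAFCLSC_DISABLED; an unset switch never forbids anything.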

2237 def findLeafCoalesceable(self): 

2238 """Find leaf-coalesceable VDIs in each COW tree""" 

2239 

2240 candidates = [] 

2241 if self.leafCoalesceForbidden(): 

2242 return candidates 

2243 

2244 self.gatherLeafCoalesceable(candidates) 

2245 

2246 self.xapi.update_task_progress("coalescable", len(candidates)) 

2247 

2248 freeSpace = self.getFreeSpace() 

2249 for candidate in candidates: 

2250 # check the space constraints to see if leaf-coalesce is actually 

2251 # feasible for this candidate 

2252 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing() 

2253 spaceNeededLive = spaceNeeded 

2254 if spaceNeeded > freeSpace: 

2255 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing() 

2256 if candidate.canLiveCoalesce(self.getStorageSpeed()): 

2257 spaceNeeded = spaceNeededLive 

2258 

2259 if spaceNeeded <= freeSpace: 

2260 Util.log("Leaf-coalesce candidate: %s" % candidate) 

2261 self.clear_no_space_msg(candidate) 

2262 return candidate 

2263 else: 

2264 Util.log("No space to leaf-coalesce %s (free space: %d)" % \ 

2265 (candidate, freeSpace)) 

2266 if spaceNeededLive <= freeSpace: 

2267 Util.log("...but enough space if skip snap-coalesce") 

2268 candidate.setConfig(VDI.DB_LEAFCLSC, 

2269 VDI.LEAFCLSC_OFFLINE) 

2270 self.no_space_candidates[candidate.uuid] = candidate 

2271 

2272 return None 

2273 

2274 def gatherLeafCoalesceable(self, candidates): 

2275 for vdi in self.vdis.values(): 

2276 if not vdi.isLeafCoalesceable(): 

2277 continue 

2278 if vdi in self._failedCoalesceTargets: 

2279 continue 

2280 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET: 

2281 Util.log("Skipping reset-on-boot %s" % vdi) 

2282 continue 

2283 if vdi.getConfig(vdi.DB_ALLOW_CACHING): 

2284 Util.log("Skipping allow_caching=true %s" % vdi) 

2285 continue 

2286 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED: 

2287 Util.log("Leaf-coalesce disabled for %s" % vdi) 

2288 continue 

2289 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or 

2290 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE): 

2291 continue 

2292 candidates.append(vdi) 

2293 

2294 def coalesce(self, vdi, dryRun=False): 

2295 """Coalesce vdi onto parent""" 

2296 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent)) 

2297 if dryRun:

2298 return 

2299 

2300 try: 

2301 self._coalesce(vdi) 

2302 except util.SMException as e: 

2303 if isinstance(e, AbortException):

2304 self.cleanup() 

2305 raise 

2306 else: 

2307 self._failedCoalesceTargets.append(vdi) 

2308 Util.logException("coalesce") 

2309 Util.log("Coalesce failed, skipping") 

2310 self.cleanup() 

2311 

2312 def coalesceLeaf(self, vdi, dryRun=False): 

2313 """Leaf-coalesce vdi onto parent""" 

2314 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent)) 

2315 if dryRun: 

2316 return 

2317 

2318 try: 

2319 uuid = vdi.uuid 

2320 try: 

2321 # "vdi" object will no longer be valid after this call 

2322 self._coalesceLeaf(vdi) 

2323 finally: 

2324 vdi = self.getVDI(uuid) 

2325 if vdi: 

2326 vdi.delConfig(vdi.DB_LEAFCLSC) 

2327 except AbortException: 

2328 self.cleanup() 

2329 raise 

2330 except (util.SMException, XenAPI.Failure) as e: 

2331 self._failedCoalesceTargets.append(vdi) 

2332 Util.logException("leaf-coalesce") 

2333 Util.log("Leaf-coalesce failed on %s, skipping" % vdi) 

2334 self.cleanup() 

2335 

2336 def garbageCollect(self, dryRun=False): 

2337 vdiList = self.findGarbage() 

2338 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

2339 for vdi in vdiList: 

2340 Util.log(" %s" % vdi) 

2341 if not dryRun: 

2342 self.deleteVDIs(vdiList) 

2343 self.cleanupJournals(dryRun) 

2344 

2345 def findGarbage(self): 

2346 vdiList = [] 

2347 for vdi in self.vdiTrees: 

2348 vdiList.extend(vdi.getAllPrunable()) 

2349 return vdiList 

2350 

2351 def deleteVDIs(self, vdiList) -> None: 

2352 for vdi in vdiList: 

2353 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): 

2354 raise AbortException("Aborting due to signal") 

2355 Util.log("Deleting unlinked VDI %s" % vdi) 

2356 self.deleteVDI(vdi) 

2357 

2358 def deleteVDI(self, vdi) -> None: 

2359 assert(len(vdi.children) == 0) 

2360 del self.vdis[vdi.uuid] 

2361 if vdi.parent:

2362 vdi.parent.children.remove(vdi) 

2363 if vdi in self.vdiTrees:

2364 self.vdiTrees.remove(vdi) 

2365 vdi.delete() 

2366 

2367 def forgetVDI(self, vdiUuid) -> None: 

2368 self.xapi.forgetVDI(self.uuid, vdiUuid) 

2369 

2370 def pauseVDIs(self, vdiList) -> None: 

2371 paused = [] 

2372 failed = False 

2373 for vdi in vdiList: 

2374 try: 

2375 vdi.pause() 

2376 paused.append(vdi) 

2377 except: 

2378 Util.logException("pauseVDIs") 

2379 failed = True 

2380 break 

2381 

2382 if failed: 

2383 self.unpauseVDIs(paused) 

2384 raise util.SMException("Failed to pause VDIs") 

2385 

2386 def unpauseVDIs(self, vdiList): 

2387 failed = False 

2388 for vdi in vdiList: 

2389 try: 

2390 vdi.unpause() 

2391 except: 

2392 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

2393 failed = True 

2394 if failed: 

2395 raise util.SMException("Failed to unpause VDIs") 

2396 

2397 def getFreeSpace(self) -> int: 

2398 return 0 

2399 

2400 def cleanup(self): 

2401 Util.log("In cleanup") 

2402 return 

2403 

2404 @override 

2405 def __str__(self) -> str: 

2406 if self.name: 

2407 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

2408 else: 

2409 ret = "%s" % self.uuid 

2410 return ret 

2411 

2412 def lock(self): 

2413 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort 

2414 signal to avoid deadlocking (trying to acquire the SR lock while the 

2415 lock is held by a process that is trying to abort us)""" 

2416 if not self._srLock: 

2417 return 

2418 

2419 if self._locked == 0: 

2420 abortFlag = IPCFlag(self.uuid) 

2421 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK): 

2422 if self._srLock.acquireNoblock(): 

2423 self._locked += 1 

2424 return 

2425 if abortFlag.test(FLAG_TYPE_ABORT): 

2426 raise AbortException("Abort requested") 

2427 time.sleep(SR.LOCK_RETRY_INTERVAL) 

2428 raise util.SMException("Unable to acquire the SR lock") 

2429 

2430 self._locked += 1 

2431 
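# Usage sketch (illustrative): lock()/unlock() are reentrant and should
# always be paired in a try/finally, as scanLocked() does above:
#
#     self.lock()
#     try:
#         self.scan(force)
#     finally:
#         self.unlock()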

2432 def unlock(self): 

2433 if not self._srLock:

2434 return 

2435 assert(self._locked > 0) 

2436 self._locked -= 1 

2437 if self._locked == 0: 

2438 self._srLock.release() 

2439 

2440 def needUpdateBlockInfo(self) -> bool: 

2441 for vdi in self.vdis.values(): 

2442 if vdi.scanError or len(vdi.children) == 0: 

2443 continue 

2444 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

2445 return True 

2446 return False 

2447 

2448 def updateBlockInfo(self) -> None: 

2449 for vdi in self.vdis.values(): 

2450 if vdi.scanError or len(vdi.children) == 0: 

2451 continue 

2452 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

2453 vdi.updateBlockInfo() 

2454 

2455 def cleanupCoalesceJournals(self): 

2456 """Remove stale coalesce VDI indicators""" 

2457 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2458 for uuid, jval in entries.items(): 

2459 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2460 

2461 def cleanupJournals(self, dryRun=False): 

2462 """delete journal entries for non-existing VDIs""" 

2463 for t in [LVMVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]: 

2464 entries = self.journaler.getAll(t) 

2465 for uuid, jval in entries.items(): 

2466 if self.getVDI(uuid): 

2467 continue 

2468 if t == SR.JRN_CLONE: 

2469 baseUuid, clonUuid = jval.split("_") 

2470 if self.getVDI(baseUuid): 

2471 continue 

2472 Util.log(" Deleting stale '%s' journal entry for %s " 

2473 "(%s)" % (t, uuid, jval)) 

2474 if not dryRun: 

2475 self.journaler.remove(t, uuid) 

2476 

2477 def cleanupCache(self, maxAge=-1) -> int: 

2478 return 0 

2479 

2480 def _hasLeavesAttachedOn(self, vdi: VDI): 

2481 leaves = vdi.getAllLeaves() 

2482 leaves_vdi = [leaf.uuid for leaf in leaves] 

2483 return util.get_hosts_attached_on(self.xapi.session, leaves_vdi) 

2484 

2485 def _gc_running_file(self, vdi: VDI): 

2486 run_file = "gc_running_{}".format(vdi.uuid) 

2487 return os.path.join(NON_PERSISTENT_DIR, str(self.uuid), run_file) 

2488 

2489 def _create_running_file(self, vdi: VDI): 

2490 with open(self._gc_running_file(vdi), "w") as f: 

2491 f.write("1") 

2492 

2493 def _delete_running_file(self, vdi: VDI): 

2494 os.unlink(self._gc_running_file(vdi)) 

2495 

2496 def _coalesce(self, vdi: VDI): 

2497 if self.journaler.get(vdi.JRN_RELINK, vdi.uuid):

2498 # this means we had done the actual coalescing already and just 

2499 # need to finish relinking and/or refreshing the children 

2500 Util.log("==> Coalesce apparently already done: skipping") 

2501 else: 

2502 # JRN_COALESCE is used to check which VDI is being coalesced in 

2503 # order to decide whether to abort the coalesce. We remove the 

2504 # journal as soon as the COW coalesce step is done, because we 

2505 # don't expect the rest of the process to take long 

2506 

2507 if os.path.exists(self._gc_running_file(vdi)):

2508 util.SMlog("gc_running file already exists for {}. Ignoring...".format(self.uuid))

2509 

2510 self._create_running_file(vdi) 

2511 

2512 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1") 

2513 host_refs = self._hasLeavesAttachedOn(vdi) 

2514 #TODO: this check of multiple host_refs should be done earlier in `is_coalesceable` to avoid stopping this late every time 

2515 if len(host_refs) > 1:

2516 util.SMlog("Not coalesceable, chain activated more than once") 

2517 raise Exception("Not coalesceable, chain activated more than once") #TODO: Use correct error 

2518 

2519 try: 

2520 if host_refs and vdi.cowutil.isCoalesceableOnRemote():

2521 #Leaf opened on another host, we need to call online coalesce 

2522 util.SMlog("Remote coalesce for {}".format(vdi.path)) 

2523 vdi._doCoalesceOnHost(list(host_refs)[0]) 

2524 else: 

2525 util.SMlog("Offline coalesce for {}".format(vdi.path)) 

2526 vdi._doCoalesce() 

2527 except Exception as e: 

2528 util.SMlog("EXCEPTION while coalescing: {}".format(e)) 

2529 self._delete_running_file(vdi) 

2530 raise 

2531 

2532 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid) 

2533 self._delete_running_file(vdi) 

2534 

2535 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid) 

2536 

2537 # we now need to relink the children: lock the SR to prevent ops 

2538 # like SM.clone from manipulating the VDIs we'll be relinking and 

2539 # rescan the SR first in case the children changed since the last 

2540 # scan 

2541 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1") 

2542 

2543 self.lock() 

2544 try: 

2545 vdi.parent._tagChildrenForRelink() 

2546 self.scan() 

2547 vdi._relinkSkip() #TODO: We could check if the parent is already the right one before doing the relink, or we could do the relink a second time, it doesn't seem to cause issues 

2548 finally: 

2549 self.unlock() 

2550 # Reload the children to leave things consistent 

2551 vdi.parent._reloadChildren(vdi) 

2552 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid) 

2553 

2554 self.deleteVDI(vdi) 

2555 

2556 class CoalesceTracker: 

2557 GRACE_ITERATIONS = 2 

2558 MAX_ITERATIONS_NO_PROGRESS = 3 

2559 MAX_ITERATIONS = 10 

2560 MAX_INCREASE_FROM_MINIMUM = 1.2 

2561 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2562 " --> Final size {finSize}" 

2563 

2564 def __init__(self, sr): 

2565 self.itsNoProgress = 0 

2566 self.its = 0 

2567 self.minSize = float("inf") 

2568 self.history = [] 

2569 self.reason = "" 

2570 self.startSize = None 

2571 self.finishSize = None 

2572 self.sr = sr 

2573 self.grace_remaining = self.GRACE_ITERATIONS 

2574 

2575 def abortCoalesce(self, prevSize, curSize): 

2576 self.its += 1 

2577 self.history.append(self.HISTORY_STRING.format(its=self.its, 

2578 initSize=prevSize, 

2579 finSize=curSize)) 

2580 

2581 self.finishSize = curSize 

2582 

2583 if self.startSize is None: 

2584 self.startSize = prevSize 

2585 

2586 if curSize < self.minSize: 

2587 self.minSize = curSize 

2588 

2589 if prevSize < self.minSize: 

2590 self.minSize = prevSize 

2591 

2592 if self.its == 1: 

2593 # Skip evaluating conditions on first iteration 

2594 return False 

2595 

2596 if prevSize < curSize: 

2597 self.itsNoProgress += 1 

2598 Util.log("No progress, attempt:" 

2599 " {attempt}".format(attempt=self.itsNoProgress)) 

2600 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2601 else: 

2602 # We made progress 

2603 return False 

2604 

2605 if self.its > self.MAX_ITERATIONS: 

2606 max = self.MAX_ITERATIONS 

2607 self.reason = \ 

2608 "Max iterations ({max}) exceeded".format(max=max) 

2609 return True 

2610 

2611 if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS: 

2612 max = self.MAX_ITERATIONS_NO_PROGRESS 

2613 self.reason = \ 

2614 "No progress made for {max} iterations".format(max=max) 

2615 return True 

2616 

2617 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2618 if curSize > maxSizeFromMin: 

2619 self.grace_remaining -= 1 

2620 if self.grace_remaining == 0: 

2621 self.reason = "Unexpected bump in size," \ 

2622 " compared to minimum achieved" 

2623 

2624 return True 

2625 

2626 return False 

2627 
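# Worked example (illustrative): with a minimum achieved size of 10 GiB, the
# bump threshold is MAX_INCREASE_FROM_MINIMUM * 10 GiB = 12 GiB. A leaf that
# regrows past 12 GiB burns one of the GRACE_ITERATIONS (2); the second such
# bump makes abortCoalesce() return True and the leaf-coalesce is abandoned.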

2628 def printSizes(self): 

2629 Util.log("Starting size was {size}" 

2630 .format(size=self.startSize)) 

2631 Util.log("Final size was {size}" 

2632 .format(size=self.finishSize)) 

2633 Util.log("Minimum size achieved was {size}" 

2634 .format(size=self.minSize)) 

2635 

2636 def printReasoning(self): 

2637 Util.log("Aborted coalesce") 

2638 for hist in self.history: 

2639 Util.log(hist) 

2640 Util.log(self.reason) 

2641 self.printSizes() 

2642 

2643 def printSummary(self): 

2644 if self.its == 0: 

2645 return 

2646 

2647 if self.reason:

2648 Util.log("Aborted coalesce") 

2649 Util.log(self.reason) 

2650 else: 

2651 Util.log("Coalesce summary") 

2652 

2653 Util.log(f"Performed {self.its} iterations") 

2654 self.printSizes() 

2655 

2656 

2657 def _coalesceLeaf(self, vdi): 

2658 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot 

2659 complete due to external changes, namely vdi_delete and vdi_snapshot 

2660 that alter leaf-coalescibility of vdi""" 

2661 tracker = self.CoalesceTracker(self) 

2662 while not vdi.canLiveCoalesce(self.getStorageSpeed()): 

2663 prevSizePhys = vdi.getSizePhys() 

2664 if not self._snapshotCoalesce(vdi):

2665 return False 

2666 if tracker.abortCoalesce(prevSizePhys, vdi.getSizePhys()): 

2667 tracker.printReasoning() 

2668 raise util.SMException("VDI {uuid} could not be coalesced" 

2669 .format(uuid=vdi.uuid)) 

2670 tracker.printSummary() 

2671 return self._liveLeafCoalesce(vdi) 

2672 
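# Flow sketch (illustrative): each iteration snapshots the leaf (shrinking
# the active image), coalesces the temporary snapshot into the parent, then
# re-checks canLiveCoalesce(); once the leaf is small (or fast) enough, the
# final step pauses the VDI and performs the live leaf-coalesce. The tracker
# aborts the loop when the leaf stops shrinking between iterations.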

2673 def calcStorageSpeed(self, startTime, endTime, coalescedSize): 

2674 speed = None 

2675 total_time = endTime - startTime 

2676 if total_time > 0: 

2677 speed = float(coalescedSize) / float(total_time) 

2678 return speed 

2679 
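# Worked example (illustrative): coalescing 2 GiB of data in 100 seconds
# yields 2147483648 / 100, i.e. ~21.5 MB/s. A zero-length interval returns
# None, so bogus samples never enter the running average kept on disk.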

2680 def writeSpeedToFile(self, speed): 

2681 content = [] 

2682 speedFile = None 

2683 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2684 self.lock() 

2685 try: 

2686 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2687 lines = "" 

2688 if not os.path.isfile(path): 

2689 lines = str(speed) + "\n" 

2690 else: 

2691 speedFile = open(path, "r+") 

2692 content = speedFile.readlines() 

2693 content.append(str(speed) + "\n") 

2694 if len(content) > N_RUNNING_AVERAGE: 

2695 del content[0] 

2696 lines = "".join(content) 

2697 

2698 util.atomicFileWrite(path, VAR_RUN, lines) 

2699 finally: 

2700 if speedFile is not None: 

2701 speedFile.close() 

2702 Util.log("Closing file: {myfile}".format(myfile=path)) 

2703 self.unlock() 

2704 

2705 def recordStorageSpeed(self, startTime, endTime, coalescedSize): 

2706 speed = self.calcStorageSpeed(startTime, endTime, coalescedSize) 

2707 if speed is None: 

2708 return 

2709 

2710 self.writeSpeedToFile(speed) 

2711 

2712 def getStorageSpeed(self): 

2713 speedFile = None 

2714 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2715 self.lock() 

2716 try: 

2717 speed = None 

2718 if os.path.isfile(path): 

2719 speedFile = open(path) 

2720 content = speedFile.readlines() 

2721 try: 

2722 content = [float(i) for i in content] 

2723 except ValueError: 

2724 Util.log("Something bad in the speed log:{log}". 

2725 format(log=content))  # the file was already consumed; log the raw lines we read

2726 return speed 

2727 

2728 if len(content): 

2729 speed = sum(content) / float(len(content)) 

2730 if speed <= 0:

2731 # Defensive, should be impossible. 

2732 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2733 format(speed=speed, uuid=self.uuid)) 

2734 speed = None 

2735 else: 

2736 Util.log("Speed file empty for SR: {uuid}". 

2737 format(uuid=self.uuid)) 

2738 else: 

2739 Util.log("Speed log missing for SR: {uuid}". 

2740 format(uuid=self.uuid)) 

2741 return speed 

2742 finally: 

2743 if not (speedFile is None): 

2744 speedFile.close() 

2745 self.unlock() 

2746 

2747 def _snapshotCoalesce(self, vdi): 

2748 # Note that because we are not holding any locks here, concurrent SM 

2749 # operations may change this tree under our feet. In particular, vdi 

2750 # can be deleted, or it can be snapshotted. 

2751 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2752 Util.log("Single-snapshotting %s" % vdi) 

2753 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2754 try: 

2755 ret = self.xapi.singleSnapshotVDI(vdi) 

2756 Util.log("Single-snapshot returned: %s" % ret) 

2757 except XenAPI.Failure as e: 

2758 if util.isInvalidVDI(e): 

2759 Util.log("The VDI appears to have been concurrently deleted") 

2760 return False 

2761 raise 

2762 self.scanLocked() 

2763 tempSnap = vdi.parent 

2764 if not tempSnap.isCoalesceable(): 

2765 Util.log("The VDI appears to have been concurrently snapshotted") 

2766 return False 

2767 Util.log("Coalescing parent %s" % tempSnap) 

2768 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2769 sizePhys = vdi.getSizePhys() 

2770 self._coalesce(tempSnap) 

2771 if not vdi.isLeafCoalesceable(): 

2772 Util.log("The VDI tree appears to have been altered since") 

2773 return False 

2774 return True 

2775 

2776 def _liveLeafCoalesce(self, vdi: VDI) -> bool: 

2777 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) 

2778 self.lock() 

2779 try: 

2780 self.scan() 

2781 if not self.getVDI(vdi.uuid): 

2782 Util.log("The VDI appears to have been deleted meanwhile") 

2783 return False 

2784 if not vdi.isLeafCoalesceable(): 

2785 Util.log("The VDI is no longer leaf-coalesceable") 

2786 return False 

2787 

2788 uuid = vdi.uuid 

2789 vdi.pause(failfast=True) 

2790 try: 

2791 try: 

2792 # "vdi" object will no longer be valid after this call 

2793 self._create_running_file(vdi) 

2794 self._doCoalesceLeaf(vdi) 

2795 except: 

2796 Util.logException("_doCoalesceLeaf") 

2797 self._handleInterruptedCoalesceLeaf() 

2798 raise 

2799 finally: 

2800 vdi = self.getVDI(uuid) 

2801 if vdi: 

2802 vdi.ensureUnpaused() 

2803 self._delete_running_file(vdi) 

2804 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid) 

2805 if vdiOld: 

2806 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2807 self.deleteVDI(vdiOld) 

2808 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2809 finally: 

2810 self.cleanup() 

2811 self.unlock() 

2812 self.logFilter.logState() 

2813 return True 

2814 

2815 def _doCoalesceLeaf(self, vdi: VDI): 

2816 """Actual coalescing of a leaf VDI onto parent. Must be called in an 

2817 offline/atomic context""" 

2818 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid) 

2819 self._prepareCoalesceLeaf(vdi) 

2820 vdi.parent._setHidden(False) 

2821 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False) 

2822 vdi.validate(True) 

2823 vdi.parent.validate(True) 

2824 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid) 

2825 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT 

2826 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE:

2827 Util.log("Leaf-coalesce forced, will not use timeout") 

2828 timeout = 0 

2829 vdi._coalesceCowImage(timeout) 

2830 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid) 

2831 vdi.parent.validate(True) 

2832 #vdi._verifyContents(timeout / 2) 

2833 

2834 # rename 

2835 vdiUuid = vdi.uuid 

2836 oldName = vdi.fileName 

2837 origParentUuid = vdi.parent.uuid 

2838 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid) 

2839 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid) 

2840 vdi.parent.rename(vdiUuid) 

2841 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid) 

2842 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid) 

2843 

2844 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is 

2845 # garbage 

2846 

2847 # update the VDI record 

2848 if vdi.parent.vdi_type == VdiType.RAW:

2849 vdi.parent.setConfig(VDI.DB_VDI_TYPE, VdiType.RAW) 

2850 vdi.parent.delConfig(VDI.DB_VDI_BLOCKS) 

2851 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid) 

2852 

2853 self._updateNode(vdi) 

2854 

2855 # delete the obsolete leaf & inflate the parent (in that order, to 

2856 # minimize free space requirements) 

2857 parent = vdi.parent 

2858 vdi._setHidden(True) 

2859 vdi.parent.children = [] 

2860 vdi.parent = None 

2861 

2862 if parent.parent is None: 

2863 parent.delConfig(VDI.DB_VDI_PARENT) 

2864 

2865 extraSpace = self._calcExtraSpaceNeeded(vdi, parent) 

2866 freeSpace = self.getFreeSpace() 

2867 if freeSpace < extraSpace:

2868 # don't delete unless we need the space: deletion is time-consuming 

2869 # because it requires contacting the slaves, and we're paused here 

2870 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2871 self.deleteVDI(vdi) 

2872 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2873 

2874 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid) 

2875 self.journaler.remove(VDI.JRN_LEAF, vdiUuid) 

2876 

2877 self.forgetVDI(origParentUuid) 

2878 self._finishCoalesceLeaf(parent) 

2879 self._updateSlavesOnResize(parent) 

2880 

2881 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

2882 assert(VdiType.isCowImage(parent.vdi_type)) 

2883 extra = child.getSizePhys() - parent.getSizePhys() 

2884 if extra < 0:

2885 extra = 0 

2886 return extra 

2887 

2888 def _prepareCoalesceLeaf(self, vdi) -> None: 

2889 pass 

2890 

2891 def _updateNode(self, vdi) -> None: 

2892 pass 

2893 

2894 def _finishCoalesceLeaf(self, parent) -> None: 

2895 pass 

2896 

2897 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

2898 pass 

2899 

2900 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None: 

2901 pass 

2902 

2903 def _updateSlavesOnResize(self, vdi) -> None: 

2904 pass 

2905 

2906 def _removeStaleVDIs(self, uuidsPresent) -> None: 

2907 for uuid in list(self.vdis.keys()): 

2908 if not uuid in uuidsPresent: 

2909 Util.log("VDI %s disappeared since last scan" % \ 

2910 self.vdis[uuid]) 

2911 del self.vdis[uuid] 

2912 

2913 def _handleInterruptedCoalesceLeaf(self) -> None: 

2914 """An interrupted leaf-coalesce operation may leave the COW tree in an 

2915 inconsistent state. If the old-leaf VDI is still present, we revert the 

2916 operation (in case the original error is persistent); otherwise we must 

2917 finish the operation""" 

2918 pass 

2919 

2920 def _buildTree(self, force): 

2921 self.vdiTrees = [] 

2922 for vdi in self.vdis.values(): 

2923 if vdi.parentUuid: 

2924 parent = self.getVDI(vdi.parentUuid) 

2925 if not parent: 

2926 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2927 self.vdiTrees.append(vdi) 

2928 continue 

2929 if force: 

2930 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2931 (vdi.parentUuid, vdi.uuid)) 

2932 self.vdiTrees.append(vdi) 

2933 continue 

2934 else: 

2935 raise util.SMException("Parent VDI %s of %s not " \ 

2936 "found" % (vdi.parentUuid, vdi.uuid)) 

2937 vdi.parent = parent 

2938 parent.children.append(vdi) 

2939 else: 

2940 self.vdiTrees.append(vdi) 

2941 

2942 

2943class FileSR(SR): 

2944 TYPE = SR.TYPE_FILE 

2945 CACHE_FILE_EXT = ".vhdcache" 

2946 # cache cleanup actions 

2947 CACHE_ACTION_KEEP = 0 

2948 CACHE_ACTION_REMOVE = 1 

2949 CACHE_ACTION_REMOVE_IF_INACTIVE = 2 

2950 

2951 def __init__(self, uuid, xapi, createLock, force): 

2952 SR.__init__(self, uuid, xapi, createLock, force) 

2953 self.path = "/var/run/sr-mount/%s" % self.uuid 

2954 self.journaler = fjournaler.Journaler(self.path) 

2955 

2956 @override 

2957 def scan(self, force=False) -> None: 

2958 if not util.pathexists(self.path): 

2959 raise util.SMException("directory %s not found!" % self.uuid) 

2960 

2961 uuidsPresent: List[str] = [] 

2962 

2963 for vdi_type in VDI_COW_TYPES: 

2964 scan_result = self._scan(vdi_type, force) 

2965 for uuid, image_info in scan_result.items(): 

2966 vdi = self.getVDI(uuid) 

2967 if not vdi: 

2968 self.logFilter.logNewVDI(uuid) 

2969 vdi = FileVDI(self, uuid, vdi_type) 

2970 self.vdis[uuid] = vdi 

2971 vdi.load(image_info) 

2972 uuidsPresent.extend(scan_result.keys()) 

2973 

2974 rawList = [x for x in os.listdir(self.path) if x.endswith(VdiTypeExtension.RAW)] 

2975 for rawName in rawList: 

2976 uuid = FileVDI.extractUuid(rawName) 

2977 uuidsPresent.append(uuid) 

2978 vdi = self.getVDI(uuid) 

2979 if not vdi: 

2980 self.logFilter.logNewVDI(uuid) 

2981 vdi = FileVDI(self, uuid, VdiType.RAW) 

2982 self.vdis[uuid] = vdi 

2983 self._removeStaleVDIs(uuidsPresent) 

2984 self._buildTree(force) 

2985 self.logFilter.logState() 

2986 self._handleInterruptedCoalesceLeaf() 

2987 

2988 @override 

2989 def getFreeSpace(self) -> int: 

2990 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) 

2991 

2992 @override 

2993 def deleteVDIs(self, vdiList) -> None: 

2994 rootDeleted = False 

2995 for vdi in vdiList: 

2996 if not vdi.parent: 

2997 rootDeleted = True 

2998 break 

2999 SR.deleteVDIs(self, vdiList) 

3000 if self.xapi.srRecord["type"] == "nfs" and rootDeleted: 

3001 self.xapi.markCacheSRsDirty() 

3002 

3003 @override 

3004 def cleanupCache(self, maxAge=-1) -> int: 

3005 """Clean up IntelliCache cache files. Caches for leaf nodes are 

3006 removed when the leaf node no longer exists or its allow-caching 

3007 attribute is not set. Caches for parent nodes are removed when the 

3008 parent node no longer exists or it hasn't been used in more than 

3009 <maxAge> hours. 

3010 Return number of caches removed. 

3011 """ 

3012 numRemoved = 0 

3013 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)] 

3014 Util.log("Found %d cache files" % len(cacheFiles)) 

3015 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge) 

3016 for cacheFile in cacheFiles: 

3017 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)] 

3018 action = self.CACHE_ACTION_KEEP 

3019 rec = self.xapi.getRecordVDI(uuid) 

3020 if not rec: 

3021 Util.log("Cache %s: VDI doesn't exist" % uuid) 

3022 action = self.CACHE_ACTION_REMOVE 

3023 elif rec["managed"] and not rec["allow_caching"]: 

3024 Util.log("Cache %s: caching disabled" % uuid) 

3025 action = self.CACHE_ACTION_REMOVE 

3026 elif not rec["managed"] and maxAge >= 0: 

3027 lastAccess = datetime.datetime.fromtimestamp( \ 

3028 os.path.getatime(os.path.join(self.path, cacheFile))) 

3029 if lastAccess < cutoff: 

3030 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge)) 

3031 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE 

3032 

3033 if action == self.CACHE_ACTION_KEEP: 

3034 Util.log("Keeping cache %s" % uuid) 

3035 continue 

3036 

3037 lockId = uuid 

3038 parentUuid = None 

3039 if rec and rec["managed"]: 

3040 parentUuid = rec["sm_config"].get("vhd-parent") 

3041 if parentUuid: 

3042 lockId = parentUuid 

3043 

3044 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId) 

3045 cacheLock.acquire() 

3046 try: 

3047 if self._cleanupCache(uuid, action): 

3048 numRemoved += 1 

3049 finally: 

3050 cacheLock.release() 

3051 return numRemoved 

3052 
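# Usage sketch (illustrative): a periodic caller can drop parent caches that
# have not been touched for a day with sr.cleanupCache(maxAge=24); with the
# default maxAge=-1 the age check is skipped and only caches whose VDI is
# gone or has caching disabled are removed.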

3053 def _cleanupCache(self, uuid, action): 

3054 assert(action != self.CACHE_ACTION_KEEP) 

3055 rec = self.xapi.getRecordVDI(uuid) 

3056 if rec and rec["allow_caching"]: 

3057 Util.log("Cache %s appears to have become valid" % uuid) 

3058 return False 

3059 

3060 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT) 

3061 tapdisk = blktap2.Tapdisk.find_by_path(fullPath) 

3062 if tapdisk: 

3063 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE: 

3064 Util.log("Cache %s still in use" % uuid) 

3065 return False 

3066 Util.log("Shutting down tapdisk for %s" % fullPath) 

3067 tapdisk.shutdown() 

3068 

3069 Util.log("Deleting file %s" % fullPath) 

3070 os.unlink(fullPath) 

3071 return True 

3072 

3073 def _isCacheFileName(self, name): 

3074 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \ 

3075 name.endswith(self.CACHE_FILE_EXT) 

3076 

3077 def _scan(self, vdi_type, force): 

3078 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3079 error = False 

3080 pattern = os.path.join(self.path, "*%s" % VDI_TYPE_TO_EXTENSION[vdi_type]) 

3081 scan_result = getCowUtil(vdi_type).getAllInfoFromVG(pattern, FileVDI.extractUuid) 

3082 for uuid, vdiInfo in scan_result.items(): 

3083 if vdiInfo.error: 

3084 error = True 

3085 break 

3086 if not error: 

3087 return scan_result 

3088 Util.log("Scan error on attempt %d" % i) 

3089 if force: 

3090 return scan_result 

3091 raise util.SMException("Scan error") 

3092 

3093 @override 

3094 def deleteVDI(self, vdi) -> None: 

3095 self._checkSlaves(vdi) 

3096 SR.deleteVDI(self, vdi) 

3097 

3098 def _checkSlaves(self, vdi): 

3099 onlineHosts = self.xapi.getOnlineHosts() 

3100 abortFlag = IPCFlag(self.uuid) 

3101 for pbdRecord in self.xapi.getAttachedPBDs(): 

3102 hostRef = pbdRecord["host"] 

3103 if hostRef == self.xapi._hostRef: 

3104 continue 

3105 if abortFlag.test(FLAG_TYPE_ABORT): 

3106 raise AbortException("Aborting due to signal") 

3107 try: 

3108 self._checkSlave(hostRef, vdi) 

3109 except util.CommandException: 

3110 if hostRef in onlineHosts: 

3111 raise 

3112 

3113 def _checkSlave(self, hostRef, vdi): 

3114 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path}) 

3115 Util.log("Checking with slave: %s" % repr(call)) 

3116 _host = self.xapi.session.xenapi.host 

3117 _host.call_plugin(*call)  # result unused; the call raises on failure

3118 

3119 @override 

3120 def _handleInterruptedCoalesceLeaf(self) -> None: 

3121 entries = self.journaler.getAll(VDI.JRN_LEAF) 

3122 for uuid, parentUuid in entries.items(): 

3123 fileList = os.listdir(self.path) 

3124 childName = uuid + VdiTypeExtension.VHD 

3125 tmpChildName = self.TMP_RENAME_PREFIX + uuid + VdiTypeExtension.VHD 

3126 parentName1 = parentUuid + VdiTypeExtension.VHD 

3127 parentName2 = parentUuid + VdiTypeExtension.RAW 

3128 parentPresent = (parentName1 in fileList or parentName2 in fileList) 

3129 if parentPresent or tmpChildName in fileList: 

3130 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3131 else: 

3132 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3133 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3134 vdi = self.getVDI(uuid) 

3135 if vdi: 

3136 vdi.ensureUnpaused() 

3137 

3138 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3139 Util.log("*** UNDO LEAF-COALESCE") 

3140 parent = self.getVDI(parentUuid) 

3141 if not parent: 

3142 parent = self.getVDI(childUuid) 

3143 if not parent: 

3144 raise util.SMException("Neither %s nor %s found" % \ 

3145 (parentUuid, childUuid)) 

3146 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

3147 parent.rename(parentUuid) 

3148 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

3149 

3150 child = self.getVDI(childUuid) 

3151 if not child: 

3152 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3153 if not child: 

3154 raise util.SMException("Neither %s nor %s found" % \ 

3155 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3156 Util.log("Renaming child back to %s" % childUuid) 

3157 child.rename(childUuid) 

3158 Util.log("Updating the VDI record") 

3159 child.setConfig(VDI.DB_VDI_PARENT, parentUuid) 

3160 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type) 

3161 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3162 

3163 if child.isHidden(): 

3164 child._setHidden(False) 

3165 if not parent.isHidden(): 

3166 parent._setHidden(True) 

3167 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3168 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3169 Util.log("*** leaf-coalesce undo successful") 

3170 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3171 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3172 

3173 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3174 Util.log("*** FINISH LEAF-COALESCE") 

3175 vdi = self.getVDI(childUuid) 

3176 if not vdi: 

3177 Util.log(f"_finishInterruptedCoalesceLeaf, vdi {childUuid} not found, aborting") 

3178 raise util.SMException("VDI %s not found" % childUuid) 

3179 try: 

3180 self.forgetVDI(parentUuid) 

3181 except XenAPI.Failure: 

3182 Util.logException('_finishInterruptedCoalesceLeaf') 


3184 self._updateSlavesOnResize(vdi) 

3185 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3186 Util.log("*** finished leaf-coalesce successfully") 

3187 

3188 

3189class LVMSR(SR): 

3190 TYPE = SR.TYPE_LVHD 

3191 SUBTYPES = ["lvhdoiscsi", "lvhdohba"] 

3192 

3193 def __init__(self, uuid, xapi, createLock, force): 

3194 SR.__init__(self, uuid, xapi, createLock, force) 

3195 self.vgName = "%s%s" % (VG_PREFIX, self.uuid) 

3196 self.path = os.path.join(VG_LOCATION, self.vgName) 

3197 

3198 sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid) 

3199 other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref) 

3200 lvm_conf = other_conf.get('lvm-conf') if other_conf else None 

3201 self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf) 

3202 

3203 self.lvActivator = LVActivator(self.uuid, self.lvmCache) 

3204 self.journaler = journaler.Journaler(self.lvmCache) 

3205 

3206 @override 

3207 def deleteVDI(self, vdi) -> None: 

3208 if self.lvActivator.get(vdi.uuid, False): 

3209 self.lvActivator.deactivate(vdi.uuid, False) 

3210 self._checkSlaves(vdi) 

3211 SR.deleteVDI(self, vdi) 

3212 

3213 @override 

3214 def forgetVDI(self, vdiUuid) -> None: 

3215 SR.forgetVDI(self, vdiUuid) 

3216 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) 

3217 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid) 

3218 

3219 @override 

3220 def getFreeSpace(self) -> int: 

3221 stats = lvutil._getVGstats(self.vgName) 

3222 return stats['physical_size'] - stats['physical_utilisation'] 

3223 

3224 @override 

3225 def cleanup(self): 

3226 if not self.lvActivator.deactivateAll(): 

3227 Util.log("ERROR deactivating LVs while cleaning up") 

3228 

3229 @override 

3230 def needUpdateBlockInfo(self) -> bool: 

3231 for vdi in self.vdis.values(): 

3232 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0: 

3233 continue 

3234 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

3235 return True 

3236 return False 

3237 

3238 @override 

3239 def updateBlockInfo(self) -> None: 

3240 numUpdated = 0 

3241 for vdi in self.vdis.values(): 

3242 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0: 

3243 continue 

3244 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

3245 vdi.updateBlockInfo() 

3246 numUpdated += 1 

3247 if numUpdated: 

3248 # deactivate the LVs again sooner rather than later. If we don't do it

3249 # now, by the time this thread gets to deactivations, another one

3250 # might have leaf-coalesced a node and deleted it, making the child 

3251 # inherit the refcount value and preventing the correct decrement 

3252 self.cleanup() 

3253 

3254 @override 

3255 def scan(self, force=False) -> None: 

3256 vdis = self._scan(force) 

3257 for uuid, vdiInfo in vdis.items(): 

3258 vdi = self.getVDI(uuid) 

3259 if not vdi: 

3260 self.logFilter.logNewVDI(uuid) 

3261 vdi = LVMVDI(self, uuid, vdiInfo.vdiType) 

3262 self.vdis[uuid] = vdi 

3263 vdi.load(vdiInfo) 

3264 self._removeStaleVDIs(vdis.keys()) 

3265 self._buildTree(force) 

3266 self.logFilter.logState() 

3267 self._handleInterruptedCoalesceLeaf() 

3268 

3269 def _scan(self, force): 

3270 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3271 error = False 

3272 self.lvmCache.refresh() 

3273 vdis = LvmCowUtil.getVDIInfo(self.lvmCache) 

3274 for uuid, vdiInfo in vdis.items(): 

3275 if vdiInfo.scanError: 

3276 error = True 

3277 break 

3278 if not error: 

3279 return vdis 

3280 Util.log("Scan error, retrying (%d)" % i) 

3281 if force: 

3282 return vdis 

3283 raise util.SMException("Scan error") 

3284 

3285 @override 

3286 def _removeStaleVDIs(self, uuidsPresent) -> None: 

3287 for uuid in list(self.vdis.keys()): 

3288 if uuid not in uuidsPresent:

3289 Util.log("VDI %s disappeared since last scan" % \ 

3290 self.vdis[uuid]) 

3291 del self.vdis[uuid] 

3292 if self.lvActivator.get(uuid, False): 

3293 self.lvActivator.remove(uuid, False) 

3294 

3295 @override 

3296 def _liveLeafCoalesce(self, vdi) -> bool: 

3297 """If the parent is raw and the child was resized (virt. size), then 

3298 we'll need to resize the parent, which can take a while due to zeroing 

3299 out of the extended portion of the LV. Do it before pausing the child 

3300 to avoid a protracted downtime""" 

3301 if not VdiType.isCowImage(vdi.parent.vdi_type) and vdi.sizeVirt > vdi.parent.sizeVirt: 

3302 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

3303 vdi.parent._increaseSizeVirt(vdi.sizeVirt) 

3304 

3305 return SR._liveLeafCoalesce(self, vdi) 

3306 

3307 @override 

3308 def _prepareCoalesceLeaf(self, vdi) -> None: 

3309 vdi._activateChain() 

3310 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

3311 vdi.deflate() 

3312 vdi.inflateParentForCoalesce() 

3313 

3314 @override 

3315 def _updateNode(self, vdi) -> None: 

3316 # fix the refcounts: the remaining node should inherit the binary 

3317 # refcount from the leaf (because if it was online, it should remain 

3318 # refcounted as such), but the normal refcount from the parent (because 

3319 # this node is really the parent node) - minus 1 if it is online (since 

3320 # non-leaf nodes increment their normal counts when they are online and 

3321 # we are now a leaf, storing that 1 in the binary refcount). 
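        # Worked example (hypothetical counts): if the leaf was attached
        # (cCnt=0, cBcnt=1) and the parent was held open by two children
        # (pCnt=2, pBcnt=0), the surviving node gets (pCnt - cBcnt, cBcnt)
        # = (1, 1): one remaining normal reference plus the "online" unit
        # carried in the binary refcount.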

3322 ns = NS_PREFIX_LVM + self.uuid 

3323 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns) 

3324 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns) 

3325 pCnt = pCnt - cBcnt 

3326 assert(pCnt >= 0) 

3327 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns) 

3328 

3329 @override 

3330 def _finishCoalesceLeaf(self, parent) -> None: 

3331 if not parent.isSnapshot() or parent.isAttachedRW(): 

3332 parent.inflateFully() 

3333 else: 

3334 parent.deflate() 

3335 

3336 @override 

3337 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3338 return parent.lvmcowutil.calcVolumeSize(parent.sizeVirt) - parent.sizeLV 

3339 

3340 @override 

3341 def _handleInterruptedCoalesceLeaf(self) -> None: 

3342 entries = self.journaler.getAll(VDI.JRN_LEAF) 

3343 for uuid, parentUuid in entries.items(): 

3344 undo = False 

3345 for prefix in LV_PREFIX.values(): 

3346 parentLV = prefix + parentUuid 

3347 undo = self.lvmCache.checkLV(parentLV) 

3348 if undo: 

3349 break 

3350 

3351 if not undo: 

3352 for prefix in LV_PREFIX.values(): 

3353 tmpChildLV = prefix + uuid 

3354 undo = self.lvmCache.checkLV(tmpChildLV) 

3355 if undo: 

3356 break 

3357 

3358 if undo: 

3359 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3360 else: 

3361 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3362 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3363 vdi = self.getVDI(uuid) 

3364 if vdi: 

3365 vdi.ensureUnpaused() 

3366 

3367 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3368 Util.log("*** UNDO LEAF-COALESCE") 

3369 parent = self.getVDI(parentUuid) 

3370 if not parent: 

3371 parent = self.getVDI(childUuid) 

3372 if not parent: 

3373 raise util.SMException("Neither %s nor %s found" % \ 

3374 (parentUuid, childUuid)) 

3375 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

3376 parent.rename(parentUuid) 

3377 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

3378 

3379 child = self.getVDI(childUuid) 

3380 if not child: 

3381 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3382 if not child: 

3383 raise util.SMException("Neither %s nor %s found" % \ 

3384 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3385 Util.log("Renaming child back to %s" % childUuid) 

3386 child.rename(childUuid) 

3387 Util.log("Updating the VDI record") 

3388 child.setConfig(VDI.DB_VDI_PARENT, parentUuid) 

3389 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type) 

3390 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3391 

3392 # refcount (best effort - assume that it had succeeded if the 

3393 # second rename succeeded; if not, this adjustment will be wrong, 

3394 # leading to a non-deactivation of the LV) 

3395 ns = NS_PREFIX_LVM + self.uuid 

3396 cCnt, cBcnt = RefCounter.check(child.uuid, ns) 

3397 pCnt, pBcnt = RefCounter.check(parent.uuid, ns) 

3398 pCnt = pCnt + cBcnt 

3399 RefCounter.set(parent.uuid, pCnt, 0, ns) 

3400 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid) 

3401 

3402 parent.deflate() 

3403 child.inflateFully() 

3404 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid) 

3405 if child.isHidden(): 

3406 child._setHidden(False) 

3407 if not parent.isHidden(): 

3408 parent._setHidden(True) 

3409 if not parent.lvReadonly: 

3410 self.lvmCache.setReadonly(parent.fileName, True) 

3411 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3412 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3413 Util.log("*** leaf-coalesce undo successful") 

3414 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3415 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3416 

3417 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3418 Util.log("*** FINISH LEAF-COALESCE") 

3419 vdi = self.getVDI(childUuid) 

3420 if not vdi: 

3421 raise util.SMException("VDI %s not found" % childUuid) 

3422 vdi.inflateFully() 

3423 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid) 

3424 try: 

3425 self.forgetVDI(parentUuid) 

3426 except XenAPI.Failure: 

3427 pass 

3428 self._updateSlavesOnResize(vdi) 

3429 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3430 Util.log("*** finished leaf-coalesce successfully") 

3431 

3432 def _checkSlaves(self, vdi): 

3433 """Confirm with all slaves in the pool that 'vdi' is not in use. We 

3434 try to check all slaves, including those that the Agent believes are 

3435 offline, but ignore failures for offline hosts. This is to avoid cases 

3436 where the Agent thinks a host is offline but the host is up.""" 

3437 args = {"vgName": self.vgName, 

3438 "action1": "deactivateNoRefcount", 

3439 "lvName1": vdi.fileName, 

3440 "action2": "cleanupLockAndRefcount", 

3441 "uuid2": vdi.uuid, 

3442 "ns2": NS_PREFIX_LVM + self.uuid} 

3443 onlineHosts = self.xapi.getOnlineHosts() 

3444 abortFlag = IPCFlag(self.uuid) 

3445 for pbdRecord in self.xapi.getAttachedPBDs(): 

3446 hostRef = pbdRecord["host"] 

3447 if hostRef == self.xapi._hostRef: 

3448 continue 

3449 if abortFlag.test(FLAG_TYPE_ABORT): 

3450 raise AbortException("Aborting due to signal") 

3451 Util.log("Checking with slave %s (path %s)" % ( 

3452 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path)) 

3453 try: 

3454 self.xapi.ensureInactive(hostRef, args) 

3455 except XenAPI.Failure: 

3456 if hostRef in onlineHosts: 

3457 raise 

3458 

3459 @override 

3460 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

3461 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) 

3462 if not slaves: 

3463 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ 

3464 child) 

3465 return 

3466 

3467 tmpName = LV_PREFIX[child.vdi_type] + self.TMP_RENAME_PREFIX + child.uuid

3468 args = {"vgName": self.vgName, 

3469 "action1": "deactivateNoRefcount", 

3470 "lvName1": tmpName, 

3471 "action2": "deactivateNoRefcount", 

3472 "lvName2": child.fileName, 

3473 "action3": "refresh", 

3474 "lvName3": child.fileName, 

3475 "action4": "refresh", 

3476 "lvName4": parent.fileName} 

3477 for slave in slaves: 

3478 Util.log("Updating %s, %s, %s on slave %s" % \ 

3479 (tmpName, child.fileName, parent.fileName, 

3480 self.xapi.getRecordHost(slave)['hostname'])) 

3481 text = self.xapi.session.xenapi.host.call_plugin( \ 

3482 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3483 Util.log("call-plugin returned: '%s'" % text) 

3484 

3485 @override 

3486 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None: 

3487 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) 

3488 if not slaves: 

3489 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) 

3490 return 

3491 

3492 args = {"vgName": self.vgName, 

3493 "action1": "deactivateNoRefcount", 

3494 "lvName1": oldNameLV, 

3495 "action2": "refresh", 

3496 "lvName2": vdi.fileName, 

3497 "action3": "cleanupLockAndRefcount", 

3498 "uuid3": origParentUuid, 

3499 "ns3": NS_PREFIX_LVM + self.uuid} 

3500 for slave in slaves: 

3501 Util.log("Updating %s to %s on slave %s" % \ 

3502 (oldNameLV, vdi.fileName, 

3503 self.xapi.getRecordHost(slave)['hostname'])) 

3504 text = self.xapi.session.xenapi.host.call_plugin( \ 

3505 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3506 Util.log("call-plugin returned: '%s'" % text) 

3507 

3508 @override 

3509 def _updateSlavesOnResize(self, vdi) -> None: 

3510 uuids = [x.uuid for x in vdi.getAllLeaves()] 

3511 slaves = util.get_slaves_attached_on(self.xapi.session, uuids) 

3512 if not slaves: 

3513 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi) 

3514 return 

3515 LvmCowUtil.refreshVolumeOnSlaves(self.xapi.session, self.uuid, self.vgName, 

3516 vdi.fileName, vdi.uuid, slaves) 

3517 

3518 

3519class LinstorSR(SR): 

3520 TYPE = SR.TYPE_LINSTOR 

3521 

3522 def __init__(self, uuid, xapi, createLock, force): 

3523 if not LINSTOR_AVAILABLE: 

3524 raise util.SMException(

3525 "Can't load cleanup LinstorSR: LINSTOR libraries are missing"

3526 )

3527 

3528 SR.__init__(self, uuid, xapi, createLock, force) 

3529 self.path = LinstorVolumeManager.DEV_ROOT_PATH 

3530 

3531 class LinstorProxy: 

3532 def __init__(self, sr: LinstorSR) -> None: 

3533 self.sr = sr 

3534 

3535 def __getattr__(self, attr: str) -> Any: 

3536 assert self.sr, "Cannot use `LinstorProxy` without valid `LinstorVolumeManager` instance" 

3537 return getattr(self.sr._linstor, attr) 
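        # The proxy defers every attribute lookup to self.sr._linstor, which
        # does not exist yet at this point: __init__ only creates the
        # journaler (journaler_only=True below); a later full _reloadLinstor()
        # call builds the real LinstorVolumeManager.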

3538 

3539 self._linstor_proxy = LinstorProxy(self) 

3540 self._reloadLinstor(journaler_only=True) 

3541 

3542 @override 

3543 def deleteVDI(self, vdi) -> None: 

3544 self._checkSlaves(vdi) 

3545 SR.deleteVDI(self, vdi) 

3546 

3547 @override 

3548 def getFreeSpace(self) -> int: 

3549 return self._linstor.max_volume_size_allowed 

3550 

3551 @override 

3552 def scan(self, force=False) -> None: 

3553 all_vdi_info = self._scan(force) 

3554 for uuid, vdiInfo in all_vdi_info.items(): 

3555 # When vdiInfo is None, the VDI is RAW. 

3556 vdi = self.getVDI(uuid) 

3557 if not vdi: 

3558 self.logFilter.logNewVDI(uuid) 

3559 vdi = LinstorVDI(self, uuid, vdiInfo.vdiType if vdiInfo else VdiType.RAW) 

3560 self.vdis[uuid] = vdi 

3561 if vdiInfo: 

3562 vdi.load(vdiInfo) 

3563 self._removeStaleVDIs(all_vdi_info.keys()) 

3564 self._buildTree(force) 

3565 self.logFilter.logState() 

3566 self._handleInterruptedCoalesceLeaf() 

3567 

3568 @override 

3569 def pauseVDIs(self, vdiList) -> None: 

3570 self._linstor.ensure_volume_list_is_not_locked( 

3571 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3572 ) 

3573 return super(LinstorSR, self).pauseVDIs(vdiList) 

3574 

3575 def _reloadLinstor(self, journaler_only=False): 
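        # Only the journaler is needed at construction time; the full
        # LinstorVolumeManager (with repair=True) is rebuilt on every scan.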

3576 session = self.xapi.session 

3577 host_ref = util.get_this_host_ref(session) 

3578 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) 

3579 

3580 pbd = util.find_my_pbd(session, host_ref, sr_ref) 

3581 if pbd is None: 

3582 raise util.SMException('Failed to find PBD') 

3583 

3584 dconf = session.xenapi.PBD.get_device_config(pbd) 

3585 group_name = dconf['group-name'] 

3586 

3587 controller_uri = get_controller_uri() 

3588 self.journaler = LinstorJournaler( 

3589 controller_uri, group_name, logger=util.SMlog 

3590 ) 

3591 

3592 if journaler_only: 

3593 return 

3594 

3595 self._linstor = LinstorVolumeManager( 

3596 controller_uri, 

3597 group_name, 

3598 repair=True, 

3599 logger=util.SMlog 

3600 ) 

3601 

3602 def _scan(self, force): 

3603 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3604 self._reloadLinstor() 

3605 error = False 

3606 try: 

3607 all_vdi_info = self._load_vdi_info() 

3608 for uuid, vdiInfo in all_vdi_info.items(): 

3609 if vdiInfo and vdiInfo.error: 

3610 error = True 

3611 break 

3612 if not error: 

3613 return all_vdi_info 

3614 Util.log('Scan error, retrying ({})'.format(i)) 

3615 except Exception as e: 

3616 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3617 Util.log(traceback.format_exc()) 

3618 

3619 if force: 

3620 return all_vdi_info 

3621 raise util.SMException('Scan error') 

3622 

3623 def _load_vdi_info(self): 

3624 all_vdi_info = {} 

3625 

3626 # TODO: Ensure metadata contains the right info. 

3627 

3628 all_volume_info = self._linstor.get_volumes_with_info() 

3629 volumes_metadata = self._linstor.get_volumes_with_metadata() 

3630 for vdi_uuid, volume_info in all_volume_info.items(): 

3631 vdi_type = VdiType.RAW 

3632 try: 

3633 volume_metadata = volumes_metadata[vdi_uuid] 

3634 if not volume_info.name and not list(volume_metadata.items()): 

3635 continue # Ignore it, probably deleted. 

3636 

3637 if vdi_uuid.startswith('DELETED_'): 

3638 # Assume it's really a RAW volume of a failed snap without COW header/footer. 

3639 # We must remove this VDI now without adding it in the VDI list. 

3640 # Otherwise `Relinking` calls and other actions can be launched on it. 

3641 # We don't want that... 

3642 Util.log('Deleting bad VDI {}'.format(vdi_uuid)) 

3643 

3644 self.lock() 

3645 try: 

3646 self._linstor.destroy_volume(vdi_uuid) 

3647 try: 

3648 self.forgetVDI(vdi_uuid) 

3649 except Exception:

3650 pass 

3651 except Exception as e: 

3652 Util.log('Cannot delete bad VDI: {}'.format(e)) 

3653 finally: 

3654 self.unlock() 

3655 continue 

3656 

3657 vdi_type = volume_metadata.get(VDI_TYPE_TAG) 

3658 volume_name = self._linstor.get_volume_name(vdi_uuid) 

3659 if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX): 

3660 # Always RAW! 

3661 info = None 

3662 elif VdiType.isCowImage(vdi_type): 

3663 info = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type).get_info(vdi_uuid) 

3664 else: 

3665 # Ensure it's not a COW image... 

3666 linstorcowutil = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type) 

3667 try: 

3668 info = linstorcowutil.get_info(vdi_uuid) 

3669 except Exception:

3670 try: 

3671 linstorcowutil.force_repair( 

3672 self._linstor.get_device_path(vdi_uuid) 

3673 ) 

3674 info = linstorcowutil.get_info(vdi_uuid) 

3675 except Exception:

3676 info = None 

3677 

3678 except Exception as e: 

3679 Util.log( 

3680 ' [VDI {}: failed to load VDI info]: {}' 

3681 .format(vdi_uuid, e) 

3682 ) 

3683 info = CowImageInfo(vdi_uuid) 

3684 info.error = 1 
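                # Flagging the placeholder info as errored lets _scan()
                # detect the failure and retry, or give up unless 'force'.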

3685 

3686 if info: 

3687 info.vdiType = vdi_type 

3688 

3689 all_vdi_info[vdi_uuid] = info 

3690 

3691 return all_vdi_info 

3692 

3693 @override 

3694 def _prepareCoalesceLeaf(self, vdi) -> None: 

3695 vdi._activateChain() 

3696 vdi.deflate() 

3697 vdi._inflateParentForCoalesce() 

3698 

3699 @override 

3700 def _finishCoalesceLeaf(self, parent) -> None: 

3701 if not parent.isSnapshot() or parent.isAttachedRW(): 

3702 parent.inflateFully() 

3703 else: 

3704 parent.deflate() 

3705 

3706 @override 

3707 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3708 return LinstorCowUtil( 

3709 self.xapi.session, self._linstor, parent.vdi_type 

3710 ).compute_volume_size(parent.sizeVirt) - parent.getDrbdSize() 

3711 

3712 def _hasValidDevicePath(self, uuid): 

3713 try: 

3714 self._linstor.get_device_path(uuid) 

3715 except Exception: 

3716 # TODO: Maybe log exception. 

3717 return False 

3718 return True 

3719 

3720 @override 

3721 def _liveLeafCoalesce(self, vdi) -> bool: 

3722 self.lock() 

3723 try: 

3724 self._linstor.ensure_volume_is_not_locked( 

3725 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3726 ) 

3727 return super(LinstorSR, self)._liveLeafCoalesce(vdi) 

3728 finally: 

3729 self.unlock() 

3730 

3731 @override 

3732 def _handleInterruptedCoalesceLeaf(self) -> None: 

3733 entries = self.journaler.get_all(VDI.JRN_LEAF) 

3734 for uuid, parentUuid in entries.items(): 

3735 if self._hasValidDevicePath(parentUuid) or \ 

3736 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): 

3737 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3738 else: 

3739 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3740 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3741 vdi = self.getVDI(uuid) 

3742 if vdi: 

3743 vdi.ensureUnpaused() 

3744 

3745 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3746 Util.log('*** UNDO LEAF-COALESCE') 

3747 parent = self.getVDI(parentUuid) 

3748 if not parent: 

3749 parent = self.getVDI(childUuid) 

3750 if not parent: 

3751 raise util.SMException( 

3752 'Neither {} nor {} found'.format(parentUuid, childUuid) 

3753 ) 

3754 Util.log( 

3755 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) 

3756 ) 

3757 parent.rename(parentUuid) 

3758 

3759 child = self.getVDI(childUuid) 

3760 if not child: 

3761 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3762 if not child: 

3763 raise util.SMException( 

3764 'Neither {} nor {} found'.format( 

3765 childUuid, self.TMP_RENAME_PREFIX + childUuid 

3766 ) 

3767 ) 

3768 Util.log('Renaming child back to {}'.format(childUuid)) 

3769 child.rename(childUuid) 

3770 Util.log('Updating the VDI record') 

3771 child.setConfig(VDI.DB_VDI_PARENT, parentUuid) 

3772 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type) 

3773 

3774 # TODO: Maybe deflate here. 

3775 

3776 if child.isHidden(): 

3777 child._setHidden(False) 

3778 if not parent.isHidden(): 

3779 parent._setHidden(True) 

3780 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3781 Util.log('*** leaf-coalesce undo successful') 

3782 

3783 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3784 Util.log('*** FINISH LEAF-COALESCE') 

3785 vdi = self.getVDI(childUuid) 

3786 if not vdi: 

3787 raise util.SMException('VDI {} not found'.format(childUuid)) 

3788 # TODO: Maybe inflate. 

3789 try: 

3790 self.forgetVDI(parentUuid) 

3791 except XenAPI.Failure: 

3792 pass 

3793 self._updateSlavesOnResize(vdi) 

3794 Util.log('*** finished leaf-coalesce successfully') 

3795 

3796 def _checkSlaves(self, vdi): 

3797 try: 

3798 all_openers = self._linstor.get_volume_openers(vdi.uuid) 

3799 for openers in all_openers.values(): 

3800 for opener in openers.values(): 

3801 if opener['process-name'] != 'tapdisk': 

3802 raise util.SMException( 

3803 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) 

3804 ) 

3805 except LinstorVolumeManagerError as e: 

3806 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

3807 raise 

3808 

3809 

3810################################################################################ 

3811# 

3812# Helpers 

3813# 

3814def daemonize(): 
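    # Classic double fork: the first child detaches with setsid(), forks
    # again and exits, so the grandchild (the GC worker) can never reacquire
    # a controlling terminal. The original process reaps the intermediate
    # child and returns False; the grandchild returns True.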

3815 pid = os.fork() 

3816 if pid: 

3817 os.waitpid(pid, 0) 

3818 Util.log("New PID [%d]" % pid) 

3819 return False 

3820 os.chdir("/") 

3821 os.setsid() 

3822 pid = os.fork() 

3823 if pid: 

3824 Util.log("Will finish as PID [%d]" % pid) 

3825 os._exit(0) 

3826 for fd in [0, 1, 2]: 

3827 try: 

3828 os.close(fd) 

3829 except OSError: 

3830 pass 

3831 # we need to fill those special fd numbers or pread won't work 

3832 sys.stdin = open("/dev/null", 'r') 

3833 sys.stderr = open("/dev/null", 'w') 

3834 sys.stdout = open("/dev/null", 'w') 

3835 # As we're a new process we need to clear the lock objects 

3836 lock.Lock.clearAll() 

3837 return True 

3838 

3839 

3840def normalizeType(type): 

3841 if type in LVMSR.SUBTYPES: 

3842 type = SR.TYPE_LVHD 

3843 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: 

3844 # temporary while LVHD is symlinked as LVM 

3845 type = SR.TYPE_LVHD 

3846 if type in [ 

3847 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", 

3848 "moosefs", "xfs", "zfs", "largeblock" 

3849 ]: 

3850 type = SR.TYPE_FILE 

3851 if type in ["linstor"]: 

3852 type = SR.TYPE_LINSTOR 

3853 if type not in SR.TYPES: 

3854 raise util.SMException("Unsupported SR type: %s" % type) 

3855 return type 

3856 
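# Length of the GC quiet period, in seconds (5 minutes); _gcLoopPause()
# sleeps for this long (abortably) before the main GC loop starts.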

3857GCPAUSE_DEFAULT_SLEEP = 5 * 60 

3858 

3859 

3860def _gc_init_file(sr_uuid): 

3861 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init') 

3862 

3863 

3864def _create_init_file(sr_uuid): 

3865 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))) 

3866 with open(os.path.join(_gc_init_file(sr_uuid)), 'w+') as f: 

3867 f.write('1') 

3868 

3869 

3870def _gcLoopPause(sr, dryRun=False, immediate=False): 

3871 if immediate: 

3872 return 

3873 

3874 # Check to see if the GCPAUSE_FISTPOINT is present. If so, the fist

3875 # point will just return. Otherwise, fall back on an abortable sleep. 

3876 

3877 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT): 

3878 

3879 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT,

3880 lambda *args: None) 

3881 elif os.path.exists(_gc_init_file(sr.uuid)): 

3882 def abortTest(): 

3883 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT) 

3884 

3885 # If time.sleep hangs, we are in deep trouble; however, for

3886 # completeness we set the timeout of the abort thread to 

3887 # 110% of GCPAUSE_DEFAULT_SLEEP. 

3888 Util.log("GC active, about to go quiet") 

3889 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP),

3890 None, sr.uuid, abortTest, VDI.POLL_INTERVAL, 

3891 GCPAUSE_DEFAULT_SLEEP * 1.1) 

3892 Util.log("GC active, quiet period ended") 

3893 

3894 

3895def _gcLoop(sr, dryRun=False, immediate=False): 

3896 if not lockGCActive.acquireNoblock():

3897 Util.log("Another GC instance already active, exiting") 

3898 return 

3899 

3900 # Check we're still attached after acquiring locks 

3901 if not sr.xapi.isPluggedHere(): 

3902 Util.log("SR no longer attached, exiting") 

3903 return 

3904 

3905 # Clean up Intellicache files 

3906 sr.cleanupCache() 

3907 

3908 # Track how many we do 

3909 coalesced = 0 

3910 task_status = "success" 

3911 try: 

3912 # Check if any work needs to be done 

3913 if not sr.xapi.isPluggedHere():

3914 Util.log("SR no longer attached, exiting") 

3915 return 

3916 sr.scanLocked() 

3917 if not sr.hasWork(): 

3918 Util.log("No work, exiting") 

3919 return 

3920 sr.xapi.create_task( 

3921 "Garbage Collection", 

3922 "Garbage collection for SR %s" % sr.uuid) 

3923 _gcLoopPause(sr, dryRun, immediate=immediate) 

3924 while True: 

3925 if SIGTERM: 

3926 Util.log("Term requested") 

3927 return 

3928 

3929 if not sr.xapi.isPluggedHere():

3930 Util.log("SR no longer attached, exiting") 

3931 break 

3932 sr.scanLocked() 

3933 if not sr.hasWork(): 

3934 Util.log("No work, exiting") 

3935 break 

3936 

3937 if not lockGCRunning.acquireNoblock():

3938 Util.log("Unable to acquire GC running lock.") 

3939 return 

3940 try: 

3941 if not sr.gcEnabled():

3942 break 

3943 

3944 sr.xapi.update_task_progress("done", coalesced) 

3945 

3946 sr.cleanupCoalesceJournals() 

3947 # Create the init file here in case startup is waiting on it 

3948 _create_init_file(sr.uuid) 

3949 sr.scanLocked() 

3950 sr.updateBlockInfo() 

3951 

3952 howmany = len(sr.findGarbage()) 

3953 if howmany > 0: 

3954 Util.log("Found %d orphaned vdis" % howmany) 

3955 sr.lock() 

3956 try: 

3957 sr.garbageCollect(dryRun) 

3958 finally: 

3959 sr.unlock() 

3960 sr.xapi.srUpdate() 

3961 

3962 candidate = sr.findCoalesceable() 

3963 if candidate: 

3964 util.fistpoint.activate( 

3965 "LVHDRT_finding_a_suitable_pair", sr.uuid) 

3966 sr.coalesce(candidate, dryRun) 

3967 sr.xapi.srUpdate() 

3968 coalesced += 1 

3969 continue 

3970 

3971 candidate = sr.findLeafCoalesceable() 

3972 if candidate:

3973 sr.coalesceLeaf(candidate, dryRun) 

3974 sr.xapi.srUpdate() 

3975 coalesced += 1 

3976 continue 

3977 

3978 finally: 

3979 lockGCRunning.release()

3980 except: 

3981 task_status = "failure" 

3982 raise 

3983 finally: 

3984 sr.xapi.set_task_status(task_status) 

3985 Util.log("GC process exiting, no work left") 

3986 _create_init_file(sr.uuid) 

3987 lockGCActive.release() 

3988 

3989 

3990def _gc(session, srUuid, dryRun=False, immediate=False): 

3991 init(srUuid) 

3992 sr = SR.getInstance(srUuid, session) 

3993 if not sr.gcEnabled(False):

3994 return 

3995 

3996 try: 

3997 _gcLoop(sr, dryRun, immediate=immediate) 

3998 finally: 

3999 sr.check_no_space_candidates() 

4000 sr.cleanup() 

4001 sr.logFilter.logState() 

4002 del sr.xapi 

4003 

4004 

4005def _abort(srUuid, soft=False): 

4006 """Aborts an GC/coalesce. 

4007 

4008 srUuid: the UUID of the SR whose GC/coalesce must be aborted 

4009 soft: If set to True and there is a pending abort signal, the function 

4010 doesn't do anything. If set to False, a new abort signal is issued. 

4011 

4012 returns: If soft is set to False, we return True, holding lockGCActive. If

4013 soft is set to True and an abort signal is pending, we return False

4014 without holding lockGCActive. An exception is raised in case of error."""

4015 Util.log("=== SR %s: abort ===" % (srUuid)) 

4016 init(srUuid) 

4017 if not lockGCActive.acquireNoblock(): 

4018 gotLock = False 

4019 Util.log("Aborting currently-running instance (SR %s)" % srUuid) 

4020 abortFlag = IPCFlag(srUuid) 

4021 if not abortFlag.set(FLAG_TYPE_ABORT, soft): 

4022 return False 
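    # The abort flag is now raised; a running GC polls it and is expected to
    # release lockGCActive once it stops, so retry the acquisition for a
    # bounded time.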

4023 for i in range(SR.LOCK_RETRY_ATTEMPTS): 

4024 gotLock = lockGCActive.acquireNoblock() 

4025 if gotLock: 

4026 break 

4027 time.sleep(SR.LOCK_RETRY_INTERVAL) 

4028 abortFlag.clear(FLAG_TYPE_ABORT) 

4029 if not gotLock: 

4030 raise util.CommandException(code=errno.ETIMEDOUT, 

4031 reason="SR %s: error aborting existing process" % srUuid) 

4032 return True 

4033 

4034 

4035def init(srUuid): 

4036 global lockGCRunning 

4037 if not lockGCRunning:

4038 lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid) 

4039 global lockGCActive 

4040 if not lockGCActive:

4041 lockGCActive = LockActive(srUuid) 

4042 

4043 

4044class LockActive: 

4045 """ 

4046 Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired 

4047 if another process holds the SR lock. 

4048 """ 

4049 def __init__(self, srUuid): 

4050 self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid) 

4051 self._srLock = lock.Lock(lock.LOCK_TYPE_SR, srUuid) 
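        # Taking the SR lock first in acquireNoblock() (see below) ensures GC
        # cannot become active while another process is working on this SR
        # under the SR lock.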

4052 

4053 def acquireNoblock(self): 

4054 self._srLock.acquire() 

4055 

4056 try: 

4057 return self._lock.acquireNoblock() 

4058 finally: 

4059 self._srLock.release() 

4060 

4061 def release(self): 

4062 self._lock.release() 

4063 

4064 

4065def usage(): 

4066 output = """Garbage collect and/or coalesce COW images in a COW-based SR 

4067 

4068Parameters: 

4069 -u --uuid UUID SR UUID 

4070 and one of: 

4071 -g --gc garbage collect, coalesce, and repeat while there is work 

4072 -G --gc_force garbage collect once, aborting any current operations 

4073 -c --clean_cache <max_age> clean up IntelliCache cache files older than

4074 max_age hours 

4075 -a --abort abort any currently running operation (GC or coalesce) 

4076 -q --query query the current state (GC'ing, coalescing or not running) 

4077 -x --disable disable GC/coalesce (will be in effect until you exit) 

4078 -t --debug see Debug below 

4079 

4080Options: 

4081 -b --background run in background (return immediately) (valid for -g only) 

4082 -f --force continue in the presence of COW images with errors (when doing 

4083 GC, this might cause removal of any such images) (only valid 

4084 for -G) (DANGEROUS) 

4085 

4086Debug: 

4087 The --debug parameter enables manipulation of LVHD VDIs for debugging 

4088 purposes. ** NEVER USE IT ON A LIVE VM ** 

4089 The following parameters are required: 

4090 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate", 

4091 "deflate". 

4092 -v --vdi_uuid VDI UUID 

4093 """ 

4094 #-d --dry-run don't actually perform any SR-modifying operations 

4095 print(output) 

4096 Util.log("(Invalid usage)") 

4097 sys.exit(1) 

4098 

4099 

4100############################################################################## 

4101# 

4102# API 

4103# 

4104def abort(srUuid, soft=False): 

4105 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair. 

4106 """ 

4107 if _abort(srUuid, soft): 

4108 Util.log("abort: releasing the process lock") 

4109 lockGCActive.release() 

4110 return True 

4111 else: 

4112 return False 

4113 

4114 

4115def gc(session, srUuid, inBackground, dryRun=False): 

4116 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return 

4117 immediately if inBackground=True. 

4118 

4119 The following algorithm is used: 

4120 1. If we are already GC'ing in this SR, return 

4121 2. If we are already coalescing a VDI pair: 

4122 a. Scan the SR and determine if the VDI pair is GC'able 

4123 b. If the pair is not GC'able, return 

4124 c. If the pair is GC'able, abort coalesce 

4125 3. Scan the SR 

4126 4. If there is nothing to collect, nor to coalesce, return 

4127 5. If there is something to collect, GC all, then goto 3 

4128 6. If there is something to coalesce, coalesce one pair, then goto 3 

4129 """ 

4130 Util.log("=== SR %s: gc ===" % srUuid) 

4131 

4132 signal.signal(signal.SIGTERM, receiveSignal) 

4133 

4134 if inBackground: 

4135 if daemonize():

4136 # we are now running in the background. Catch & log any errors 

4137 # because there is no other way to propagate them back at this 

4138 # point 

4139 

4140 try: 

4141 _gc(None, srUuid, dryRun) 

4142 except AbortException: 

4143 Util.log("Aborted") 

4144 except Exception: 

4145 Util.logException("gc") 

4146 Util.log("* * * * * SR %s: ERROR\n" % srUuid) 

4147 os._exit(0) 

4148 else: 

4149 _gc(session, srUuid, dryRun, immediate=True) 

4150 

4151 

4152def start_gc(session, sr_uuid): 

4153 """ 

4154 This function is used to try to start a backgrounded GC session by forking 

4155 the current process. If using the systemd version, call start_gc_service() instead. 

4156 """ 

4157 # don't bother if an instance already running (this is just an 

4158 # optimization to reduce the overhead of forking a new process if we 

4159 # don't have to, but the process will check the lock anyway)

4160 lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid) 

4161 if not lockRunning.acquireNoblock(): 

4162 if should_preempt(session, sr_uuid): 

4163 util.SMlog("Aborting currently-running coalesce of garbage VDI") 

4164 try: 

4165 if not abort(sr_uuid, soft=True): 

4166 util.SMlog("The GC has already been scheduled to re-start") 

4167 except util.CommandException as e: 

4168 if e.code != errno.ETIMEDOUT: 

4169 raise 

4170 util.SMlog('failed to abort the GC') 

4171 else: 

4172 util.SMlog("A GC instance already running, not kicking") 

4173 return 

4174 else: 

4175 lockRunning.release() 

4176 

4177 util.SMlog(f"Starting GC file is {__file__}") 

4178 subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'], 

4179 stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4180 

4181def start_gc_service(sr_uuid, wait=False): 

4182 """ 

4183 This starts the templated systemd service which runs GC on the given SR UUID. 

4184 If the service was already started, this is a no-op. 

4185 

4186 Because the service is a one-shot with RemainAfterExit=no, when called with 

4187 wait=True this will run the service synchronously and will not return until the 

4188 run has finished. This is used to force a run of the GC instead of just kicking it 

4189 in the background. 

4190 """ 

4191 sr_uuid_esc = sr_uuid.replace("-", "\\x2d") 
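    # Encode dashes as "\x2d" (the systemd-escape convention) so the UUID is
    # treated as a single template instance name in SMGC@<instance>.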

4192 util.SMlog(f"Kicking SMGC@{sr_uuid}...") 

4193 cmd = ["/usr/bin/systemctl", "--quiet"]

4194 if not wait:

4195 cmd.append("--no-block") 

4196 cmd += ["start", f"SMGC@{sr_uuid_esc}"] 

4197 subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4198 

4199 

4200def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False): 

4201 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure 

4202 the SR lock is held. 

4203 The following algorithm is used: 

4204 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce 

4205 2. Scan the SR 

4206 3. GC 

4207 4. return 

4208 """ 

4209 Util.log("=== SR %s: gc_force ===" % srUuid) 

4210 init(srUuid) 

4211 sr = SR.getInstance(srUuid, session, lockSR, True) 

4212 if not lockGCActive.acquireNoblock(): 

4213 abort(srUuid) 

4214 else: 

4215 Util.log("Nothing was running, clear to proceed") 

4216 

4217 if force: 

4218 Util.log("FORCED: will continue even if there are COW image errors") 

4219 sr.scanLocked(force) 

4220 sr.cleanupCoalesceJournals() 

4221 

4222 try: 

4223 sr.cleanupCache() 

4224 sr.garbageCollect(dryRun) 

4225 finally: 

4226 sr.cleanup() 

4227 sr.logFilter.logState() 

4228 lockGCActive.release() 

4229 

4230 

4231def get_state(srUuid): 

4232 """Return whether GC/coalesce is currently running or not. This asks systemd for 

4233 the state of the templated SMGC service and will return True if it is "activating" 

4234 or "running" (for completeness, as in practice it will never achieve the latter state) 

4235 """ 

4236 sr_uuid_esc = srUuid.replace("-", "\\x2d") 

4237 cmd = ["/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"]

4238 result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4239 state = result.stdout.decode('utf-8').rstrip() 

4240 if state == "activating" or state == "running": 

4241 return True 

4242 return False 

4243 

4244 

4245def should_preempt(session, srUuid): 
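    # If the VDI recorded in the coalesce journal has itself become garbage,
    # the in-flight coalesce is wasted work and may be aborted in favour of GC.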

4246 sr = SR.getInstance(srUuid, session) 

4247 entries = sr.journaler.getAll(VDI.JRN_COALESCE) 

4248 if len(entries) == 0: 

4249 return False 

4250 elif len(entries) > 1: 

4251 raise util.SMException("More than one coalesce entry: " + str(entries)) 

4252 sr.scanLocked() 

4253 coalescedUuid = entries.popitem()[0] 

4254 garbage = sr.findGarbage() 

4255 for vdi in garbage: 

4256 if vdi.uuid == coalescedUuid: 

4257 return True 

4258 return False 

4259 

4260 

4261def get_coalesceable_leaves(session, srUuid, vdiUuids): 

4262 coalesceable = [] 

4263 sr = SR.getInstance(srUuid, session) 

4264 sr.scanLocked() 

4265 for uuid in vdiUuids: 

4266 vdi = sr.getVDI(uuid) 

4267 if not vdi: 

4268 raise util.SMException("VDI %s not found" % uuid) 

4269 if vdi.isLeafCoalesceable(): 

4270 coalesceable.append(uuid) 

4271 return coalesceable 

4272 

4273 

4274def cache_cleanup(session, srUuid, maxAge): 

4275 sr = SR.getInstance(srUuid, session) 

4276 return sr.cleanupCache(maxAge) 

4277 

4278 

4279def debug(sr_uuid, cmd, vdi_uuid): 

4280 Util.log("Debug command: %s" % cmd) 

4281 sr = SR.getInstance(sr_uuid, None) 

4282 if not isinstance(sr, LVMSR): 

4283 print("Error: not an LVHD SR") 

4284 return 

4285 sr.scanLocked() 

4286 vdi = sr.getVDI(vdi_uuid) 

4287 if not vdi: 

4288 print("Error: VDI %s not found") 

4289 return 

4290 print("Running %s on SR %s" % (cmd, sr)) 

4291 print("VDI before: %s" % vdi) 

4292 if cmd == "activate": 

4293 vdi._activate() 

4294 print("VDI file: %s" % vdi.path) 

4295 if cmd == "deactivate": 

4296 ns = NS_PREFIX_LVM + sr.uuid 

4297 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False) 

4298 if cmd == "inflate": 

4299 vdi.inflateFully() 

4300 sr.cleanup() 

4301 if cmd == "deflate": 

4302 vdi.deflate() 

4303 sr.cleanup() 

4304 sr.scanLocked() 

4305 print("VDI after: %s" % vdi) 

4306 

4307 

4308def abort_optional_reenable(uuid): 

4309 print("Disabling GC/coalesce for %s" % uuid) 

4310 ret = _abort(uuid) 

4311 input("Press enter to re-enable...") 

4312 print("GC/coalesce re-enabled") 

4313 lockGCRunning.release() 

4314 if ret: 

4315 lockGCActive.release() 

4316 

4317 

4318############################################################################## 

4319# 

4320# CLI 

4321# 

4322def main(): 

4323 action = "" 

4324 maxAge = 0 

4325 uuid = "" 

4326 background = False 

4327 force = False 

4328 dryRun = False 

4329 debug_cmd = "" 

4330 vdi_uuid = "" 

4331 shortArgs = "gGc:aqxu:bfdt:v:" 

4332 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable", 

4333 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="] 

4334 

4335 try: 

4336 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs) 

4337 except getopt.GetoptError: 

4338 usage() 

4339 for o, a in opts: 

4340 if o in ("-g", "--gc"): 

4341 action = "gc" 

4342 if o in ("-G", "--gc_force"): 

4343 action = "gc_force" 

4344 if o in ("-c", "--clean_cache"): 

4345 action = "clean_cache" 

4346 maxAge = int(a) 

4347 if o in ("-a", "--abort"): 

4348 action = "abort" 

4349 if o in ("-q", "--query"): 

4350 action = "query" 

4351 if o in ("-x", "--disable"): 

4352 action = "disable" 

4353 if o in ("-u", "--uuid"): 

4354 uuid = a 

4355 if o in ("-b", "--background"): 

4356 background = True 

4357 if o in ("-f", "--force"): 

4358 force = True 

4359 if o in ("-d", "--dry-run"): 

4360 Util.log("Dry run mode") 

4361 dryRun = True 

4362 if o in ("-t", "--debug"): 

4363 action = "debug" 

4364 debug_cmd = a 

4365 if o in ("-v", "--vdi_uuid"): 

4366 vdi_uuid = a 

4367 

4368 if not action or not uuid: 

4369 usage() 

4370 if (action == "debug" and not (debug_cmd and vdi_uuid)) or \

4371 (action != "debug" and (debug_cmd or vdi_uuid)):

4372 usage() 

4373 

4374 if action != "query" and action != "debug": 

4375 print("All output goes to log") 

4376 

4377 if action == "gc": 

4378 gc(None, uuid, background, dryRun) 

4379 elif action == "gc_force": 

4380 gc_force(None, uuid, force, dryRun, True) 

4381 elif action == "clean_cache": 

4382 cache_cleanup(None, uuid, maxAge) 

4383 elif action == "abort": 

4384 abort(uuid) 

4385 elif action == "query": 

4386 print("Currently running: %s" % get_state(uuid)) 

4387 elif action == "disable": 

4388 abort_optional_reenable(uuid) 

4389 elif action == "debug": 

4390 debug(uuid, debug_cmd, vdi_uuid) 

4391 

4392 

4393if __name__ == '__main__':

4394 main()