Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect COW-based SR's in the background 

19# 

20 

21from sm_typing import Any, Optional, List, override 

22 

23import os 

24import os.path 

25import sys 

26import time 

27import signal 

28import subprocess 

29import getopt 

30import datetime 

31import traceback 

32import base64 

33import zlib 

34import errno 

35import stat 

36 

37import XenAPI # pylint: disable=import-error 

38import util 

39import lvutil 

40import lvmcache 

41import journaler 

42import fjournaler 

43import lock 

44import blktap2 

45import xs_errors 

46from refcounter import RefCounter 

47from ipc import IPCFlag 

48from lvmanager import LVActivator 

49from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

50from functools import reduce 

51from time import monotonic as _time 

52 

53from constants import NS_PREFIX_LVM, VG_LOCATION, VG_PREFIX 

54from cowutil import CowImageInfo, CowUtil, getCowUtil 

55from lvmcowutil import LV_PREFIX, LvmCowUtil 

56from vditype import VdiType, VdiTypeExtension, VDI_COW_TYPES, VDI_TYPE_TO_EXTENSION 

57 

58try: 

59 from linstorcowutil import LinstorCowUtil 

60 from linstorjournaler import LinstorJournaler 

61 from linstorvolumemanager import get_controller_uri 

62 from linstorvolumemanager import LinstorVolumeManager 

63 from linstorvolumemanager import LinstorVolumeManagerError 

64 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX 

65 

66 LINSTOR_AVAILABLE = True 

67except ImportError: 

68 LINSTOR_AVAILABLE = False 

69 

# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not
# possible due to lvhd_stop_using_() not working correctly. However, we leave
# this option available through the explicit LEAFCLSC_FORCE flag in the VDI
# record for use by the offline tool (which makes the operation safe by pausing
# the VM first)
# NOTE(review): the comment above describes disabling, but the flag is set to
# True (enabled) — confirm which state is current.
AUTO_ONLINE_LEAF_COALESCE_ENABLED = True

FLAG_TYPE_ABORT = "abort"  # flag to request aborting of GC/coalesce

# process "lock", used simply as an indicator that a process already exists
# that is doing GC/coalesce on this SR (such a process holds the lock, and we
# check for the fact by trying the lock).
lockGCRunning = None

# process "lock" to indicate that the GC process has been activated but may not
# yet be running, stops a second process from being started.
LOCK_TYPE_GC_ACTIVE = "gc_active"
lockGCActive = None

# Default coalesce error rate limit, in messages per minute. A zero value
# disables throttling, and a negative value disables error reporting.
DEFAULT_COALESCE_ERR_RATE = 1.0 / 60

# sm-config tags used by the coalesce-error throttling logic
COALESCE_LAST_ERR_TAG = 'last-coalesce-error'
COALESCE_ERR_RATE_TAG = 'coalesce-error-rate'
VAR_RUN = "/var/run/"
# per-VDI file pattern, presumably recording coalesce speed samples — confirm
SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log"

# window size for running-average computations (presumably coalesce speed)
N_RUNNING_AVERAGE = 10

NON_PERSISTENT_DIR = '/run/nonpersistent/sm'

# Signal Handler: set to True by receiveSignal() when SIGTERM arrives
SIGTERM = False

104 

105 

class AbortException(util.SMException):
    """Raised when a GC/coalesce operation must stop (abort flag, signal,
    or timeout) — see Util.runAbortable."""
    pass

108 

109 

def receiveSignal(signalNumber, frame):
    """SIGTERM handler: record that termination was requested.

    Sets the module-level SIGTERM flag so long-running loops (e.g.
    Util.runAbortable) can abort cleanly instead of being killed mid-work.
    """
    global SIGTERM

    # fixed typo in log message: "recieved" -> "received"
    util.SMlog("GC: received SIGTERM")
    SIGTERM = True

116 

117 

118################################################################################ 

119# 

120# Util 

121# 

class Util:
    """Assorted helpers: logging, subprocess execution, abortable child
    processes and bitmap arithmetic for block-allocation maps."""

    # doexec() "ret" selector values
    RET_RC = 1
    RET_STDOUT = 2
    RET_STDERR = 4

    UUID_LEN = 36

    # multipliers used by num2str()
    PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024}

    @staticmethod
    def log(text) -> None:
        """Log *text* to the SM log under the SMGC identity."""
        util.SMlog(text, ident="SMGC")

    @staticmethod
    def logException(tag):
        """Log the current exception (with traceback), prefixed by *tag*.

        SystemExit is re-raised as a clean exit instead of being logged.
        """
        info = sys.exc_info()
        if info[0] == SystemExit:
            # this should not be happening when catching "Exception", but it is
            sys.exit(0)
        tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2]))
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
        Util.log(" ***********************")
        Util.log(" * E X C E P T I O N *")
        Util.log(" ***********************")
        Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1]))
        Util.log(tb)
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")

    @staticmethod
    def doexec(args, expectedRC, inputtext=None, ret=None, log=True):
        """Execute a subprocess, then return its return code, stdout, stderr.

        args: command string (interpreted by a shell); expectedRC: allowed
        return code, or a list of them; ret: RET_RC/RET_STDOUT/RET_STDERR
        selects what to return (default: stdout).
        Raises util.CommandException on an unexpected return code.
        """
        # NOTE(review): shell=True means *args* goes through the shell;
        # callers must not pass untrusted input here.
        proc = subprocess.Popen(args,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                close_fds=True)
        (stdout, stderr) = proc.communicate(inputtext)
        # NOTE(review): str() on bytes yields a "b'...'" repr; kept as-is
        # for backward compatibility with existing callers.
        stdout = str(stdout)
        stderr = str(stderr)
        rc = proc.returncode
        if log:
            Util.log("`%s`: %s" % (args, rc))
        if not isinstance(expectedRC, list):
            expectedRC = [expectedRC]
        if rc not in expectedRC:
            reason = stderr.strip()
            if stdout.strip():
                reason = "%s (stdout: %s)" % (reason, stdout.strip())
            Util.log("Failed: %s" % reason)
            raise util.CommandException(rc, args, reason)

        if ret == Util.RET_RC:
            return rc
        if ret == Util.RET_STDERR:
            return stderr
        return stdout

    @staticmethod
    def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut):
        """execute func in a separate thread and kill it if abortTest signals
        so"""
        abortSignaled = abortTest()  # check now before we clear resultFlag
        resultFlag = IPCFlag(ns)
        resultFlag.clearAll()
        pid = os.fork()
        if pid:
            # parent: poll for the child's success/failure flag, an abort
            # request, or a timeout
            startTime = _time()
            try:
                while True:
                    if resultFlag.test("success"):
                        Util.log("  Child process completed successfully")
                        resultFlag.clear("success")
                        return
                    if resultFlag.test("failure"):
                        resultFlag.clear("failure")
                        raise util.SMException("Child process exited with error")
                    if abortTest() or abortSignaled or SIGTERM:
                        os.killpg(pid, signal.SIGKILL)
                        raise AbortException("Aborting due to signal")
                    if timeOut and _time() - startTime > timeOut:
                        os.killpg(pid, signal.SIGKILL)
                        resultFlag.clearAll()
                        raise util.SMException("Timed out")
                    time.sleep(pollInterval)
            finally:
                # reap the child (bounded wait) so we don't leave a zombie
                wait_pid = 0
                rc = -1
                count = 0
                while wait_pid == 0 and count < 10:
                    wait_pid, rc = os.waitpid(pid, os.WNOHANG)
                    if wait_pid == 0:
                        time.sleep(2)
                        count += 1

                if wait_pid == 0:
                    Util.log("runAbortable: wait for process completion timed out")
        else:
            # child: own process group so the parent can SIGKILL the whole
            # group; report outcome via IPC flags
            os.setpgrp()
            try:
                if func() == ret:
                    resultFlag.set("success")
                else:
                    resultFlag.set("failure")
            except Exception as e:
                Util.log("Child process failed with : (%s)" % e)
                resultFlag.set("failure")
                Util.logException("This exception has occured")
            os._exit(0)

    @staticmethod
    def num2str(number):
        """Render *number* with a binary G/M/K suffix (3 decimals), or
        verbatim if below 1K."""
        for prefix in ("G", "M", "K"):
            if number >= Util.PREFIX[prefix]:
                return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix)
        return "%s" % number

    @staticmethod
    def numBits(val):
        """Return the number of set bits in integer *val*."""
        count = 0
        while val:
            count += val & 1
            val = val >> 1
        return count

    @staticmethod
    def countBits(bitmap1, bitmap2):
        """Return bit count in the bitmap produced by ORing the two bitmaps.

        The bitmaps may differ in length; the shorter one is treated as
        zero-padded. (Fix: the original leaked the loop variable into
        `range(i + 1, lenLong)` and raised NameError when the shorter
        bitmap was empty.)
        """
        len1 = len(bitmap1)
        len2 = len(bitmap2)
        lenLong = len1
        lenShort = len2
        bitmapLong = bitmap1
        if len2 > len1:
            lenLong = len2
            lenShort = len1
            bitmapLong = bitmap2

        count = 0
        for i in range(lenShort):
            count += Util.numBits(bitmap1[i] | bitmap2[i])

        for i in range(lenShort, lenLong):
            count += Util.numBits(bitmapLong[i])
        return count

    @staticmethod
    def getThisScript():
        """Return the real path of this script (.py, not .pyc)."""
        thisScript = util.get_real_path(__file__)
        if thisScript.endswith(".pyc"):
            thisScript = thisScript[:-1]
        return thisScript

276 

277 

278################################################################################ 

279# 

280# XAPI 

281# 

class XAPI:
    """Wrapper around a XenAPI session, scoped to a single SR.

    Provides SR/VDI config access, PBD queries and task-progress
    reporting for the GC/coalesce process.
    """
    USER = "root"
    PLUGIN_ON_SLAVE = "on-slave"

    # config "kinds": which XAPI field a given config key is stored in
    CONFIG_SM = 0
    CONFIG_OTHER = 1
    CONFIG_ON_BOOT = 2
    CONFIG_ALLOW_CACHING = 3

    CONFIG_NAME = {
        CONFIG_SM: "sm-config",
        CONFIG_OTHER: "other-config",
        CONFIG_ON_BOOT: "on-boot",
        CONFIG_ALLOW_CACHING: "allow_caching"
    }

    class LookupError(util.SMException):
        # raised when a XAPI object lookup fails
        pass

    @staticmethod
    def getSession():
        """Open and return a new local XAPI session logged in as root."""
        session = XenAPI.xapi_local()
        session.xenapi.login_with_password(XAPI.USER, '', '', 'SM')
        return session

    def __init__(self, session, srUuid):
        """Wrap *session* for the SR with uuid *srUuid*.

        If *session* is None, a private session is created (and logged
        out again in __del__).
        """
        self.sessionPrivate = False
        self.session = session
        if self.session is None:
            self.session = self.getSession()
            self.sessionPrivate = True
        self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid)
        self.srRecord = self.session.xenapi.SR.get_record(self._srRef)
        self.hostUuid = util.get_this_host()
        self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid)
        self.task = None
        # counters feeding the task progress bar (see update_task)
        self.task_progress = {"coalescable": 0, "done": 0}

    def __del__(self):
        # only log out sessions we created ourselves
        if self.sessionPrivate:
            self.session.xenapi.session.logout()

    @property
    def srRef(self):
        return self._srRef

    def isPluggedHere(self):
        """True if this SR has an attached PBD on the local host."""
        pbds = self.getAttachedPBDs()
        for pbdRec in pbds:
            if pbdRec["host"] == self._hostRef:
                return True
        return False

    def poolOK(self):
        """True if every host in the pool is enabled."""
        host_recs = self.session.xenapi.host.get_all_records()
        for host_ref, host_rec in host_recs.items():
            if not host_rec["enabled"]:
                Util.log("Host %s not enabled" % host_rec["uuid"])
                return False
        return True

    def isMaster(self):
        """True if the local host is responsible for this SR.

        Shared SR: the pool master. Local SR: the single host with an
        attached PBD (raises if the SR is unattached or multiply attached).
        """
        if self.srRecord["shared"]:
            pool = list(self.session.xenapi.pool.get_all_records().values())[0]
            return pool["master"] == self._hostRef
        else:
            pbds = self.getAttachedPBDs()
            if len(pbds) < 1:
                raise util.SMException("Local SR not attached")
            elif len(pbds) > 1:
                raise util.SMException("Local SR multiply attached")
            return pbds[0]["host"] == self._hostRef

    def getAttachedPBDs(self):
        """Return PBD records for all PBDs of this SR that are currently
        attached"""
        attachedPBDs = []
        pbds = self.session.xenapi.PBD.get_all_records()
        for pbdRec in pbds.values():
            if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]:
                attachedPBDs.append(pbdRec)
        return attachedPBDs

    def getOnlineHosts(self):
        """Return the online hosts of the pool (delegates to util)."""
        return util.get_online_hosts(self.session)

    def ensureInactive(self, hostRef, args):
        """Invoke the on-slave "multi" plugin on *hostRef* with *args*."""
        text = self.session.xenapi.host.call_plugin( \
            hostRef, self.PLUGIN_ON_SLAVE, "multi", args)
        Util.log("call-plugin returned: '%s'" % text)

    def getRecordHost(self, hostRef):
        return self.session.xenapi.host.get_record(hostRef)

    def _getRefVDI(self, uuid):
        # uuid -> XAPI VDI ref
        return self.session.xenapi.VDI.get_by_uuid(uuid)

    def getRefVDI(self, vdi):
        """Return the XAPI ref for *vdi* (a VDI object)."""
        return self._getRefVDI(vdi.uuid)

    def getRecordVDI(self, uuid):
        """Return the VDI record for *uuid*, or None if the lookup fails."""
        try:
            ref = self._getRefVDI(uuid)
            return self.session.xenapi.VDI.get_record(ref)
        except XenAPI.Failure:
            return None

    def singleSnapshotVDI(self, vdi):
        """Take an "internal"-type snapshot of *vdi*."""
        return self.session.xenapi.VDI.snapshot(vdi.getRef(),
                                                {"type": "internal"})

    def forgetVDI(self, srUuid, vdiUuid):
        """Forget the VDI, but handle the case where the VDI has already been
        forgotten (i.e. ignore errors)"""
        try:
            vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid)
            self.session.xenapi.VDI.forget(vdiRef)
        except XenAPI.Failure:
            pass

    def getConfigVDI(self, vdi, key):
        """Fetch the XAPI config field that *key* lives in for *vdi*.

        The field (sm-config / other-config / on-boot / allow_caching)
        is selected through vdi.CONFIG_TYPE.
        """
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef())
        elif kind == self.CONFIG_OTHER:
            cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef())
        elif kind == self.CONFIG_ON_BOOT:
            cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef())
        elif kind == self.CONFIG_ALLOW_CACHING:
            cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef())
        else:
            assert(False)
        Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg)))
        return cfg

    def removeFromConfigVDI(self, vdi, key):
        """Remove *key* from the appropriate config dict of *vdi*.

        Only sm-config and other-config keys can be removed.
        """
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key)
        else:
            assert(False)

    def addToConfigVDI(self, vdi, key, val):
        """Add *key* = *val* to the appropriate config dict of *vdi*.

        Only sm-config and other-config keys can be added.
        """
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val)
        else:
            assert(False)

    def isSnapshot(self, vdi):
        """True if XAPI marks *vdi* as a snapshot."""
        return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef())

    def markCacheSRsDirty(self):
        """Mark all SRs that have local caching enabled as dirty."""
        sr_refs = self.session.xenapi.SR.get_all_records_where( \
            'field "local_cache_enabled" = "true"')
        for sr_ref in sr_refs:
            Util.log("Marking SR %s dirty" % sr_ref)
            util.set_dirty(self.session, sr_ref)

    def srUpdate(self):
        """Start an async SR.update and wait up to ~60s for it to finish,
        cancelling the task on abort or timeout."""
        Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"])
        abortFlag = IPCFlag(self.srRecord["uuid"])
        task = self.session.xenapi.Async.SR.update(self._srRef)
        cancelTask = True
        try:
            for i in range(60):
                status = self.session.xenapi.task.get_status(task)
                if not status == "pending":
                    Util.log("SR.update_asynch status changed to [%s]" % status)
                    cancelTask = False
                    return
                if abortFlag.test(FLAG_TYPE_ABORT):
                    Util.log("Abort signalled during srUpdate, cancelling task...")
                    try:
                        self.session.xenapi.task.cancel(task)
                        cancelTask = False
                        Util.log("Task cancelled")
                    except:
                        pass
                    return
                time.sleep(1)
        finally:
            if cancelTask:
                self.session.xenapi.task.cancel(task)
            self.session.xenapi.task.destroy(task)
        Util.log("Asynch srUpdate still running, but timeout exceeded.")

    def update_task(self):
        """Push the current progress counters to the XAPI task object."""
        self.session.xenapi.task.set_other_config(
            self.task,
            {
                "applies_to": self._srRef
            })
        total = self.task_progress['coalescable'] + self.task_progress['done']
        if (total > 0):
            self.session.xenapi.task.set_progress(
                self.task, float(self.task_progress['done']) / total)

    def create_task(self, label, description):
        """Create a XAPI task used to report GC progress for this SR."""
        self.task = self.session.xenapi.task.create(label, description)
        self.update_task()

    def update_task_progress(self, key, value):
        """Update a progress counter ('coalescable'/'done') and refresh the
        task if one has been created."""
        self.task_progress[key] = value
        if self.task:
            self.update_task()

    def set_task_status(self, status):
        # no-op if no task was created
        if self.task:
            self.session.xenapi.task.set_status(self.task, status)

496 

497 

498################################################################################ 

499# 

500# VDI 

501# 

class VDI(object):
    """Object representing a VDI of a COW-based SR"""

    POLL_INTERVAL = 1   # seconds between abort-test polls
    POLL_TIMEOUT = 30
    DEVICE_MAJOR = 202  # block device major number — TODO confirm meaning

    # config keys & values
    DB_VDI_PARENT = "vhd-parent"
    DB_VDI_TYPE = "vdi_type"
    DB_VDI_BLOCKS = "vhd-blocks"
    DB_VDI_PAUSED = "paused"
    DB_VDI_RELINKING = "relinking"
    DB_VDI_ACTIVATING = "activating"
    DB_GC = "gc"
    DB_COALESCE = "coalesce"
    DB_LEAFCLSC = "leaf-coalesce"  # config key
    DB_GC_NO_SPACE = "gc_no_space"
    LEAFCLSC_DISABLED = "false"  # set by user; means do not leaf-coalesce
    LEAFCLSC_FORCE = "force"  # set by user; means skip snap-coalesce
    LEAFCLSC_OFFLINE = "offline"  # set here for informational purposes: means
                                  # no space to snap-coalesce or unable to keep
                                  # up with VDI. This is not used by the SM, it
                                  # might be used by external components.
    DB_ONBOOT = "on-boot"
    ONBOOT_RESET = "reset"
    DB_ALLOW_CACHING = "allow_caching"

    # maps each config key to the XAPI field (XAPI.CONFIG_*) it is stored in
    CONFIG_TYPE = {
        DB_VDI_PARENT: XAPI.CONFIG_SM,
        DB_VDI_TYPE: XAPI.CONFIG_SM,
        DB_VDI_BLOCKS: XAPI.CONFIG_SM,
        DB_VDI_PAUSED: XAPI.CONFIG_SM,
        DB_VDI_RELINKING: XAPI.CONFIG_SM,
        DB_VDI_ACTIVATING: XAPI.CONFIG_SM,
        DB_GC: XAPI.CONFIG_OTHER,
        DB_COALESCE: XAPI.CONFIG_OTHER,
        DB_LEAFCLSC: XAPI.CONFIG_OTHER,
        DB_ONBOOT: XAPI.CONFIG_ON_BOOT,
        DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING,
        DB_GC_NO_SPACE: XAPI.CONFIG_SM
    }

    LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024  # bytes
    LIVE_LEAF_COALESCE_TIMEOUT = 10  # seconds
    TIMEOUT_SAFETY_MARGIN = 0.5  # extra margin when calculating
                                 # feasibility of leaf coalesce

    JRN_RELINK = "relink"  # journal entry type for relinking children
    JRN_COALESCE = "coalesce"  # to communicate which VDI is being coalesced
    JRN_LEAF = "leaf"  # used in coalesce-leaf

    # indentation step used when printing VDI trees
    STR_TREE_INDENT = 4

    def __init__(self, sr, uuid, vdi_type):
        """Initialise a VDI record belonging to SR *sr*.

        sr: owning SR object; uuid: the VDI uuid; vdi_type: VdiType value
        used to select the matching CowUtil backend.
        """
        self.sr = sr
        # stays True until a successful scan/load clears it — TODO confirm
        self.scanError = True
        self.uuid = uuid
        self.vdi_type = vdi_type
        self.fileName = ""
        self.parentUuid = ""
        self.sizeVirt = -1          # -1 means "unknown" (see __str__)
        self._sizePhys = -1
        self._sizeAllocated = -1
        self._hidden = False
        self.parent = None
        self.children = []
        self._vdiRef = None
        self.cowutil = getCowUtil(vdi_type)
        self._clearRef()

572 

    @staticmethod
    def extractUuid(path):
        """Derive the VDI uuid from its *path* (subclass responsibility)."""
        raise NotImplementedError("Implement in sub class")

576 

    def load(self, info=None) -> None:
        """Load VDI info (no-op here; presumably overridden by subclasses)."""
        pass

580 

    def getDriverName(self) -> str:
        """Return the driver name used for tapdisk (the vdi_type itself)."""
        return self.vdi_type

583 

    def getRef(self):
        """Return the XAPI VDI ref, looking it up once and caching it."""
        if self._vdiRef is None:
            self._vdiRef = self.sr.xapi.getRefVDI(self)
        return self._vdiRef

588 

    def getConfig(self, key, default=None):
        """Return *key*'s value from the relevant XAPI config field, or
        *default* when missing.

        NOTE(review): a falsy stored value (e.g. False) also yields
        *default* — confirm that is intended.
        """
        config = self.sr.xapi.getConfigVDI(self, key)
        if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING:
            # these two are scalar XAPI fields, not dictionaries
            val = config
        else:
            val = config.get(key)
        if val:
            return val
        return default

598 

    def setConfig(self, key, val):
        """Set *key* = *val* in this VDI's XAPI config (remove-then-add)."""
        self.sr.xapi.removeFromConfigVDI(self, key)
        self.sr.xapi.addToConfigVDI(self, key, val)
        Util.log("Set %s = %s for %s" % (key, val, self))

603 

    def delConfig(self, key):
        """Remove *key* from this VDI's XAPI config."""
        self.sr.xapi.removeFromConfigVDI(self, key)
        Util.log("Removed %s from %s" % (key, self))

607 

    def ensureUnpaused(self):
        """Unpause the VDI if its config marks it as currently paused."""
        if self.getConfig(self.DB_VDI_PAUSED) == "true":
            Util.log("Unpausing VDI %s" % self)
            self.unpause()

612 

    def pause(self, failfast=False) -> None:
        """Pause the VDI's tapdisk; raise SMException on failure."""
        if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid,
                                     self.uuid, failfast):
            raise util.SMException("Failed to pause VDI %s" % self)

617 

    def _report_tapdisk_unpause_error(self):
        """Best-effort: create a XAPI message reporting a failed tapdisk
        unpause (errors while reporting are only logged)."""
        try:
            xapi = self.sr.xapi.session.xenapi
            sr_ref = xapi.SR.get_by_uuid(self.sr.uuid)
            msg_name = "failed to unpause tapdisk"
            msg_body = "Failed to unpause tapdisk for VDI %s, " \
                    "VMs using this tapdisk have lost access " \
                    "to the corresponding disk(s)" % self.uuid
            xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body)
        except Exception as e:
            util.SMlog("failed to generate message: %s" % e)

629 

    def unpause(self):
        """Unpause the VDI's tapdisk; report the failure via a XAPI message
        and raise SMException if it fails."""
        if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid,
                                       self.uuid):
            self._report_tapdisk_unpause_error()
            raise util.SMException("Failed to unpause VDI %s" % self)

635 

    def refresh(self, ignoreNonexistent=True):
        """Pause-unpause in one step"""
        self.sr.lock()
        try:
            try:
                if not blktap2.VDI.tap_refresh(self.sr.xapi.session,
                                               self.sr.uuid, self.uuid):
                    self._report_tapdisk_unpause_error()
                    raise util.SMException("Failed to refresh %s" % self)
            except XenAPI.Failure as e:
                # a VDI that vanished during refresh is not fatal by default
                if util.isInvalidVDI(e) and ignoreNonexistent:
                    Util.log("VDI %s not found, ignoring" % self)
                    return
                raise
        finally:
            self.sr.unlock()

652 

    def isSnapshot(self):
        """True if XAPI marks this VDI as a snapshot."""
        return self.sr.xapi.isSnapshot(self)

655 

    def isAttachedRW(self):
        """True if the VDI is attached read-write (decided from its
        sm-config)."""
        return util.is_attached_rw(
            self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef()))

659 

    def getVDIBlocks(self):
        """Return the decompressed block-allocation bitmap for this VDI.

        NOTE(review): updateBlockInfo() can store/return "skipped" for
        large QCOW2 maps, on which b64/zlib decoding would fail — confirm
        callers avoid that case.
        """
        val = self.updateBlockInfo()
        bitmap = zlib.decompress(base64.b64decode(val))
        return bitmap

664 

    def isCoalesceable(self):
        """A VDI is coalesceable if it has no siblings and is not a leaf"""
        # i.e. scanned OK, the only child of its parent, hidden, and has
        # children of its own
        return not self.scanError and \
                self.parent and \
                len(self.parent.children) == 1 and \
                self.isHidden() and \
                len(self.children) > 0

672 

    def isLeafCoalesceable(self):
        """A VDI is leaf-coalesceable if it has no siblings and is a leaf"""
        # i.e. scanned OK, the only child of its parent, visible, and
        # childless
        return not self.scanError and \
                self.parent and \
                len(self.parent.children) == 1 and \
                not self.isHidden() and \
                len(self.children) == 0

680 

681 def canLiveCoalesce(self, speed): 

682 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

683 isLeafCoalesceable() already""" 

684 feasibleSize = False 

685 allowedDownTime = \ 

686 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

687 allocated_size = self.getAllocatedSize() 

688 if speed: 

689 feasibleSize = \ 

690 allocated_size // speed < allowedDownTime 

691 else: 

692 feasibleSize = \ 

693 allocated_size < self.LIVE_LEAF_COALESCE_MAX_SIZE 

694 

695 return (feasibleSize or 

696 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 

697 

    def getAllPrunable(self):
        """Return the VDIs in this subtree that can be garbage collected."""
        if len(self.children) == 0:  # base case
            # it is possible to have a hidden leaf that was recently coalesced
            # onto its parent, its children already relinked but not yet
            # reloaded - in which case it may not be garbage collected yet:
            # some tapdisks could still be using the file.
            if self.sr.journaler.get(self.JRN_RELINK, self.uuid):
                return []
            if not self.scanError and self.isHidden():
                return [self]
            return []

        thisPrunable = True
        vdiList = []
        for child in self.children:
            childList = child.getAllPrunable()
            vdiList.extend(childList)
            if child not in childList:
                # a child that survives keeps this VDI alive too
                thisPrunable = False

        # We can destroy the current VDI if all childs are hidden BUT the
        # current VDI must be hidden too to do that!
        # Example in this case (after a failed live leaf coalesce):
        #
        # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees):
        # SMGC: [32436] b5458d61(1.000G/4.127M)
        # SMGC: [32436] *OLD_b545(1.000G/4.129M)
        #
        # OLD_b545 is hidden and must be removed, but b5458d61 not.
        # Normally we are not in this function when the delete action is
        # executed but in `_liveLeafCoalesce`.

        if not self.scanError and not self.isHidden() and thisPrunable:
            vdiList.append(self)
        return vdiList

733 

    def getSizePhys(self) -> int:
        """Physical (on-disk) size in bytes; -1 when unknown."""
        return self._sizePhys

736 

    def getAllocatedSize(self) -> int:
        """Allocated size in bytes; -1 when unknown."""
        return self._sizeAllocated

739 

740 def getTreeRoot(self): 

741 "Get the root of the tree that self belongs to" 

742 root = self 

743 while root.parent: 

744 root = root.parent 

745 return root 

746 

747 def getTreeHeight(self): 

748 "Get the height of the subtree rooted at self" 

749 if len(self.children) == 0: 

750 return 1 

751 

752 maxChildHeight = 0 

753 for child in self.children: 

754 childHeight = child.getTreeHeight() 

755 if childHeight > maxChildHeight: 

756 maxChildHeight = childHeight 

757 

758 return maxChildHeight + 1 

759 

760 def getAllLeaves(self) -> List["VDI"]: 

761 "Get all leaf nodes in the subtree rooted at self" 

762 if len(self.children) == 0: 

763 return [self] 

764 

765 leaves = [] 

766 for child in self.children: 

767 leaves.extend(child.getAllLeaves()) 

768 return leaves 

769 

    def updateBlockInfo(self) -> Optional[str]:
        """Query the image's block bitmap, store it (base64-encoded) under
        DB_VDI_BLOCKS in the VDI config, and return the encoded value."""
        val = base64.b64encode(self._queryCowBlocks()).decode()
        try:
            self.setConfig(VDI.DB_VDI_BLOCKS, val)
        except Exception:
            if self.vdi_type != VdiType.QCOW2:
                raise
            # Sometime with QCOW2, our allocation table is too big to be stored in XAPI, in this case we do not store it
            # and we write `skipped` instead so that hasWork is happy (and the GC doesn't run in loop indefinitely).
            self.setConfig(VDI.DB_VDI_BLOCKS, "skipped")

        return val

782 

    def rename(self, uuid) -> None:
        "Rename the VDI file"
        # the new uuid must not already exist in this SR
        assert(not self.sr.vdis.get(uuid))
        self._clearRef()
        oldUuid = self.uuid
        self.uuid = uuid
        self.children = []
        # updating the children themselves is the responsibility of the caller
        del self.sr.vdis[oldUuid]
        self.sr.vdis[self.uuid] = self

793 

    def delete(self) -> None:
        "Physically delete the VDI"
        # drop the per-VDI locks before clearing our own state
        lock.Lock.cleanup(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
        lock.Lock.cleanupAll(self.uuid)
        self._clear()

799 

    def getParent(self) -> str:
        """Return the parent path recorded in the COW image header
        (whitespace-stripped)."""
        return self.cowutil.getParent(self.path, lambda x: x.strip())

802 

    def repair(self, parent) -> None:
        """Repair *parent* via the COW backend."""
        self.cowutil.repair(parent)

805 

    @override
    def __str__(self) -> str:
        """Render as [*]uuid8(virt/phys/alloc)[type]; '*' marks a hidden
        VDI and '?' an unknown size."""
        strHidden = ""
        if self.isHidden():
            strHidden = "*"
        strSizeVirt = "?"
        if self.sizeVirt > 0:
            strSizeVirt = Util.num2str(self.sizeVirt)
        strSizePhys = "?"
        if self._sizePhys > 0:
            strSizePhys = "/%s" % Util.num2str(self._sizePhys)
        strSizeAllocated = "?"
        if self._sizeAllocated >= 0:
            strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated)
        strType = "[{}]".format(self.vdi_type)

        return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt,
                                   strSizePhys, strSizeAllocated, strType)

824 

    def validate(self, fast=False) -> None:
        """Check image integrity via the COW backend; raise SMException if
        the image is corrupted."""
        if self.cowutil.check(self.path, fast=fast) != CowUtil.CheckResult.Success:
            raise util.SMException("COW image %s corrupted" % self)

828 

    def _clear(self):
        """Reset identity fields after physical deletion."""
        self.uuid = ""
        self.path = ""
        self.parentUuid = ""
        self.parent = None
        self._clearRef()

835 

    def _clearRef(self):
        """Drop the cached XAPI ref (getRef() will look it up again)."""
        self._vdiRef = None

838 

    def _call_plug_cancel(self, hostRef):
        """Ask the on-slave plugin on *hostRef* to cancel an in-progress
        tapdisk commit."""
        args = {"path": self.path, "vdi_type": self.vdi_type}
        self.sr.xapi.session.xenapi.host.call_plugin( \
            hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_cancel", args)

843 

    def _call_plugin_coalesce(self, hostRef):
        """Run the tapdisk commit (remote coalesce) on *hostRef* via the
        on-slave plugin."""
        args = {"path": self.path, "vdi_type": self.vdi_type}
        util.SMlog("DAMS: Calling remote coalesce with: {}".format(args))
        ret = self.sr.xapi.session.xenapi.host.call_plugin( \
            hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_tapdisk", args)
        util.SMlog("DAMS: Remote coalesce returned {}".format(ret))

850 

    def _doCoalesceOnHost(self, hostRef):
        """Coalesce this VDI onto its parent by running the commit remotely
        on *hostRef*, abortable via the SR's gc_running file."""
        self.validate()
        self.parent.validate(True)
        self.parent._increaseSizeVirt(self.sizeVirt)
        self.sr._updateSlavesOnResize(self.parent)
        #TODO: We might need to make the LV RW on the slave directly for coalesce?
        # Children and parent need to be RW for QCOW2 coalesce, otherwise tapdisk(libqcow) will crash trying to access them

        def abortTest():
            # abort when the gc_running file is empty or cannot be read
            file = self.sr._gc_running_file(self)
            try:
                with open(file, "r") as f:
                    if not f.read():
                        #TODO: Need to call commit cancel on the hostRef if we stop
                        util.SMlog("DAMS: Cancelling")
                        self._call_plug_cancel(hostRef)
                        return True
            except OSError as e:
                if e.errno == errno.ENOENT:
                    util.SMlog("File {} does not exist".format(file))
                else:
                    util.SMlog("IOError: {}".format(e))
                return True
            return False

        #TODO: Add exception handling here like when callinng in a runAbortable situation_doCoalesceCOWImage
        Util.runAbortable(lambda: self._call_plugin_coalesce(hostRef),
                          None, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)

        self.parent.validate(True)
        #self._verifyContents(0)
        self.parent.updateBlockInfo()

883 

    def _isOpenOnHosts(self) -> Optional[str]:
        """Return the ref of a host that has this image open (per the
        on-slave "is_openers" plugin), or None."""
        for pbdRecord in self.sr.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            args = {"path": self.path}
            is_openers = util.strtobool(self.sr.xapi.session.xenapi.host.call_plugin( \
                hostRef, XAPI.PLUGIN_ON_SLAVE, "is_openers", args))
            if is_openers:
                return hostRef
        return None

893 

    def _doCoalesce(self) -> None:
        """Coalesce self onto parent. Only perform the actual coalescing of
        an image, but not the subsequent relinking. We'll do that as the next step,
        after reloading the entire SR in case things have changed while we
        were coalescing"""
        self.validate()
        self.parent.validate(True)
        # make room in the parent for our data before copying it in
        self.parent._increaseSizeVirt(self.sizeVirt)
        self.sr._updateSlavesOnResize(self.parent)
        self._coalesceCowImage(0)
        self.parent.validate(True)
        #self._verifyContents(0)
        self.parent.updateBlockInfo()

907 

    def _verifyContents(self, timeOut):
        """Verify the coalesce result with tapdisk-diff (abortable, bounded
        by *timeOut*)."""
        Util.log("  Coalesce verification on %s" % self)
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        Util.runAbortable(lambda: self._runTapdiskDiff(), True,
                          self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        Util.log("  Coalesce verification succeeded")

914 

    def _runTapdiskDiff(self):
        """Run tapdisk-diff against this image and its parent; raises via
        Util.doexec on mismatch, returns True on success."""
        cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \
                (self.getDriverName(), self.path, \
                self.parent.getDriverName(), self.parent.path)
        Util.doexec(cmd, 0)
        return True

921 

    @staticmethod
    def _reportCoalesceError(vdi, ce):
        """Reports a coalesce error to XenCenter.

        vdi: the VDI object on which the coalesce error occured
        ce: the CommandException that was raised"""

        msg_name = os.strerror(ce.code)
        if ce.code == errno.ENOSPC:
            # TODO We could add more information here, e.g. exactly how much
            # space is required for the particular coalesce, as well as actions
            # to be taken by the user and consequences of not taking these
            # actions.
            msg_body = 'Run out of space while coalescing.'
        elif ce.code == errno.EIO:
            msg_body = 'I/O error while coalescing.'
        else:
            msg_body = ''
        util.SMlog('Coalesce failed on SR %s: %s (%s)'
                % (vdi.sr.uuid, msg_name, msg_body))

        # Create a XenCenter message, but don't spam.
        xapi = vdi.sr.xapi.session.xenapi
        sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid)
        oth_cfg = xapi.SR.get_other_config(sr_ref)
        if COALESCE_ERR_RATE_TAG in oth_cfg:
            coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG])
        else:
            coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE

        # rate == 0: always message; rate > 0: throttle by minimum spacing;
        # rate < 0: never message
        xcmsg = False
        if coalesce_err_rate == 0:
            xcmsg = True
        elif coalesce_err_rate > 0:
            now = datetime.datetime.now()
            sm_cfg = xapi.SR.get_sm_config(sr_ref)
            if COALESCE_LAST_ERR_TAG in sm_cfg:
                # seconds per message (minimum distance in time between two
                # messages in seconds)
                spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60)
                last = datetime.datetime.fromtimestamp(
                        float(sm_cfg[COALESCE_LAST_ERR_TAG]))
                if now - last >= spm:
                    xapi.SR.remove_from_sm_config(sr_ref,
                            COALESCE_LAST_ERR_TAG)
                    xcmsg = True
            else:
                xcmsg = True
            if xcmsg:
                # remember when we last messaged, for future throttling
                xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG,
                        str(now.strftime('%s')))
        if xcmsg:
            xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body)

975 

976 def coalesce(self) -> int: 

977 return self.cowutil.coalesce(self.path) 

978 

979 @staticmethod 

980 def _doCoalesceCowImage(vdi: "VDI"): 

981 try: 

982 startTime = time.time() 

983 allocated_size = vdi.getAllocatedSize() 

984 coalesced_size = vdi.coalesce() 

985 endTime = time.time() 

986 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size) 

987 except util.CommandException as ce: 

988 # We use try/except for the following piece of code because it runs 

989 # in a separate process context and errors will not be caught and 

990 # reported by anyone. 

991 try: 

992 # Report coalesce errors back to user via XC 

993 VDI._reportCoalesceError(vdi, ce) 

994 except Exception as e: 

995 util.SMlog('failed to create XenCenter message: %s' % e) 

996 raise ce 

997 except: 

998 raise 

999 

1000 def _vdi_is_raw(self, vdi_path): 

1001 """ 

1002 Given path to vdi determine if it is raw 

1003 """ 

1004 uuid = self.extractUuid(vdi_path) 

1005 return self.sr.vdis[uuid].vdi_type == VdiType.RAW 

1006 

    def _coalesceCowImage(self, timeOut):
        """Run the COW coalesce of self into its parent, abortable via the
        SR's abort flag (and, for remote-coalesceable images, the gc_running
        file); on failure a best-effort parent repair is attempted before the
        exception is re-raised."""
        Util.log(" Running COW coalesce on %s" % self)

        def abortTest():
            # For images coalesceable on a remote host, an empty or missing
            # gc_running file also means "stop".
            if self.cowutil.isCoalesceableOnRemote():
                file = self.sr._gc_running_file(self)
                try:
                    with open(file, "r") as f:
                        if not f.read():
                            return True
                except OSError as e:
                    if e.errno == errno.ENOENT:
                        util.SMlog("File {} does not exist".format(file))
                    else:
                        util.SMlog("IOError: {}".format(e))
                    return True
            return IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)

        try:
            # test hook: may inject an artificial coalesce failure
            util.fistpoint.activate_custom_fn(
                    "cleanup_coalesceVHD_inject_failure",
                    util.inject_failure)
            Util.runAbortable(lambda: VDI._doCoalesceCowImage(self), None,
                    self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        except:
            # Exception at this phase could indicate a failure in COW coalesce
            # or a kill of COW coalesce by runAbortable due to timeOut
            # Try a repair and reraise the exception
            parent = ""
            try:
                parent = self.getParent()
                if not self._vdi_is_raw(parent):
                    # Repair error is logged and ignored. Error reraised later
                    util.SMlog('Coalesce failed on %s, attempting repair on ' \
                            'parent %s' % (self.uuid, parent))
                    self.repair(parent)
            except Exception as e:
                util.SMlog('(error ignored) Failed to repair parent %s ' \
                        'after failed coalesce on %s, err: %s' %
                        (parent, self.path, e))
            raise

        util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid)

1049 

1050 def _relinkSkip(self) -> None: 

1051 """Relink children of this VDI to point to the parent of this VDI""" 

1052 abortFlag = IPCFlag(self.sr.uuid) 

1053 for child in self.children: 

1054 if abortFlag.test(FLAG_TYPE_ABORT): 1054 ↛ 1055line 1054 didn't jump to line 1055, because the condition on line 1054 was never true

1055 raise AbortException("Aborting due to signal") 

1056 Util.log(" Relinking %s from %s to %s" % \ 

1057 (child, self, self.parent)) 

1058 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid) 

1059 child._setParent(self.parent) 

1060 self.children = [] 

1061 

1062 def _reloadChildren(self, vdiSkip): 

1063 """Pause & unpause all VDIs in the subtree to cause blktap to reload 

1064 the COW image metadata for this file in any online VDI""" 

1065 abortFlag = IPCFlag(self.sr.uuid) 

1066 for child in self.children: 

1067 if child == vdiSkip: 

1068 continue 

1069 if abortFlag.test(FLAG_TYPE_ABORT): 1069 ↛ 1070line 1069 didn't jump to line 1070, because the condition on line 1069 was never true

1070 raise AbortException("Aborting due to signal") 

1071 Util.log(" Reloading VDI %s" % child) 

1072 child._reload() 

1073 

1074 def _reload(self): 

1075 """Pause & unpause to cause blktap to reload the image metadata""" 

1076 for child in self.children: 1076 ↛ 1077line 1076 didn't jump to line 1077, because the loop on line 1076 never started

1077 child._reload() 

1078 

1079 # only leaves can be attached 

1080 if len(self.children) == 0: 1080 ↛ exitline 1080 didn't return from function '_reload', because the condition on line 1080 was never false

1081 try: 

1082 self.delConfig(VDI.DB_VDI_RELINKING) 

1083 except XenAPI.Failure as e: 

1084 if not util.isInvalidVDI(e): 

1085 raise 

1086 self.refresh() 

1087 

    def _tagChildrenForRelink(self):
        """Tag every leaf in this subtree as 'relinking' in its config.

        An activating leaf wins over tagging: we retry for up to 15 attempts,
        2 seconds apart, before giving up with an SMException."""
        if len(self.children) == 0:
            retries = 0
            try:
                while retries < 15:
                    retries += 1
                    if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None:
                        Util.log("VDI %s is activating, wait to relink" %
                                self.uuid)
                    else:
                        self.setConfig(VDI.DB_VDI_RELINKING, "True")

                        # re-check: activation may have started between the
                        # test above and the tag write
                        if self.getConfig(VDI.DB_VDI_ACTIVATING):
                            self.delConfig(VDI.DB_VDI_RELINKING)
                            Util.log("VDI %s started activating while tagging" %
                                    self.uuid)
                        else:
                            return
                    time.sleep(2)

                raise util.SMException("Failed to tag vdi %s for relink" % self)
            except XenAPI.Failure as e:
                # a VDI that disappeared meanwhile needs no tagging
                if not util.isInvalidVDI(e):
                    raise

        for child in self.children:
            child._tagChildrenForRelink()

1115 

1116 def _loadInfoParent(self): 

1117 ret = self.cowutil.getParent(self.path, LvmCowUtil.extractUuid) 

1118 if ret: 

1119 self.parentUuid = ret 

1120 

1121 def _setParent(self, parent) -> None: 

1122 self.cowutil.setParent(self.path, parent.path, False) 

1123 self.parent = parent 

1124 self.parentUuid = parent.uuid 

1125 parent.children.append(self) 

1126 try: 

1127 self.setConfig(self.DB_VDI_PARENT, self.parentUuid) 

1128 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1129 (self.uuid, self.parentUuid)) 

1130 except: 

1131 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1132 (self.uuid, self.parentUuid)) 

1133 

1134 def isHidden(self) -> bool: 

1135 if self._hidden is None: 1135 ↛ 1136line 1135 didn't jump to line 1136, because the condition on line 1135 was never true

1136 self._loadInfoHidden() 

1137 return self._hidden 

1138 

1139 def _loadInfoHidden(self) -> None: 

1140 hidden = self.cowutil.getHidden(self.path) 

1141 self._hidden = (hidden != 0) 

1142 

    def _setHidden(self, hidden=True) -> None:
        """Set the hidden flag inside the image and cache the new value."""
        # cleared before the write: if setHidden() raises, the cache is left
        # unset (None) rather than holding a stale value
        self._hidden = None
        self.cowutil.setHidden(self.path, hidden)
        self._hidden = hidden

1147 

    def _increaseSizeVirt(self, size, atomic=True) -> None:
        """ensure the virtual size of 'self' is at least 'size'. Note that
        resizing a COW image must always be offline and atomically: the file must
        not be open by anyone and no concurrent operations may take place.
        Thus we use the Agent API call for performing paused atomic
        operations. If the caller is already in the atomic context, it must
        call with atomic = False"""
        if self.sizeVirt >= size:
            return
        Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \
                (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

        msize = self.cowutil.getMaxResizeSize(self.path)
        if (size <= msize):
            # fast path: the image can grow in place (no journal file needed)
            self.cowutil.setSizeVirtFast(self.path, size)
        else:
            # slow path: the whole subtree must be paused around the resize
            if atomic:
                vdiList = self._getAllSubtree()
                self.sr.lock()
                try:
                    self.sr.pauseVDIs(vdiList)
                    try:
                        self._setSizeVirt(size)
                    finally:
                        self.sr.unpauseVDIs(vdiList)
                finally:
                    self.sr.unlock()
            else:
                self._setSizeVirt(size)

        # refresh the cached value from the image itself
        self.sizeVirt = self.cowutil.getSizeVirt(self.path)

1179 

1180 def _setSizeVirt(self, size) -> None: 

1181 """WARNING: do not call this method directly unless all VDIs in the 

1182 subtree are guaranteed to be unplugged (and remain so for the duration 

1183 of the operation): this operation is only safe for offline COW images""" 

1184 jFile = os.path.join(self.sr.path, self.uuid) 

1185 self.cowutil.setSizeVirt(self.path, size, jFile) 

1186 

1187 def _queryCowBlocks(self) -> bytes: 

1188 return self.cowutil.getBlockBitmap(self.path) 

1189 

1190 def _getCoalescedSizeData(self): 

1191 """Get the data size of the resulting image if we coalesce self onto 

1192 parent. We calculate the actual size by using the image block allocation 

1193 information (as opposed to just adding up the two image sizes to get an 

1194 upper bound)""" 

1195 # make sure we don't use stale BAT info from vdi_rec since the child 

1196 # was writable all this time 

1197 self.delConfig(VDI.DB_VDI_BLOCKS) 

1198 blocksChild = self.getVDIBlocks() 

1199 blocksParent = self.parent.getVDIBlocks() 

1200 numBlocks = Util.countBits(blocksChild, blocksParent) 

1201 Util.log("Num combined blocks = %d" % numBlocks) 

1202 sizeData = numBlocks * self.cowutil.getBlockSize(self.path) 

1203 assert(sizeData <= self.sizeVirt) 

1204 return sizeData 

1205 

1206 def _calcExtraSpaceForCoalescing(self) -> int: 

1207 sizeData = self._getCoalescedSizeData() 

1208 sizeCoalesced = sizeData + self.cowutil.calcOverheadBitmap(sizeData) + \ 

1209 self.cowutil.calcOverheadEmpty(self.sizeVirt) 

1210 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1211 return sizeCoalesced - self.parent.getSizePhys() 

1212 

1213 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1214 """How much extra space in the SR will be required to 

1215 [live-]leaf-coalesce this VDI""" 

1216 # the space requirements are the same as for inline coalesce 

1217 return self._calcExtraSpaceForCoalescing() 

1218 

1219 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1220 """How much extra space in the SR will be required to 

1221 snapshot-coalesce this VDI""" 

1222 return self._calcExtraSpaceForCoalescing() + \ 

1223 self.cowutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf 

1224 

1225 def _getAllSubtree(self): 

1226 """Get self and all VDIs in the subtree of self as a flat list""" 

1227 vdiList = [self] 

1228 for child in self.children: 

1229 vdiList.extend(child._getAllSubtree()) 

1230 return vdiList 

1231 

1232 

class FileVDI(VDI):
    """Object representing a VDI in a file-based SR (EXT or NFS)"""

    @override
    @staticmethod
    def extractUuid(path):
        """A file VDI's UUID is its file name without the extension."""
        return os.path.splitext(os.path.basename(path))[0]

    def __init__(self, sr, uuid, vdi_type):
        VDI.__init__(self, sr, uuid, vdi_type)
        self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])

    @override
    def load(self, info=None) -> None:
        """Populate this VDI from 'info', reading the image metadata from
        disk when no info is supplied."""
        if not info:
            if not util.pathexists(self.path):
                raise util.SMException("%s not found" % self.path)
            try:
                info = self.cowutil.getInfo(self.path, self.extractUuid)
            except util.SMException:
                # leave the VDI unloaded; scanError keeps its current value
                Util.log(" [VDI %s: failed to read COW image metadata]" % self.uuid)
                return
        self.parent = None
        self.children = []
        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        self._sizePhys = info.sizePhys
        self._sizeAllocated = info.sizeAllocated
        self._hidden = info.hidden
        self.scanError = False
        self.path = os.path.join(
                self.sr.path,
                "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type]))

    @override
    def rename(self, uuid) -> None:
        """Rename the VDI record and move its backing file to match."""
        oldPath = self.path
        VDI.rename(self, uuid)
        self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])
        self.path = os.path.join(self.sr.path, self.fileName)
        assert(not util.pathexists(self.path))
        Util.log("Renaming %s -> %s" % (oldPath, self.path))
        os.rename(oldPath, self.path)

    @override
    def delete(self) -> None:
        """Unlink the backing file and forget the VDI (leaves only)."""
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" % \
                    self.uuid)
        try:
            self.sr.lock()
            try:
                os.unlink(self.path)
                self.sr.forgetVDI(self.uuid)
            finally:
                self.sr.unlock()
        except OSError:
            raise util.SMException("os.unlink(%s) failed" % self.path)
        VDI.delete(self)

    @override
    def getAllocatedSize(self) -> int:
        """Return the allocated size, querying the image lazily on first use."""
        if self._sizeAllocated == -1:
            self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)
        return self._sizeAllocated

1298 

1299 

class LVMVDI(VDI):
    """Object representing a VDI in an LVM SR"""

    JRN_ZERO = "zero" # journal entry type for zeroing out end of parent

    @override
    def load(self, info=None) -> None:
        """Populate this VDI from a pre-scanned LV info record."""
        # `info` is always set. `None` default value is only here to match parent method.
        assert info, "No info given to LVMVDI.load"
        self.parent = None
        self.children = []
        # -1 marks "not yet fetched"; loaded lazily by the getters below
        self._sizePhys = -1
        self._sizeAllocated = -1
        self.scanError = info.scanError
        self.sizeLV = info.sizeLV
        self.sizeVirt = info.sizeVirt
        self.fileName = info.lvName
        self.lvActive = info.lvActive
        self.lvOpen = info.lvOpen
        self.lvReadonly = info.lvReadonly
        self._hidden = info.hidden
        self.parentUuid = info.parentUuid
        self.path = os.path.join(self.sr.path, self.fileName)
        self.lvmcowutil = LvmCowUtil(self.cowutil)

    @override
    @staticmethod
    def extractUuid(path):
        """Extract the VDI UUID from an LV path (delegated to LvmCowUtil)."""
        return LvmCowUtil.extractUuid(path)

    def inflate(self, size):
        """inflate the LV containing the COW image to 'size'"""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self.sr.lock()
        try:
            self.lvmcowutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size)
            util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid)
        finally:
            self.sr.unlock()
        # refresh the LV size and invalidate the lazily-loaded sizes
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizePhys = -1
        self._sizeAllocated = -1

    def deflate(self):
        """deflate the LV containing the image to minimum"""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self.sr.lock()
        try:
            self.lvmcowutil.deflate(self.sr.lvmCache, self.fileName, self.getSizePhys())
        finally:
            self.sr.unlock()
        # refresh the LV size and invalidate the lazily-loaded sizes
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizePhys = -1
        self._sizeAllocated = -1

    def inflateFully(self):
        """Inflate the LV to the full size implied by the virtual size."""
        self.inflate(self.lvmcowutil.calcVolumeSize(self.sizeVirt))

    def inflateParentForCoalesce(self):
        """Inflate the parent only as much as needed for the purposes of
        coalescing"""
        if not VdiType.isCowImage(self.parent.vdi_type):
            return
        inc = self._calcExtraSpaceForCoalescing()
        if inc > 0:
            util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid)
            self.parent.inflate(self.parent.sizeLV + inc)

    @override
    def updateBlockInfo(self) -> Optional[str]:
        """Only COW images carry block info; raw LVs have none to record."""
        if VdiType.isCowImage(self.vdi_type):
            return VDI.updateBlockInfo(self)
        return None

    @override
    def rename(self, uuid) -> None:
        """Rename the VDI, its LV, and migrate activator/refcount state."""
        oldUuid = self.uuid
        oldLVName = self.fileName
        VDI.rename(self, uuid)
        self.fileName = LV_PREFIX[self.vdi_type] + self.uuid
        self.path = os.path.join(self.sr.path, self.fileName)
        assert(not self.sr.lvmCache.checkLV(self.fileName))

        self.sr.lvmCache.rename(oldLVName, self.fileName)
        if self.sr.lvActivator.get(oldUuid, False):
            self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False)

        # move the refcounts over to the new UUID and clear the old ones
        ns = NS_PREFIX_LVM + self.sr.uuid
        (cnt, bcnt) = RefCounter.check(oldUuid, ns)
        RefCounter.set(self.uuid, cnt, bcnt, ns)
        RefCounter.reset(oldUuid, ns)

    @override
    def delete(self) -> None:
        """Remove the LV, forget the VDI and clear its refcounts (leaves only)."""
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" % \
                    self.uuid)
        self.sr.lock()
        try:
            self.sr.lvmCache.remove(self.fileName)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        RefCounter.reset(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
        VDI.delete(self)

    @override
    def getSizePhys(self) -> int:
        """Return the physical utilization, loading it lazily on first use."""
        if self._sizePhys == -1:
            self._loadInfoSizePhys()
        return self._sizePhys

    def _loadInfoSizePhys(self):
        """Get the physical utilization of the COW image file. We do it individually
        (and not using the COW batch scanner) as an optimization: this info is
        relatively expensive and we need it only for VDI's involved in
        coalescing."""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self._sizePhys = self.cowutil.getSizePhys(self.path)
        if self._sizePhys <= 0:
            raise util.SMException("phys size of %s = %d" % \
                    (self, self._sizePhys))

    @override
    def getAllocatedSize(self) -> int:
        """Return the allocated size, loading it lazily on first use."""
        if self._sizeAllocated == -1:
            self._loadInfoSizeAllocated()
        return self._sizeAllocated

    def _loadInfoSizeAllocated(self):
        """
        Get the allocated size of the COW volume.
        """
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)

    @override
    def _loadInfoHidden(self) -> None:
        """Raw LVs keep the hidden flag in LVM metadata; COW images inside
        the image itself (generic implementation)."""
        if not VdiType.isCowImage(self.vdi_type):
            self._hidden = self.sr.lvmCache.getHidden(self.fileName)
        else:
            VDI._loadInfoHidden(self)

    @override
    def _setHidden(self, hidden=True) -> None:
        """Persist the hidden flag in LVM metadata (raw) or in the image (COW)."""
        if not VdiType.isCowImage(self.vdi_type):
            # cache cleared first so a failed write leaves it unset, not stale
            self._hidden = None
            self.sr.lvmCache.setHidden(self.fileName, hidden)
            self._hidden = hidden
        else:
            VDI._setHidden(self, hidden)

    @override
    def __str__(self) -> str:
        """Compact one-line summary: hidden marker, short UUID, type, sizes
        (virt/phys/allocated/LV) and LV activity flags."""
        strType = self.vdi_type
        if self.vdi_type == VdiType.RAW:
            strType = "RAW"
        strHidden = ""
        if self.isHidden():
            strHidden = "*"
        strSizePhys = ""
        if self._sizePhys > 0:
            strSizePhys = Util.num2str(self._sizePhys)
        strSizeAllocated = ""
        if self._sizeAllocated >= 0:
            strSizeAllocated = Util.num2str(self._sizeAllocated)
        # "n" = inactive, "a" = active, "ao" = active and open
        strActive = "n"
        if self.lvActive:
            strActive = "a"
        if self.lvOpen:
            strActive += "o"
        return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType,
                Util.num2str(self.sizeVirt), strSizePhys, strSizeAllocated,
                Util.num2str(self.sizeLV), strActive)

    @override
    def validate(self, fast=False) -> None:
        """Only COW images can be validated; raw LVs are skipped."""
        if VdiType.isCowImage(self.vdi_type):
            VDI.validate(self, fast)

    def _setChainRw(self) -> List[str]:
        """
        Set the readonly LV and children writable.
        It's needed because the coalesce can be done by tapdisk directly
        and it will need to write parent information for children.
        The VDI we want to coalesce into it's parent need to be writable for libqcow coalesce part.
        Return a list of the LV that were previously readonly to be made RO again after the coalesce.
        """
        was_ro = []
        if self.lvReadonly:
            self.sr.lvmCache.setReadonly(self.fileName, False)
            was_ro.append(self.fileName)

        for child in self.children:
            if child.lvReadonly:
                self.sr.lvmCache.setReadonly(child.fileName, False)
                was_ro.append(child.fileName)

        return was_ro

    def _setChainRo(self, was_ro: List[str]) -> None:
        """Set the list of LV in parameters to readonly"""
        for lvName in was_ro:
            self.sr.lvmCache.setReadonly(lvName, True)

    @override
    def _doCoalesce(self) -> None:
        """LVMVDI parents must first be activated, inflated, and made writable"""
        was_ro = []
        try:
            self._activateChain()
            self.sr.lvmCache.setReadonly(self.parent.fileName, False)
            self.parent.validate()
            self.inflateParentForCoalesce()
            was_ro = self._setChainRw()
            VDI._doCoalesce(self)
        finally:
            # undo the setup even on failure: shrink the parent back, restore
            # the read-only flags on parent and chain
            self.parent._loadInfoSizePhys()
            self.parent.deflate()
            self.sr.lvmCache.setReadonly(self.parent.fileName, True)
            self._setChainRo(was_ro)

    @override
    def _setParent(self, parent) -> None:
        """Write the parent pointer into the (temporarily writable) LV and
        mirror the link in memory and in the XAPI config."""
        self._activate()
        if self.lvReadonly:
            self.sr.lvmCache.setReadonly(self.fileName, False)

        try:
            self.cowutil.setParent(self.path, parent.path, parent.vdi_type == VdiType.RAW)
        finally:
            if self.lvReadonly:
                self.sr.lvmCache.setReadonly(self.fileName, True)
            self._deactivate()
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
            Util.log("Updated the VDI-parent field for child %s with %s" % \
                    (self.uuid, self.parentUuid))
        except:
            # best-effort: the on-disk parent pointer is authoritative
            Util.log("Failed to update the VDI-parent with %s for child %s" % \
                    (self.parentUuid, self.uuid))

    def _activate(self):
        """Activate this VDI's LV (refcounted via the SR's LVActivator)."""
        self.sr.lvActivator.activate(self.uuid, self.fileName, False)

    def _activateChain(self):
        """Activate every LV from this VDI up to the root of the chain."""
        vdi = self
        while vdi:
            vdi._activate()
            vdi = vdi.parent

    def _deactivate(self):
        """Release this VDI's LV activation."""
        self.sr.lvActivator.deactivate(self.uuid, False)

    @override
    def _increaseSizeVirt(self, size, atomic=True) -> None:
        "ensure the virtual size of 'self' is at least 'size'"
        self._activate()
        if VdiType.isCowImage(self.vdi_type):
            VDI._increaseSizeVirt(self, size, atomic)
            return

        # raw VDI case
        offset = self.sizeLV
        if self.sizeVirt < size:
            oldSize = self.sizeLV
            self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size)
            Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV))
            self.sr.lvmCache.setSize(self.fileName, self.sizeLV)
            offset = oldSize
        unfinishedZero = False
        # a leftover journal entry means a previous zeroing was interrupted:
        # restart from the recorded offset
        jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid)
        if jval:
            unfinishedZero = True
            offset = int(jval)
        length = self.sizeLV - offset
        if not length:
            return

        if unfinishedZero:
            Util.log(" ==> Redoing unfinished zeroing out")
        else:
            self.sr.journaler.create(self.JRN_ZERO, self.uuid, \
                    str(offset))
        Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        func = lambda: util.zeroOut(self.path, offset, length)
        Util.runAbortable(func, True, self.sr.uuid, abortTest,
                VDI.POLL_INTERVAL, 0)
        self.sr.journaler.remove(self.JRN_ZERO, self.uuid)

    @override
    def _setSizeVirt(self, size) -> None:
        """WARNING: do not call this method directly unless all VDIs in the
        subtree are guaranteed to be unplugged (and remain so for the duration
        of the operation): this operation is only safe for offline COW images."""
        self._activate()
        jFile = self.lvmcowutil.createResizeJournal(self.sr.lvmCache, self.uuid)
        try:
            self.lvmcowutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size, jFile)
        finally:
            self.lvmcowutil.destroyResizeJournal(self.sr.lvmCache, self.uuid)

    @override
    def _queryCowBlocks(self) -> bytes:
        """The LV must be active before the bitmap can be read."""
        self._activate()
        return VDI._queryCowBlocks(self)

    @override
    def _calcExtraSpaceForCoalescing(self) -> int:
        """Extra VG space needed to coalesce onto the parent LV."""
        if not VdiType.isCowImage(self.parent.vdi_type):
            return 0 # raw parents are never deflated in the first place
        sizeCoalesced = self.lvmcowutil.calcVolumeSize(self._getCoalescedSizeData())
        Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
        return sizeCoalesced - self.parent.sizeLV

    @override
    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        """How much extra space in the SR will be required to
        [live-]leaf-coalesce this VDI"""
        # we can deflate the leaf to minimize the space requirements
        deflateDiff = self.sizeLV - lvutil.calcSizeLV(self.getSizePhys())
        return self._calcExtraSpaceForCoalescing() - deflateDiff

    @override
    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        """Extra space for snapshot-coalescing: a plain coalesce plus room
        for the snapshot leaf LV."""
        return self._calcExtraSpaceForCoalescing() + \
                lvutil.calcSizeLV(self.getSizePhys())

1639 

1640 

1641class LinstorVDI(VDI): 

1642 """Object representing a VDI in a LINSTOR SR""" 

1643 

1644 VOLUME_LOCK_TIMEOUT = 30 

1645 

    @override
    def load(self, info=None) -> None:
        """Populate this VDI from a LINSTOR volume info object.

        NOTE(review): `info.parentUuid` and `info.vdiType` are read BEFORE
        the `if not info:` fallback below, so when `info` is None this method
        raises AttributeError first and the self-fetching branch is
        effectively unreachable — confirm whether callers ever pass None.
        """
        self.parentUuid = info.parentUuid
        # pessimistic default; flipped to False only after a complete load
        self.scanError = True
        self.parent = None
        self.children = []

        self.fileName = self.sr._linstor.get_volume_name(self.uuid)
        self.path = self.sr._linstor.build_device_path(self.fileName)
        self.linstorcowutil = LinstorCowUtil(self.sr.xapi.session, self.sr._linstor, info.vdiType)

        if not info:
            try:
                info = self.linstorcowutil.get_info(self.uuid)
            except util.SMException:
                Util.log(
                    ' [VDI {}: failed to read COW image metadata]'.format(self.uuid)
                )
                return

        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        # -1 marks "not yet fetched"; loaded lazily by the getters
        self._sizePhys = -1
        self._sizeAllocated = -1
        self.drbd_size = -1
        self._hidden = info.hidden
        self.scanError = False

1673 

1674 @override 

1675 def getSizePhys(self, fetch=False) -> int: 

1676 if self._sizePhys < 0 or fetch: 

1677 self._sizePhys = self.linstorcowutil.get_size_phys(self.uuid) 

1678 return self._sizePhys 

1679 

1680 def getDrbdSize(self, fetch=False): 

1681 if self.drbd_size < 0 or fetch: 

1682 self.drbd_size = self.linstorcowutil.get_drbd_size(self.uuid) 

1683 return self.drbd_size 

1684 

1685 @override 

1686 def getAllocatedSize(self) -> int: 

1687 if self._sizeAllocated == -1: 

1688 if VdiType.isCowImage(self.vdi_type): 

1689 self._sizeAllocated = self.linstorcowutil.get_allocated_size(self.uuid) 

1690 return self._sizeAllocated 

1691 

    def inflate(self, size):
        """Inflate the LINSTOR volume backing this COW image to 'size'
        (no-op for non-COW volumes); invalidates the cached sizes."""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self.sr.lock()
        try:
            # Ensure we use the real DRBD size and not the cached one.
            # Why? Because this attribute can be changed if volume is resized by user.
            self.drbd_size = self.getDrbdSize(fetch=True)
            self.linstorcowutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size)
        finally:
            self.sr.unlock()
        # force a re-read on next access
        self.drbd_size = -1
        self._sizePhys = -1
        self._sizeAllocated = -1

1706 

    def deflate(self):
        """Deflate the LINSTOR volume backing this COW image to its minimum
        (no-op for non-COW volumes); invalidates the cached sizes."""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self.sr.lock()
        try:
            # Ensure we use the real sizes and not the cached info.
            self.drbd_size = self.getDrbdSize(fetch=True)
            self._sizePhys = self.getSizePhys(fetch=True)
            self.linstorcowutil.force_deflate(self.path, self._sizePhys, self.drbd_size, zeroize=False)
        finally:
            self.sr.unlock()
        # force a re-read on next access
        self.drbd_size = -1
        self._sizePhys = -1
        self._sizeAllocated = -1

1721 

1722 def inflateFully(self): 

1723 if VdiType.isCowImage(self.vdi_type): 

1724 self.inflate(self.linstorcowutil.compute_volume_size(self.sizeVirt)) 

1725 

1726 @override 

1727 def rename(self, uuid) -> None: 

1728 Util.log('Renaming {} -> {} (path={})'.format( 

1729 self.uuid, uuid, self.path 

1730 )) 

1731 self.sr._linstor.update_volume_uuid(self.uuid, uuid) 

1732 VDI.rename(self, uuid) 

1733 

    @override
    def delete(self) -> None:
        """Destroy the LINSTOR volume and forget the VDI (leaves only)."""
        if len(self.children) > 0:
            raise util.SMException(
                'VDI {} has children, can\'t delete'.format(self.uuid)
            )
        self.sr.lock()
        try:
            self.sr._linstor.destroy_volume(self.uuid)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        VDI.delete(self)

1747 

1748 @override 

1749 def validate(self, fast=False) -> None: 

1750 if VdiType.isCowImage(self.vdi_type) and self.linstorcowutil.check(self.uuid, fast=fast) != CowUtil.CheckResult.Success: 

1751 raise util.SMException('COW image {} corrupted'.format(self)) 

1752 

1753 @override 

1754 def pause(self, failfast=False) -> None: 

1755 self.sr._linstor.ensure_volume_is_not_locked( 

1756 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1757 ) 

1758 return super(LinstorVDI, self).pause(failfast) 

1759 

1760 @override 

1761 def coalesce(self) -> int: 

1762 # Note: We raise `SMException` here to skip the current coalesce in case of failure. 

1763 # Using another exception we can't execute the next coalesce calls. 

1764 return self.linstorcowutil.force_coalesce(self.path) 

1765 

1766 @override 

1767 def getParent(self) -> str: 

1768 return self.linstorcowutil.get_parent( 

1769 self.sr._linstor.get_volume_uuid_from_device_path(self.path) 

1770 ) 

1771 

1772 @override 

1773 def repair(self, parent_uuid) -> None: 

1774 self.linstorcowutil.force_repair( 

1775 self.sr._linstor.get_device_path(parent_uuid) 

1776 ) 

1777 

    @override
    def _relinkSkip(self) -> None:
        """Relink children of this VDI to its parent, pausing each child's
        tapdisk around the parent switch."""
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException('Aborting due to signal')
            Util.log(
                ' Relinking {} from {} to {}'.format(
                    child, self, self.parent
                )
            )

            session = child.sr.xapi.session
            sr_uuid = child.sr.uuid
            vdi_uuid = child.uuid
            try:
                self.sr._linstor.ensure_volume_is_not_locked(
                    vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT
                )
                blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid)
                child._setParent(self.parent)
            finally:
                # NOTE(review): this runs even when the lock wait or pause
                # above failed, so an unpause may be issued for a VDI that
                # was never paused — confirm tap_unpause tolerates that.
                blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid)
        self.children = []

1802 

1803 @override 

1804 def _setParent(self, parent) -> None: 

1805 self.sr._linstor.get_device_path(self.uuid) 

1806 self.linstorcowutil.force_parent(self.path, parent.path) 

1807 self.parent = parent 

1808 self.parentUuid = parent.uuid 

1809 parent.children.append(self) 

1810 try: 

1811 self.setConfig(self.DB_VDI_PARENT, self.parentUuid) 

1812 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1813 (self.uuid, self.parentUuid)) 

1814 except: 

1815 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1816 (self.uuid, self.parentUuid)) 

1817 

    @override
    def _doCoalesce(self) -> None:
        """Coalesce onto the parent: activate the chain, validate and inflate
        the parent, run the base coalesce, and always deflate the parent after."""
        try:
            self._activateChain()
            self.parent.validate()
            self._inflateParentForCoalesce()
            VDI._doCoalesce(self)
        finally:
            self.parent.deflate()

1827 

1828 def _activateChain(self): 

1829 vdi = self 

1830 while vdi: 

1831 try: 

1832 p = self.sr._linstor.get_device_path(vdi.uuid) 

1833 except Exception as e: 

1834 # Use SMException to skip coalesce. 

1835 # Otherwise the GC is stopped... 

1836 raise util.SMException(str(e)) 

1837 vdi = vdi.parent 

1838 

    @override
    def _setHidden(self, hidden=True) -> None:
        """Set or clear the hidden flag on this VDI.

        Non-COW (raw) volumes cannot carry the flag inside the image, so it
        is stored in the LINSTOR volume metadata instead.
        """
        HIDDEN_TAG = 'hidden'

        if not VdiType.isCowImage(self.vdi_type):
            # Reset first so a failed metadata update leaves the cached flag unset.
            self._hidden = None
            self.sr._linstor.update_volume_metadata(self.uuid, {
                HIDDEN_TAG: hidden
            })
            self._hidden = hidden
        else:
            VDI._setHidden(self, hidden)

1851 

    @override
    def _increaseSizeVirt(self, size, atomic=True):
        """Grow the virtual size of this VDI to at least `size` bytes.

        RAW volumes: resize through LINSTOR and zero out the newly added
        region; the zeroing is resumable through a ZERO journal entry.
        COW images: use the fast in-place resize when `size` fits under the
        helper's max fast-resize size, otherwise the journalled resize,
        pausing the whole subtree first when `atomic` is set.
        """
        if self.vdi_type == VdiType.RAW:
            offset = self.drbd_size
            if self.sizeVirt < size:
                oldSize = self.drbd_size
                self.drbd_size = LinstorVolumeManager.round_up_volume_size(size)
                Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size))
                self.sr._linstor.resize_volume(self.uuid, self.drbd_size)
                offset = oldSize
            unfinishedZero = False
            jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid)
            if jval:
                # A previous zeroing was interrupted: resume from the
                # journalled offset instead of the old size.
                unfinishedZero = True
                offset = int(jval)
            length = self.drbd_size - offset
            if not length:
                return

            if unfinishedZero:
                Util.log(" ==> Redoing unfinished zeroing out")
            else:
                self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset))
            Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
            abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
            func = lambda: util.zeroOut(self.path, offset, length)
            Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)
            self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid)
            return

        if self.sizeVirt >= size:
            return
        Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \
                (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

        # get_max_resize_size returns MiB; convert to bytes for comparison.
        msize = self.linstorcowutil.get_max_resize_size(self.uuid) * 1024 * 1024
        if (size <= msize):
            # Fast path: no resize journal needed.
            self.linstorcowutil.set_size_virt_fast(self.path, size)
        else:
            if atomic:
                vdiList = self._getAllSubtree()
                self.sr.lock()
                try:
                    self.sr.pauseVDIs(vdiList)
                    try:
                        self._setSizeVirt(size)
                    finally:
                        self.sr.unpauseVDIs(vdiList)
                finally:
                    self.sr.unlock()
            else:
                self._setSizeVirt(size)

        # Refresh the cached virtual size from the image itself.
        self.sizeVirt = self.linstorcowutil.get_size_virt(self.uuid)

1906 

    @override
    def _setSizeVirt(self, size) -> None:
        """Journalled virtual-size change, using a temporary non-persistent
        LINSTOR volume to hold the resize journal."""
        jfile = self.uuid + '-jvhd'
        self.sr._linstor.create_volume(
            jfile, self.cowutil.getResizeJournalSize(), persistent=False, volume_name=jfile
        )
        try:
            self.inflate(self.linstorcowutil.compute_volume_size(size))
            self.linstorcowutil.set_size_virt(self.path, size, jfile)
        finally:
            try:
                self.sr._linstor.destroy_volume(jfile)
            except Exception:
                # We can ignore it, in any case this volume is not persistent.
                pass

1922 

    @override
    def _queryCowBlocks(self) -> bytes:
        """Return the allocated-block bitmap of this COW image."""
        return self.linstorcowutil.get_block_bitmap(self.uuid)

1926 

    def _inflateParentForCoalesce(self):
        """Grow the parent volume by the extra space coalescing will need."""
        if not VdiType.isCowImage(self.parent.vdi_type):
            # Raw parents need no inflation.
            return
        inc = self._calcExtraSpaceForCoalescing()
        if inc > 0:
            self.parent.inflate(self.parent.getDrbdSize() + inc)

1933 

    @override
    def _calcExtraSpaceForCoalescing(self) -> int:
        """Return the extra bytes the parent needs to absorb this VDI's data."""
        if not VdiType.isCowImage(self.parent.vdi_type):
            return 0
        size_coalesced = self.linstorcowutil.compute_volume_size(self._getCoalescedSizeData())
        Util.log("Coalesced size = %s" % Util.num2str(size_coalesced))
        return size_coalesced - self.parent.getDrbdSize()

1941 

    @override
    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        """Extra space needed for live leaf-coalesce: the coalescing cost minus
        what deflating this leaf would give back."""
        assert self.getDrbdSize() > 0
        assert self.getSizePhys() > 0
        deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizePhys())
        assert deflate_diff >= 0
        return self._calcExtraSpaceForCoalescing() - deflate_diff

1949 

    @override
    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        """Extra space for snapshot-coalesce: the coalescing cost plus room for
        a new leaf of the same physical size."""
        assert self.getSizePhys() > 0
        return self._calcExtraSpaceForCoalescing() + \
               LinstorVolumeManager.round_up_volume_size(self.getSizePhys())

1955 

1956################################################################################ 

1957# 

1958# SR 

1959# 

1960class SR(object): 

    class LogFilter:
        """Logs the SR's VDI forest, printing only the trees that changed
        since the previous logState() call."""

        def __init__(self, sr):
            self.sr = sr
            self.stateLogged = False  # becomes True after the first logState()
            self.prevState = {}       # tree-root uuid -> rendered tree string
            self.currState = {}

        def logState(self):
            """Log the current VDI trees, filtered to changes after the first call."""
            changes = ""
            self.currState.clear()
            for vdi in self.sr.vdiTrees:
                self.currState[vdi.uuid] = self._getTreeStr(vdi)
                if not self.prevState.get(vdi.uuid) or \
                        self.prevState[vdi.uuid] != self.currState[vdi.uuid]:
                    changes += self.currState[vdi.uuid]

            # Report trees that disappeared since the last call.
            for uuid in self.prevState:
                if not self.currState.get(uuid):
                    changes += "Tree %s gone\n" % uuid

            result = "SR %s (%d VDIs in %d COW trees): " % \
                    (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees))

            if len(changes) > 0:
                if self.stateLogged:
                    result += "showing only COW trees that changed:"
                result += "\n%s" % changes
            else:
                result += "no changes"

            for line in result.split("\n"):
                Util.log("%s" % line)
            self.prevState.clear()
            for key, val in self.currState.items():
                self.prevState[key] = val
            self.stateLogged = True

        def logNewVDI(self, uuid):
            """Log discovery of a new VDI (only after initial state was logged)."""
            if self.stateLogged:
                Util.log("Found new VDI when scanning: %s" % uuid)

        def _getTreeStr(self, vdi, indent=8):
            """Render `vdi` and its descendants as an indented tree string."""
            treeStr = "%s%s\n" % (" " * indent, vdi)
            for child in vdi.children:
                treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT)
            return treeStr

2007 

    # Recognized SR backend types (see getInstance()).
    TYPE_FILE = "file"
    TYPE_LVHD = "lvhd"
    TYPE_LINSTOR = "linstor"
    TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR]

    # SR-lock retry tuning (seconds between attempts, attempt counts).
    LOCK_RETRY_INTERVAL = 3
    LOCK_RETRY_ATTEMPTS = 20
    LOCK_RETRY_ATTEMPTS_LOCK = 100

    SCAN_RETRY_ATTEMPTS = 3

    JRN_CLONE = "clone"  # journal entry type for the clone operation (from SM)
    TMP_RENAME_PREFIX = "OLD_"  # prefix for temporarily renamed VDIs

    # other_config keys signalling that leaf-coalesce must happen offline.
    KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline"
    KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override"

2024 

2025 @staticmethod 

2026 def getInstance(uuid, xapiSession, createLock=True, force=False): 

2027 xapi = XAPI(xapiSession, uuid) 

2028 type = normalizeType(xapi.srRecord["type"]) 

2029 if type == SR.TYPE_FILE: 

2030 return FileSR(uuid, xapi, createLock, force) 

2031 elif type == SR.TYPE_LVHD: 

2032 return LVMSR(uuid, xapi, createLock, force) 

2033 elif type == SR.TYPE_LINSTOR: 

2034 return LinstorSR(uuid, xapi, createLock, force) 

2035 raise util.SMException("SR type %s not recognized" % type) 

2036 

    def __init__(self, uuid, xapi, createLock, force):
        """Initialize SR state; verify the SR is plugged here and that we run
        on the pool master (both checks skipped when `force` is set).

        Raises util.SMException when the SR never becomes plugged or when
        this host is not the master.
        """
        self.logFilter = self.LogFilter(self)
        self.uuid = uuid
        self.path = ""
        self.name = ""
        self.vdis = {}       # uuid -> VDI
        self.vdiTrees = []   # roots of the VDI forest
        self.journaler = None
        self.xapi = xapi
        self._locked = 0     # re-entrant lock depth, see lock()/unlock()
        self._srLock = None
        if createLock:
            self._srLock = lock.Lock(lock.LOCK_TYPE_SR, self.uuid)
        else:
            Util.log("Requested no SR locking")
        self.name = self.xapi.srRecord["name_label"]
        self._failedCoalesceTargets = []

        if not self.xapi.isPluggedHere():
            if force:
                Util.log("SR %s not attached on this host, ignoring" % uuid)
            else:
                # Give the SR a chance to finish plugging before giving up.
                if not self.wait_for_plug():
                    raise util.SMException("SR %s not attached on this host" % uuid)

        if force:
            Util.log("Not checking if we are Master (SR %s)" % uuid)
        elif not self.xapi.isMaster():
            raise util.SMException("This host is NOT master, will not run")

        self.no_space_candidates = {}

2068 

2069 def msg_cleared(self, xapi_session, msg_ref): 

2070 try: 

2071 msg = xapi_session.xenapi.message.get_record(msg_ref) 

2072 except XenAPI.Failure: 

2073 return True 

2074 

2075 return msg is None 

2076 

    def check_no_space_candidates(self):
        """Raise (or refresh) a XAPI SM_GC_NO_SPACE message when coalesces were
        skipped for lack of space, and destroy it once nothing is blocked."""
        xapi_session = self.xapi.getSession()

        msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE)
        if self.no_space_candidates:
            # Only (re)create the message when none exists or the user
            # dismissed the previous one.
            if msg_id is None or self.msg_cleared(xapi_session, msg_id):
                util.SMlog("Could not coalesce due to a lack of space "
                           f"in SR {self.uuid}")
                msg_body = ("Unable to perform data coalesce due to a lack "
                            f"of space in SR {self.uuid}")
                msg_id = xapi_session.xenapi.message.create(
                    'SM_GC_NO_SPACE',
                    3,
                    "SR",
                    self.uuid,
                    msg_body)
                # Replace the recorded message id in the SR's sm_config.
                xapi_session.xenapi.SR.remove_from_sm_config(
                    self.xapi.srRef, VDI.DB_GC_NO_SPACE)
                xapi_session.xenapi.SR.add_to_sm_config(
                    self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id)

            for candidate in self.no_space_candidates.values():
                candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id)
        elif msg_id is not None:
            # Everything was coalescable, remove the message
            xapi_session.xenapi.message.destroy(msg_id)

2103 

    def clear_no_space_msg(self, vdi):
        """Drop `vdi` from the no-space candidates and remove its no-space
        config key (best-effort against XAPI failures)."""
        msg_id = None
        try:
            msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE)
        except XenAPI.Failure:
            # Config lookup may fail (e.g. VDI gone); nothing to clear then.
            pass

        self.no_space_candidates.pop(vdi.uuid, None)
        if msg_id is not None:
            vdi.delConfig(VDI.DB_GC_NO_SPACE)

2114 

2115 

2116 def wait_for_plug(self): 

2117 for _ in range(1, 10): 

2118 time.sleep(2) 

2119 if self.xapi.isPluggedHere(): 

2120 return True 

2121 return False 

2122 

2123 def gcEnabled(self, refresh=True): 

2124 if refresh: 

2125 self.xapi.srRecord = \ 

2126 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

2127 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

2128 Util.log("GC is disabled for this SR, abort") 

2129 return False 

2130 return True 

2131 

    def scan(self, force=False) -> None:
        """Scan the SR and load VDI info for each VDI. If called repeatedly,
        update VDI objects if they already exist.

        Base implementation is a no-op; subclasses override it.
        """
        pass

2136 

    def scanLocked(self, force=False):
        """Run scan() while holding the SR lock."""
        self.lock()
        try:
            self.scan(force)
        finally:
            self.unlock()

2143 

2144 def getVDI(self, uuid): 

2145 return self.vdis.get(uuid) 

2146 

2147 def hasWork(self): 

2148 if len(self.findGarbage()) > 0: 

2149 return True 

2150 if self.findCoalesceable(): 

2151 return True 

2152 if self.findLeafCoalesceable(): 

2153 return True 

2154 if self.needUpdateBlockInfo(): 

2155 return True 

2156 return False 

2157 

    def findCoalesceable(self):
        """Find a coalesceable VDI. Return a vdi that should be coalesced
        (choosing one among all coalesceable candidates according to some
        criteria) or None if there is no VDI that could be coalesced"""

        candidates = []

        srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE)
        if srSwitch == "false":
            Util.log("Coalesce disabled for this SR")
            # NOTE(review): returns the empty list here but None at the end;
            # both are falsy so truthiness-based callers are unaffected.
            return candidates

        # finish any VDI for which a relink journal entry exists first
        journals = self.journaler.getAll(VDI.JRN_RELINK)
        for uuid in journals:
            vdi = self.getVDI(uuid)
            if vdi and vdi not in self._failedCoalesceTargets:
                return vdi

        for vdi in self.vdis.values():
            if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets:
                candidates.append(vdi)
                Util.log("%s is coalescable" % vdi.uuid)

        self.xapi.update_task_progress("coalescable", len(candidates))

        # pick one in the tallest tree
        treeHeight = dict()
        for c in candidates:
            height = c.getTreeRoot().getTreeHeight()
            if treeHeight.get(height):
                treeHeight[height].append(c)
            else:
                treeHeight[height] = [c]

        freeSpace = self.getFreeSpace()
        heights = list(treeHeight.keys())
        heights.sort(reverse=True)
        for h in heights:
            for c in treeHeight[h]:
                spaceNeeded = c._calcExtraSpaceForCoalescing()
                if spaceNeeded <= freeSpace:
                    Util.log("Coalesce candidate: %s (tree height %d)" % (c, h))
                    self.clear_no_space_msg(c)
                    return c
                else:
                    # Remember the blocked candidate so a no-space alert can
                    # be raised later (see check_no_space_candidates).
                    self.no_space_candidates[c.uuid] = c
                    Util.log("No space to coalesce %s (free space: %d)" % \
                            (c, freeSpace))
        return None

2208 

2209 def getSwitch(self, key): 

2210 return self.xapi.srRecord["other_config"].get(key) 

2211 

2212 def forbiddenBySwitch(self, switch, condition, fail_msg): 

2213 srSwitch = self.getSwitch(switch) 

2214 ret = False 

2215 if srSwitch: 

2216 ret = srSwitch == condition 

2217 

2218 if ret: 

2219 Util.log(fail_msg) 

2220 

2221 return ret 

2222 

2223 def leafCoalesceForbidden(self): 

2224 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

2225 "false", 

2226 "Coalesce disabled for this SR") or 

2227 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

2228 VDI.LEAFCLSC_DISABLED, 

2229 "Leaf-coalesce disabled for this SR")) 

2230 

    def findLeafCoalesceable(self):
        """Find leaf-coalesceable VDIs in each COW tree"""

        candidates = []
        if self.leafCoalesceForbidden():
            return candidates

        self.gatherLeafCoalesceable(candidates)

        self.xapi.update_task_progress("coalescable", len(candidates))

        freeSpace = self.getFreeSpace()
        for candidate in candidates:
            # check the space constraints to see if leaf-coalesce is actually
            # feasible for this candidate
            spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing()
            spaceNeededLive = spaceNeeded
            if spaceNeeded > freeSpace:
                # Snapshot-coalesce doesn't fit; a live leaf-coalesce is
                # cheaper, use its requirement if the VDI supports it.
                spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing()
                if candidate.canLiveCoalesce(self.getStorageSpeed()):
                    spaceNeeded = spaceNeededLive

            if spaceNeeded <= freeSpace:
                Util.log("Leaf-coalesce candidate: %s" % candidate)
                self.clear_no_space_msg(candidate)
                return candidate
            else:
                Util.log("No space to leaf-coalesce %s (free space: %d)" % \
                        (candidate, freeSpace))
                if spaceNeededLive <= freeSpace:
                    # Mark for offline leaf-coalesce which skips the snapshot.
                    Util.log("...but enough space if skip snap-coalesce")
                    candidate.setConfig(VDI.DB_LEAFCLSC,
                                        VDI.LEAFCLSC_OFFLINE)
                self.no_space_candidates[candidate.uuid] = candidate

        return None

2267 

    def gatherLeafCoalesceable(self, candidates):
        """Append to `candidates` every leaf VDI that is allowed to be
        leaf-coalesced (skipping reset-on-boot, cached, disabled and
        previously failed ones)."""
        for vdi in self.vdis.values():
            if not vdi.isLeafCoalesceable():
                continue
            if vdi in self._failedCoalesceTargets:
                continue
            if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET:
                Util.log("Skipping reset-on-boot %s" % vdi)
                continue
            if vdi.getConfig(vdi.DB_ALLOW_CACHING):
                Util.log("Skipping allow_caching=true %s" % vdi)
                continue
            if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED:
                Util.log("Leaf-coalesce disabled for %s" % vdi)
                continue
            # Without the global auto setting, only explicitly forced VDIs qualify.
            if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or
                    vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE):
                continue
            candidates.append(vdi)

2287 

    def coalesce(self, vdi, dryRun=False):
        """Coalesce vdi onto parent"""
        Util.log("Coalescing %s -> %s" % (vdi, vdi.parent))
        if dryRun:
            return

        try:
            self._coalesce(vdi)
        except util.SMException as e:
            if isinstance(e, AbortException):
                # Aborts propagate to the caller after cleanup.
                self.cleanup()
                raise
            else:
                # Any other SM failure: remember the target so we don't retry
                # it this run, and keep going.
                self._failedCoalesceTargets.append(vdi)
                Util.logException("coalesce")
                Util.log("Coalesce failed, skipping")
        self.cleanup()

2305 

    def coalesceLeaf(self, vdi, dryRun=False):
        """Leaf-coalesce vdi onto parent"""
        Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent))
        if dryRun:
            return

        try:
            uuid = vdi.uuid
            try:
                # "vdi" object will no longer be valid after this call
                self._coalesceLeaf(vdi)
            finally:
                # Re-fetch by uuid and clear the leaf-coalesce hint if the
                # VDI still exists.
                vdi = self.getVDI(uuid)
                if vdi:
                    vdi.delConfig(vdi.DB_LEAFCLSC)
        except AbortException:
            self.cleanup()
            raise
        except (util.SMException, XenAPI.Failure) as e:
            self._failedCoalesceTargets.append(vdi)
            Util.logException("leaf-coalesce")
            Util.log("Leaf-coalesce failed on %s, skipping" % vdi)
        self.cleanup()

2329 

    def garbageCollect(self, dryRun=False):
        """Delete all unreferenced (garbage) VDIs and stale journal entries."""
        vdiList = self.findGarbage()
        Util.log("Found %d VDIs for deletion:" % len(vdiList))
        for vdi in vdiList:
            Util.log(" %s" % vdi)
        if not dryRun:
            self.deleteVDIs(vdiList)
        self.cleanupJournals(dryRun)

2338 

2339 def findGarbage(self): 

2340 vdiList = [] 

2341 for vdi in self.vdiTrees: 

2342 vdiList.extend(vdi.getAllPrunable()) 

2343 return vdiList 

2344 

    def deleteVDIs(self, vdiList) -> None:
        """Delete each VDI in `vdiList`, honoring the abort IPC flag between deletions."""
        for vdi in vdiList:
            if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("Deleting unlinked VDI %s" % vdi)
            self.deleteVDI(vdi)

2351 

    def deleteVDI(self, vdi) -> None:
        """Remove `vdi` from the in-memory structures and destroy it.

        Only childless VDIs may be deleted.
        """
        assert(len(vdi.children) == 0)
        del self.vdis[vdi.uuid]
        if vdi.parent:
            vdi.parent.children.remove(vdi)
        if vdi in self.vdiTrees:
            self.vdiTrees.remove(vdi)
        vdi.delete()

2360 

    def forgetVDI(self, vdiUuid) -> None:
        """Remove the VDI record from the XAPI database."""
        self.xapi.forgetVDI(self.uuid, vdiUuid)

2363 

2364 def pauseVDIs(self, vdiList) -> None: 

2365 paused = [] 

2366 failed = False 

2367 for vdi in vdiList: 

2368 try: 

2369 vdi.pause() 

2370 paused.append(vdi) 

2371 except: 

2372 Util.logException("pauseVDIs") 

2373 failed = True 

2374 break 

2375 

2376 if failed: 

2377 self.unpauseVDIs(paused) 

2378 raise util.SMException("Failed to pause VDIs") 

2379 

2380 def unpauseVDIs(self, vdiList): 

2381 failed = False 

2382 for vdi in vdiList: 

2383 try: 

2384 vdi.unpause() 

2385 except: 

2386 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

2387 failed = True 

2388 if failed: 

2389 raise util.SMException("Failed to unpause VDIs") 

2390 

    def getFreeSpace(self) -> int:
        """Free space in bytes; base implementation reports none (subclasses override)."""
        return 0

2393 

    def cleanup(self):
        """Hook for post-operation cleanup; base implementation only logs."""
        Util.log("In cleanup")
        return

2397 

2398 @override 

2399 def __str__(self) -> str: 

2400 if self.name: 

2401 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

2402 else: 

2403 ret = "%s" % self.uuid 

2404 return ret 

2405 

    def lock(self):
        """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort
        signal to avoid deadlocking (trying to acquire the SR lock while the
        lock is held by a process that is trying to abort us)"""
        if not self._srLock:
            # Locking was disabled at construction time.
            return

        if self._locked == 0:
            abortFlag = IPCFlag(self.uuid)
            for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK):
                if self._srLock.acquireNoblock():
                    self._locked += 1
                    return
                # Bail out if someone asked us to abort while we wait.
                if abortFlag.test(FLAG_TYPE_ABORT):
                    raise AbortException("Abort requested")
                time.sleep(SR.LOCK_RETRY_INTERVAL)
            raise util.SMException("Unable to acquire the SR lock")

        self._locked += 1

2425 

    def unlock(self):
        """Release one level of the re-entrant SR lock; the underlying lock is
        released only when the depth reaches zero."""
        if not self._srLock:
            return
        assert(self._locked > 0)
        self._locked -= 1
        if self._locked == 0:
            self._srLock.release()

2433 

2434 def needUpdateBlockInfo(self) -> bool: 

2435 for vdi in self.vdis.values(): 

2436 if vdi.scanError or len(vdi.children) == 0: 

2437 continue 

2438 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

2439 return True 

2440 return False 

2441 

2442 def updateBlockInfo(self) -> None: 

2443 for vdi in self.vdis.values(): 

2444 if vdi.scanError or len(vdi.children) == 0: 

2445 continue 

2446 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

2447 vdi.updateBlockInfo() 

2448 

2449 def cleanupCoalesceJournals(self): 

2450 """Remove stale coalesce VDI indicators""" 

2451 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2452 for uuid, jval in entries.items(): 

2453 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2454 

    def cleanupJournals(self, dryRun=False):
        """delete journal entries for non-existing VDIs"""
        for t in [LVMVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]:
            entries = self.journaler.getAll(t)
            for uuid, jval in entries.items():
                if self.getVDI(uuid):
                    continue
                if t == SR.JRN_CLONE:
                    # Clone journals stay relevant while the base VDI exists.
                    baseUuid, clonUuid = jval.split("_")
                    if self.getVDI(baseUuid):
                        continue
                Util.log(" Deleting stale '%s' journal entry for %s "
                         "(%s)" % (t, uuid, jval))
                if not dryRun:
                    self.journaler.remove(t, uuid)

2470 

    def cleanupCache(self, maxAge=-1) -> int:
        """Remove expired cache files; base implementation has no cache, returns 0."""
        return 0

2473 

2474 def _hasLeavesAttachedOn(self, vdi: VDI): 

2475 leaves = vdi.getAllLeaves() 

2476 leaves_vdi = [leaf.uuid for leaf in leaves] 

2477 return util.get_hosts_attached_on(self.xapi.session, leaves_vdi) 

2478 

2479 def _gc_running_file(self, vdi: VDI): 

2480 run_file = "gc_running_{}".format(vdi.uuid) 

2481 return os.path.join(NON_PERSISTENT_DIR, str(self.uuid), run_file) 

2482 

    def _create_running_file(self, vdi: VDI):
        """Create the marker file signalling a coalesce is in progress for `vdi`."""
        with open(self._gc_running_file(vdi), "w") as f:
            f.write("1")

2486 

    def _delete_running_file(self, vdi: VDI):
        """Remove the per-VDI 'GC running' marker file."""
        os.unlink(self._gc_running_file(vdi))

2489 

    def _coalesce(self, vdi: VDI):
        """Coalesce `vdi` onto its parent and relink vdi's children to it.

        Journal entries make the operation resumable: JRN_COALESCE marks the
        data-copy phase, JRN_RELINK the relink phase. The VDI object is
        deleted at the end.
        """
        if self.journaler.get(vdi.JRN_RELINK, vdi.uuid):
            # this means we had done the actual coalescing already and just
            # need to finish relinking and/or refreshing the children
            Util.log("==> Coalesce apparently already done: skipping")
        else:
            # JRN_COALESCE is used to check which VDI is being coalesced in
            # order to decide whether to abort the coalesce. We remove the
            # journal as soon as the COW coalesce step is done, because we
            # don't expect the rest of the process to take long

            if os.path.exists(self._gc_running_file(vdi)):
                util.SMlog("gc_running already exist for {}. Ignoring...".format(self.uuid))

            self._create_running_file(vdi)

            self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1")
            host_refs = self._hasLeavesAttachedOn(vdi)
            #TODO: this check of multiple host_refs should be done earlier in `is_coalesceable` to avoid stopping this late every time
            if len(host_refs) > 1:
                util.SMlog("Not coalesceable, chain activated more than once")
                raise Exception("Not coalesceable, chain activated more than once") #TODO: Use correct error

            try:
                if host_refs and vdi.cowutil.isCoalesceableOnRemote():
                    #Leaf opened on another host, we need to call online coalesce
                    util.SMlog("Remote coalesce for {}".format(vdi.path))
                    vdi._doCoalesceOnHost(list(host_refs)[0])
                else:
                    util.SMlog("Offline coalesce for {}".format(vdi.path))
                    vdi._doCoalesce()
            except Exception as e:
                # Drop the marker so a later run is not blocked, then re-raise.
                util.SMlog("EXCEPTION while coalescing: {}".format(e))
                self._delete_running_file(vdi)
                raise

            self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid)
            self._delete_running_file(vdi)

        util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid)

        # we now need to relink the children: lock the SR to prevent ops
        # like SM.clone from manipulating the VDIs we'll be relinking and
        # rescan the SR first in case the children changed since the last
        # scan
        self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1")

        self.lock()
        try:
            vdi.parent._tagChildrenForRelink()
            self.scan()
            vdi._relinkSkip() #TODO: We could check if the parent is already the right one before doing the relink, or we could do the relink a second time, it doesn't seem to cause issues
        finally:
            self.unlock()
            # Reload the children to leave things consistent
            vdi.parent._reloadChildren(vdi)
        self.journaler.remove(vdi.JRN_RELINK, vdi.uuid)

        self.deleteVDI(vdi)

2549 

2550 class CoalesceTracker: 

2551 GRACE_ITERATIONS = 2 

2552 MAX_ITERATIONS_NO_PROGRESS = 3 

2553 MAX_ITERATIONS = 10 

2554 MAX_INCREASE_FROM_MINIMUM = 1.2 

2555 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2556 " --> Final size {finSize}" 

2557 

2558 def __init__(self, sr): 

2559 self.itsNoProgress = 0 

2560 self.its = 0 

2561 self.minSize = float("inf") 

2562 self.history = [] 

2563 self.reason = "" 

2564 self.startSize = None 

2565 self.finishSize = None 

2566 self.sr = sr 

2567 self.grace_remaining = self.GRACE_ITERATIONS 

2568 

2569 def abortCoalesce(self, prevSize, curSize): 

2570 self.its += 1 

2571 self.history.append(self.HISTORY_STRING.format(its=self.its, 

2572 initSize=prevSize, 

2573 finSize=curSize)) 

2574 

2575 self.finishSize = curSize 

2576 

2577 if self.startSize is None: 

2578 self.startSize = prevSize 

2579 

2580 if curSize < self.minSize: 

2581 self.minSize = curSize 

2582 

2583 if prevSize < self.minSize: 

2584 self.minSize = prevSize 

2585 

2586 if self.its == 1: 

2587 # Skip evaluating conditions on first iteration 

2588 return False 

2589 

2590 if prevSize < curSize: 

2591 self.itsNoProgress += 1 

2592 Util.log("No progress, attempt:" 

2593 " {attempt}".format(attempt=self.itsNoProgress)) 

2594 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2595 else: 

2596 # We made progress 

2597 return False 

2598 

2599 if self.its > self.MAX_ITERATIONS: 

2600 max = self.MAX_ITERATIONS 

2601 self.reason = \ 

2602 "Max iterations ({max}) exceeded".format(max=max) 

2603 return True 

2604 

2605 if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS: 

2606 max = self.MAX_ITERATIONS_NO_PROGRESS 

2607 self.reason = \ 

2608 "No progress made for {max} iterations".format(max=max) 

2609 return True 

2610 

2611 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2612 if curSize > maxSizeFromMin: 

2613 self.grace_remaining -= 1 

2614 if self.grace_remaining == 0: 

2615 self.reason = "Unexpected bump in size," \ 

2616 " compared to minimum achieved" 

2617 

2618 return True 

2619 

2620 return False 

2621 

2622 def printSizes(self): 

2623 Util.log("Starting size was {size}" 

2624 .format(size=self.startSize)) 

2625 Util.log("Final size was {size}" 

2626 .format(size=self.finishSize)) 

2627 Util.log("Minimum size achieved was {size}" 

2628 .format(size=self.minSize)) 

2629 

2630 def printReasoning(self): 

2631 Util.log("Aborted coalesce") 

2632 for hist in self.history: 

2633 Util.log(hist) 

2634 Util.log(self.reason) 

2635 self.printSizes() 

2636 

2637 def printSummary(self): 

2638 if self.its == 0: 

2639 return 

2640 

2641 if self.reason: 2641 ↛ 2642line 2641 didn't jump to line 2642, because the condition on line 2641 was never true

2642 Util.log("Aborted coalesce") 

2643 Util.log(self.reason) 

2644 else: 

2645 Util.log("Coalesce summary") 

2646 

2647 Util.log(f"Performed {self.its} iterations") 

2648 self.printSizes() 

2649 

2650 

    def _coalesceLeaf(self, vdi):
        """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot
        complete due to external changes, namely vdi_delete and vdi_snapshot
        that alter leaf-coalescibility of vdi"""
        tracker = self.CoalesceTracker(self)
        # Shrink the leaf via snapshot-coalesce cycles until it is small
        # enough to live-coalesce at the measured storage speed.
        while not vdi.canLiveCoalesce(self.getStorageSpeed()):
            prevSizePhys = vdi.getSizePhys()
            if not self._snapshotCoalesce(vdi):
                return False
            if tracker.abortCoalesce(prevSizePhys, vdi.getSizePhys()):
                tracker.printReasoning()
                raise util.SMException("VDI {uuid} could not be coalesced"
                                       .format(uuid=vdi.uuid))
        tracker.printSummary()
        return self._liveLeafCoalesce(vdi)

2666 

2667 def calcStorageSpeed(self, startTime, endTime, coalescedSize): 

2668 speed = None 

2669 total_time = endTime - startTime 

2670 if total_time > 0: 

2671 speed = float(coalescedSize) / float(total_time) 

2672 return speed 

2673 

def writeSpeedToFile(self, speed):
    """Append a coalesce-speed sample (bytes/sec) to this SR's speed log.

    The log keeps at most N_RUNNING_AVERAGE samples (oldest dropped first)
    and is rewritten atomically via util.atomicFileWrite. The SR lock is
    held for the duration to serialize concurrent writers.

    Fixes over the original: the log file was opened "r+" (read-write)
    although it is only ever read here, and it was closed by hand in a
    finally block; use read-only mode and a context manager instead. The
    dead `content = []` pre-initialisation is removed.
    """
    path = SPEED_LOG_ROOT.format(uuid=self.uuid)
    self.lock()
    try:
        Util.log("Writing to file: {myfile}".format(myfile=path))
        if not os.path.isfile(path):
            # First sample: the file does not exist yet.
            content = [str(speed) + "\n"]
        else:
            with open(path, "r") as speedFile:
                content = speedFile.readlines()
            content.append(str(speed) + "\n")
            if len(content) > N_RUNNING_AVERAGE:
                del content[0]

        # Atomic replace so readers never observe a partially-written log.
        util.atomicFileWrite(path, VAR_RUN, "".join(content))
    finally:
        self.unlock()

2698 

def recordStorageSpeed(self, startTime, endTime, coalescedSize):
    """Compute the speed of a finished coalesce and persist the sample
    to the SR speed log (no-op when the interval yields no valid rate)."""
    speed = self.calcStorageSpeed(startTime, endTime, coalescedSize)
    if speed is not None:
        self.writeSpeedToFile(speed)

2705 

def getStorageSpeed(self):
    """Return the running-average coalesce speed (bytes/sec) for this SR,
    or None when the log is missing, empty, corrupt, or yields an
    impossible (non-positive) average. Holds the SR lock while reading.

    Bug fix: on a corrupt log the original logged `speedFile.readlines()`
    a second time — but the file had already been fully consumed, so the
    message always showed an empty list. Log the lines actually read.
    """
    path = SPEED_LOG_ROOT.format(uuid=self.uuid)
    self.lock()
    try:
        if not os.path.isfile(path):
            Util.log("Speed log missing for SR: {uuid}".
                     format(uuid=self.uuid))
            return None

        with open(path) as speedFile:
            lines = speedFile.readlines()

        try:
            samples = [float(i) for i in lines]
        except ValueError:
            # Log the offending content we already read (the original
            # re-read the exhausted file handle and logged []).
            Util.log("Something bad in the speed log:{log}".
                     format(log=lines))
            return None

        if not samples:
            Util.log("Speed file empty for SR: {uuid}".
                     format(uuid=self.uuid))
            return None

        speed = sum(samples) / float(len(samples))
        if speed <= 0:
            # Defensive, should be impossible.
            Util.log("Bad speed: {speed} calculated for SR: {uuid}".
                     format(speed=speed, uuid=self.uuid))
            return None
        return speed
    finally:
        self.unlock()

2740 

def _snapshotCoalesce(self, vdi):
    """Take a single snapshot of leaf 'vdi' and coalesce the resulting
    temporary parent into the chain, shrinking the leaf.

    Returns False when a concurrent SM operation invalidated the tree
    (VDI deleted or independently snapshotted); True on success.

    Fix over the original: a dead local (`sizePhys = vdi.getSizePhys()`)
    that was computed but never used has been removed.
    """
    # Note that because we are not holding any locks here, concurrent SM
    # operations may change this tree under our feet. In particular, vdi
    # can be deleted, or it can be snapshotted.
    assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED)
    Util.log("Single-snapshotting %s" % vdi)
    util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid)
    try:
        ret = self.xapi.singleSnapshotVDI(vdi)
        Util.log("Single-snapshot returned: %s" % ret)
    except XenAPI.Failure as e:
        if util.isInvalidVDI(e):
            Util.log("The VDI appears to have been concurrently deleted")
            return False
        raise
    self.scanLocked()
    tempSnap = vdi.parent
    if not tempSnap.isCoalesceable():
        Util.log("The VDI appears to have been concurrently snapshotted")
        return False
    Util.log("Coalescing parent %s" % tempSnap)
    util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid)
    self._coalesce(tempSnap)
    if not vdi.isLeafCoalesceable():
        Util.log("The VDI tree appears to have been altered since")
        return False
    return True

2769 

def _liveLeafCoalesce(self, vdi: VDI) -> bool:
    """Coalesce leaf 'vdi' onto its parent while it may be attached.

    Takes the SR lock, re-verifies (after a fresh scan) that the VDI still
    exists and is still leaf-coalesceable, pauses its tapdisk, runs the
    actual coalesce via _doCoalesceLeaf(), then deletes the renamed old
    leaf. Returns False if the tree changed under us, True on success.
    """
    util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid)
    self.lock()
    try:
        self.scan()
        if not self.getVDI(vdi.uuid):
            Util.log("The VDI appears to have been deleted meanwhile")
            return False
        if not vdi.isLeafCoalesceable():
            Util.log("The VDI is no longer leaf-coalesceable")
            return False

        uuid = vdi.uuid
        vdi.pause(failfast=True)
        try:
            try:
                # "vdi" object will no longer be valid after this call
                self._create_running_file(vdi)
                self._doCoalesceLeaf(vdi)
            except:
                Util.logException("_doCoalesceLeaf")
                # Revert or complete the half-done coalesce before
                # propagating, so the tree is left consistent.
                self._handleInterruptedCoalesceLeaf()
                raise
        finally:
            # _doCoalesceLeaf renames nodes: re-fetch by uuid because the
            # original "vdi" reference is stale here.
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()
            # NOTE(review): assumes _delete_running_file tolerates a None
            # vdi when the node vanished — confirm against its definition.
            self._delete_running_file(vdi)
            vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid)
            if vdiOld:
                util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
                self.deleteVDI(vdiOld)
                util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
    finally:
        self.cleanup()
        self.unlock()
        self.logFilter.logState()
    return True

2808 

def _doCoalesceLeaf(self, vdi: VDI):
    """Actual coalescing of a leaf VDI onto parent. Must be called in an
    offline/atomic context.

    Steps: journal the operation (for crash recovery), copy the leaf's
    data into the parent, swap names so the parent takes over the leaf's
    identity, fix up the VDI record and refcounts, then dispose of the
    obsolete leaf and remove the journal entry.
    """
    # Journal first: an interruption anywhere below is recovered by
    # _handleInterruptedCoalesceLeaf() using this JRN_LEAF entry.
    self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid)
    self._prepareCoalesceLeaf(vdi)
    vdi.parent._setHidden(False)
    vdi.parent._increaseSizeVirt(vdi.sizeVirt, False)
    vdi.validate(True)
    vdi.parent.validate(True)
    util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid)
    timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT
    if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE:
        Util.log("Leaf-coalesce forced, will not use timeout")
        timeout = 0
    vdi._coalesceCowImage(timeout)
    util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid)
    vdi.parent.validate(True)
    #vdi._verifyContents(timeout / 2)

    # rename: the parent assumes the leaf's uuid; the old leaf is parked
    # under TMP_RENAME_PREFIX until it is deleted below.
    vdiUuid = vdi.uuid
    oldName = vdi.fileName
    origParentUuid = vdi.parent.uuid
    vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid)
    util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid)
    vdi.parent.rename(vdiUuid)
    util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid)
    self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid)

    # Note that "vdi.parent" is now the single remaining leaf and "vdi" is
    # garbage

    # update the VDI record
    if vdi.parent.vdi_type == VdiType.RAW:
        vdi.parent.setConfig(VDI.DB_VDI_TYPE, VdiType.RAW)
        vdi.parent.delConfig(VDI.DB_VDI_BLOCKS)
    util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid)

    self._updateNode(vdi)

    # delete the obsolete leaf & inflate the parent (in that order, to
    # minimize free space requirements)
    parent = vdi.parent
    vdi._setHidden(True)
    vdi.parent.children = []
    vdi.parent = None

    if parent.parent is None:
        # New leaf is a root: drop the stale parent pointer from its config.
        parent.delConfig(VDI.DB_VDI_PARENT)

    extraSpace = self._calcExtraSpaceNeeded(vdi, parent)
    freeSpace = self.getFreeSpace()
    if freeSpace < extraSpace:
        # don't delete unless we need the space: deletion is time-consuming
        # because it requires contacting the slaves, and we're paused here
        util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
        self.deleteVDI(vdi)
        util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)

    util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid)
    self.journaler.remove(VDI.JRN_LEAF, vdiUuid)

    self.forgetVDI(origParentUuid)
    self._finishCoalesceLeaf(parent)
    self._updateSlavesOnResize(parent)

2874 

def _calcExtraSpaceNeeded(self, child, parent) -> int:
    """Return the additional physical space (bytes) required to
    leaf-coalesce 'child' into 'parent'; never negative."""
    assert(VdiType.isCowImage(parent.vdi_type))
    return max(child.getSizePhys() - parent.getSizePhys(), 0)

2881 

def _prepareCoalesceLeaf(self, vdi) -> None:
    """Hook run before a leaf-coalesce; no-op in the base SR
    (overridden by LVMSR to activate/deflate/inflate LVs)."""
    pass

2884 

def _updateNode(self, vdi) -> None:
    """Hook run after the rename step of a leaf-coalesce; no-op in the
    base SR (overridden by LVMSR to fix LV refcounts)."""
    pass

2887 

def _finishCoalesceLeaf(self, parent) -> None:
    """Hook run at the end of a leaf-coalesce; no-op in the base SR
    (overridden by LVMSR to inflate/deflate the surviving LV)."""
    pass

2890 

def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
    """Hook to refresh slave hosts after a leaf-coalesce undo; no-op in
    the base SR (overridden by LVMSR to refresh LVs on slaves)."""
    pass

2893 

def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None:
    """Hook to propagate a VDI rename to slave hosts; no-op in the base
    SR (overridden by LVMSR)."""
    pass

2896 

def _updateSlavesOnResize(self, vdi) -> None:
    """Hook to propagate a VDI resize to slave hosts; no-op in the base
    SR (overridden by LVMSR)."""
    pass

2899 

def _removeStaleVDIs(self, uuidsPresent) -> None:
    """Drop from self.vdis every record whose uuid was not seen in the
    latest scan (the VDI vanished since the previous pass)."""
    stale = [u for u in self.vdis if u not in uuidsPresent]
    for uuid in stale:
        Util.log("VDI %s disappeared since last scan" % \
                self.vdis[uuid])
        del self.vdis[uuid]

2906 

def _handleInterruptedCoalesceLeaf(self) -> None:
    """An interrupted leaf-coalesce operation may leave the COW tree in an
    inconsistent state. If the old-leaf VDI is still present, we revert the
    operation (in case the original error is persistent); otherwise we must
    finish the operation"""
    # Base SR has no journal-backed recovery; FileSR and LVMSR override
    # this with their storage-specific implementations.
    pass

2913 

def _buildTree(self, force):
    """Wire every scanned VDI to its parent object and collect the roots
    of the resulting forest into self.vdiTrees.

    A VDI whose parent record is missing is tolerated when it is a
    leftover of an interrupted leaf-coalesce (TMP_RENAME_PREFIX) or when
    'force' is set; otherwise a missing parent is a fatal inconsistency.
    """
    self.vdiTrees = []
    for vdi in self.vdis.values():
        if not vdi.parentUuid:
            # Parentless VDI: root of its own tree.
            self.vdiTrees.append(vdi)
            continue

        parent = self.getVDI(vdi.parentUuid)
        if parent:
            vdi.parent = parent
            parent.children.append(vdi)
            continue

        # Parent record is missing from this SR.
        if vdi.uuid.startswith(self.TMP_RENAME_PREFIX):
            self.vdiTrees.append(vdi)
        elif force:
            Util.log("ERROR: Parent VDI %s not found! (for %s)" % \
                    (vdi.parentUuid, vdi.uuid))
            self.vdiTrees.append(vdi)
        else:
            raise util.SMException("Parent VDI %s of %s not " \
                    "found" % (vdi.parentUuid, vdi.uuid))

2935 

2936 

class FileSR(SR):
    """SR whose VDIs are image files under /var/run/sr-mount/<sr-uuid>
    (local ext / NFS)."""

    TYPE = SR.TYPE_FILE
    CACHE_FILE_EXT = ".vhdcache"
    # cache cleanup actions
    CACHE_ACTION_KEEP = 0
    CACHE_ACTION_REMOVE = 1
    CACHE_ACTION_REMOVE_IF_INACTIVE = 2

    def __init__(self, uuid, xapi, createLock, force):
        SR.__init__(self, uuid, xapi, createLock, force)
        self.path = "/var/run/sr-mount/%s" % self.uuid
        # File-based journal kept inside the SR mount point itself.
        self.journaler = fjournaler.Journaler(self.path)

    @override
    def scan(self, force=False) -> None:
        """Rebuild self.vdis from the image files on disk (all COW types
        plus raw files), prune stale records, rebuild the VDI forest, and
        recover any interrupted leaf-coalesce."""
        if not util.pathexists(self.path):
            raise util.SMException("directory %s not found!" % self.uuid)

        uuidsPresent: List[str] = []

        for vdi_type in VDI_COW_TYPES:
            scan_result = self._scan(vdi_type, force)
            for uuid, image_info in scan_result.items():
                vdi = self.getVDI(uuid)
                if not vdi:
                    self.logFilter.logNewVDI(uuid)
                    vdi = FileVDI(self, uuid, vdi_type)
                    self.vdis[uuid] = vdi
                vdi.load(image_info)
            uuidsPresent.extend(scan_result.keys())

        # Raw images are found by extension only; they carry no COW metadata.
        rawList = [x for x in os.listdir(self.path) if x.endswith(VdiTypeExtension.RAW)]
        for rawName in rawList:
            uuid = FileVDI.extractUuid(rawName)
            uuidsPresent.append(uuid)
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = FileVDI(self, uuid, VdiType.RAW)
                self.vdis[uuid] = vdi
        self._removeStaleVDIs(uuidsPresent)
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

    @override
    def getFreeSpace(self) -> int:
        """Free bytes on the filesystem backing this SR."""
        return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path)

    @override
    def deleteVDIs(self, vdiList) -> None:
        """Delete the given VDIs; if any tree root is removed on an NFS SR,
        mark dependent IntelliCache SRs dirty so they re-validate."""
        rootDeleted = False
        for vdi in vdiList:
            if not vdi.parent:
                rootDeleted = True
                break
        SR.deleteVDIs(self, vdiList)
        if self.xapi.srRecord["type"] == "nfs" and rootDeleted:
            self.xapi.markCacheSRsDirty()

    @override
    def cleanupCache(self, maxAge=-1) -> int:
        """Clean up IntelliCache cache files. Caches for leaf nodes are
        removed when the leaf node no longer exists or its allow-caching
        attribute is not set. Caches for parent nodes are removed when the
        parent node no longer exists or it hasn't been used in more than
        <maxAge> hours.
        Return number of caches removed.
        """
        numRemoved = 0
        cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)]
        Util.log("Found %d cache files" % len(cacheFiles))
        cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge)
        for cacheFile in cacheFiles:
            uuid = cacheFile[:-len(self.CACHE_FILE_EXT)]
            action = self.CACHE_ACTION_KEEP
            rec = self.xapi.getRecordVDI(uuid)
            if not rec:
                Util.log("Cache %s: VDI doesn't exist" % uuid)
                action = self.CACHE_ACTION_REMOVE
            elif rec["managed"] and not rec["allow_caching"]:
                Util.log("Cache %s: caching disabled" % uuid)
                action = self.CACHE_ACTION_REMOVE
            elif not rec["managed"] and maxAge >= 0:
                lastAccess = datetime.datetime.fromtimestamp( \
                        os.path.getatime(os.path.join(self.path, cacheFile)))
                if lastAccess < cutoff:
                    Util.log("Cache %s: older than %d hrs" % (uuid, maxAge))
                    action = self.CACHE_ACTION_REMOVE_IF_INACTIVE

            if action == self.CACHE_ACTION_KEEP:
                Util.log("Keeping cache %s" % uuid)
                continue

            # Serialize with cache setup: lock on the parent when this is a
            # managed leaf's cache, otherwise on the node itself.
            lockId = uuid
            parentUuid = None
            if rec and rec["managed"]:
                parentUuid = rec["sm_config"].get("vhd-parent")
            if parentUuid:
                lockId = parentUuid

            cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId)
            cacheLock.acquire()
            try:
                if self._cleanupCache(uuid, action):
                    numRemoved += 1
            finally:
                cacheLock.release()
        return numRemoved

    def _cleanupCache(self, uuid, action):
        """Remove a single cache file (re-checking validity under the lock).
        Return True if the file was deleted."""
        assert(action != self.CACHE_ACTION_KEEP)
        rec = self.xapi.getRecordVDI(uuid)
        if rec and rec["allow_caching"]:
            Util.log("Cache %s appears to have become valid" % uuid)
            return False

        fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT)
        tapdisk = blktap2.Tapdisk.find_by_path(fullPath)
        if tapdisk:
            if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE:
                Util.log("Cache %s still in use" % uuid)
                return False
            Util.log("Shutting down tapdisk for %s" % fullPath)
            tapdisk.shutdown()

        Util.log("Deleting file %s" % fullPath)
        os.unlink(fullPath)
        return True

    def _isCacheFileName(self, name):
        """True when 'name' has the exact <uuid>.vhdcache shape."""
        return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \
                name.endswith(self.CACHE_FILE_EXT)

    def _scan(self, vdi_type, force):
        """Scan the SR directory for images of 'vdi_type', retrying on
        metadata errors; with 'force', return the last (possibly bad)
        result instead of raising."""
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            error = False
            pattern = os.path.join(self.path, "*%s" % VDI_TYPE_TO_EXTENSION[vdi_type])
            scan_result = getCowUtil(vdi_type).getAllInfoFromVG(pattern, FileVDI.extractUuid)
            for uuid, vdiInfo in scan_result.items():
                if vdiInfo.error:
                    error = True
                    break
            if not error:
                return scan_result
            Util.log("Scan error on attempt %d" % i)
        if force:
            return scan_result
        raise util.SMException("Scan error")

    @override
    def deleteVDI(self, vdi) -> None:
        """Delete a VDI after confirming no slave host still has it open."""
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

    def _checkSlaves(self, vdi):
        """Ask every host with an attached PBD (except ourselves) to verify
        'vdi' is unused; failures from offline hosts are ignored."""
        onlineHosts = self.xapi.getOnlineHosts()
        abortFlag = IPCFlag(self.uuid)
        for pbdRecord in self.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            if hostRef == self.xapi._hostRef:
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            try:
                self._checkSlave(hostRef, vdi)
            except util.CommandException:
                # Only escalate when the host is believed online.
                if hostRef in onlineHosts:
                    raise

    def _checkSlave(self, hostRef, vdi):
        """Run the nfs-on-slave 'check' plugin on one host for vdi.path."""
        call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path})
        Util.log("Checking with slave: %s" % repr(call))
        _host = self.xapi.session.xenapi.host
        text = _host.call_plugin( * call)

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Recover every journalled leaf-coalesce: undo it if the old
        parent (or temporary child) file still exists, otherwise finish it,
        then clear the journal entry."""
        entries = self.journaler.getAll(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            fileList = os.listdir(self.path)
            childName = uuid + VdiTypeExtension.VHD
            tmpChildName = self.TMP_RENAME_PREFIX + uuid + VdiTypeExtension.VHD
            parentName1 = parentUuid + VdiTypeExtension.VHD
            parentName2 = parentUuid + VdiTypeExtension.RAW
            parentPresent = (parentName1 in fileList or parentName2 in fileList)
            if parentPresent or tmpChildName in fileList:
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Roll back a half-done leaf-coalesce: restore original names,
        re-link the child record, and restore hidden flags."""
        Util.log("*** UNDO LEAF-COALESCE")
        parent = self.getVDI(parentUuid)
        if not parent:
            # Parent was already renamed to the child's uuid: rename it back.
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException("Neither %s nor %s found" % \
                        (parentUuid, childUuid))
            Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
            parent.rename(parentUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)

        child = self.getVDI(childUuid)
        if not child:
            # Child is still parked under the temporary rename prefix.
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException("Neither %s nor %s found" % \
                        (childUuid, self.TMP_RENAME_PREFIX + childUuid))
            Util.log("Renaming child back to %s" % childUuid)
            child.rename(childUuid)
            Util.log("Updating the VDI record")
            child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)

        # Restore visibility: the child is the real leaf again.
        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
        Util.log("*** leaf-coalesce undo successful")
        if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
            child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Complete a leaf-coalesce whose data/rename phase already
        finished: forget the obsolete parent and refresh slaves."""
        Util.log("*** FINISH LEAF-COALESCE")
        vdi = self.getVDI(childUuid)
        if not vdi:
            Util.log(f"_finishInterruptedCoalesceLeaf, vdi {childUuid} not found, aborting")
            raise util.SMException("VDI %s not found" % childUuid)
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # Best effort: the parent may already be gone from XAPI.
            Util.logException('_finishInterruptedCoalesceLeaf')
            pass
        self._updateSlavesOnResize(vdi)
        util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
        Util.log("*** finished leaf-coalesce successfully")

3181 

3182 

class LVMSR(SR):
    """SR whose VDIs are logical volumes in a VG named VG_PREFIX + uuid
    (iSCSI / HBA LVM-based SRs)."""

    TYPE = SR.TYPE_LVHD
    SUBTYPES = ["lvhdoiscsi", "lvhdohba"]

    def __init__(self, uuid, xapi, createLock, force):
        SR.__init__(self, uuid, xapi, createLock, force)
        self.vgName = "%s%s" % (VG_PREFIX, self.uuid)
        self.path = os.path.join(VG_LOCATION, self.vgName)

        # Optional per-SR LVM configuration stored in the SR's other-config.
        sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid)
        other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref)
        lvm_conf = other_conf.get('lvm-conf') if other_conf else None
        self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf)

        self.lvActivator = LVActivator(self.uuid, self.lvmCache)
        # Journal entries live inside the VG (LV-backed journal).
        self.journaler = journaler.Journaler(self.lvmCache)

    @override
    def deleteVDI(self, vdi) -> None:
        """Deactivate the LV locally, confirm no slave has it open,
        then delete."""
        if self.lvActivator.get(vdi.uuid, False):
            self.lvActivator.deactivate(vdi.uuid, False)
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

    @override
    def forgetVDI(self, vdiUuid) -> None:
        """Forget the VDI in XAPI and drop it from the SR's on-disk
        metadata volume."""
        SR.forgetVDI(self, vdiUuid)
        mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME)
        LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid)

    @override
    def getFreeSpace(self) -> int:
        """Unallocated bytes in the volume group."""
        stats = lvutil._getVGstats(self.vgName)
        return stats['physical_size'] - stats['physical_utilisation']

    @override
    def cleanup(self):
        """Deactivate all LVs this GC pass activated."""
        if not self.lvActivator.deactivateAll():
            Util.log("ERROR deactivating LVs while cleaning up")

    @override
    def needUpdateBlockInfo(self) -> bool:
        """True when any COW parent VDI lacks cached block-allocation info."""
        for vdi in self.vdis.values():
            if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0:
                continue
            if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
                return True
        return False

    @override
    def updateBlockInfo(self) -> None:
        """Populate missing block-allocation info on COW parent VDIs."""
        numUpdated = 0
        for vdi in self.vdis.values():
            if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0:
                continue
            if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
                vdi.updateBlockInfo()
                numUpdated += 1
        if numUpdated:
            # deactivate the LVs back sooner rather than later. If we don't
            # now, by the time this thread gets to deactivations, another one
            # might have leaf-coalesced a node and deleted it, making the child
            # inherit the refcount value and preventing the correct decrement
            self.cleanup()

    @override
    def scan(self, force=False) -> None:
        """Rebuild self.vdis from the VG contents, prune stale records,
        rebuild the forest, and recover interrupted leaf-coalesces."""
        vdis = self._scan(force)
        for uuid, vdiInfo in vdis.items():
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = LVMVDI(self, uuid, vdiInfo.vdiType)
                self.vdis[uuid] = vdi
            vdi.load(vdiInfo)
        self._removeStaleVDIs(vdis.keys())
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

    def _scan(self, force):
        """Scan the LVM cache for VDI info, retrying on scan errors; with
        'force', return the last (possibly bad) result instead of raising."""
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            error = False
            self.lvmCache.refresh()
            vdis = LvmCowUtil.getVDIInfo(self.lvmCache)
            for uuid, vdiInfo in vdis.items():
                if vdiInfo.scanError:
                    error = True
                    break
            if not error:
                return vdis
            Util.log("Scan error, retrying (%d)" % i)
        if force:
            return vdis
        raise util.SMException("Scan error")

    @override
    def _removeStaleVDIs(self, uuidsPresent) -> None:
        """Like the base version, but also drop the activator's record so
        a vanished LV is not deactivated later."""
        for uuid in list(self.vdis.keys()):
            if not uuid in uuidsPresent:
                Util.log("VDI %s disappeared since last scan" % \
                        self.vdis[uuid])
                del self.vdis[uuid]
                if self.lvActivator.get(uuid, False):
                    self.lvActivator.remove(uuid, False)

    @override
    def _liveLeafCoalesce(self, vdi) -> bool:
        """If the parent is raw and the child was resized (virt. size), then
        we'll need to resize the parent, which can take a while due to zeroing
        out of the extended portion of the LV. Do it before pausing the child
        to avoid a protracted downtime"""
        if not VdiType.isCowImage(vdi.parent.vdi_type) and vdi.sizeVirt > vdi.parent.sizeVirt:
            self.lvmCache.setReadonly(vdi.parent.fileName, False)
            vdi.parent._increaseSizeVirt(vdi.sizeVirt)

        return SR._liveLeafCoalesce(self, vdi)

    @override
    def _prepareCoalesceLeaf(self, vdi) -> None:
        """Activate the chain and size the LVs for the coalesce: the leaf
        is deflated, the parent inflated to receive its data."""
        vdi._activateChain()
        self.lvmCache.setReadonly(vdi.parent.fileName, False)
        vdi.deflate()
        vdi.inflateParentForCoalesce()

    @override
    def _updateNode(self, vdi) -> None:
        # fix the refcounts: the remaining node should inherit the binary
        # refcount from the leaf (because if it was online, it should remain
        # refcounted as such), but the normal refcount from the parent (because
        # this node is really the parent node) - minus 1 if it is online (since
        # non-leaf nodes increment their normal counts when they are online and
        # we are now a leaf, storing that 1 in the binary refcount).
        ns = NS_PREFIX_LVM + self.uuid
        cCnt, cBcnt = RefCounter.check(vdi.uuid, ns)
        pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns)
        pCnt = pCnt - cBcnt
        assert(pCnt >= 0)
        RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns)

    @override
    def _finishCoalesceLeaf(self, parent) -> None:
        """Right-size the surviving node: fully inflate it if it can still
        be written to, otherwise deflate it to save space."""
        if not parent.isSnapshot() or parent.isAttachedRW():
            parent.inflateFully()
        else:
            parent.deflate()

    @override
    def _calcExtraSpaceNeeded(self, child, parent) -> int:
        """Space needed to inflate the parent LV to hold the full
        virtual size."""
        return parent.lvmcowutil.calcVolumeSize(parent.sizeVirt) - parent.sizeLV

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Recover every journalled leaf-coalesce: undo it if the parent
        LV (or the temporarily-renamed child LV) still exists under any
        known LV prefix, otherwise finish it; then clear the journal."""
        entries = self.journaler.getAll(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            undo = False
            for prefix in LV_PREFIX.values():
                parentLV = prefix + parentUuid
                undo = self.lvmCache.checkLV(parentLV)
                if undo:
                    break

            if not undo:
                for prefix in LV_PREFIX.values():
                    tmpChildLV = prefix + uuid
                    undo = self.lvmCache.checkLV(tmpChildLV)
                    if undo:
                        break

            if undo:
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Roll back a half-done leaf-coalesce: restore names, records,
        refcounts, LV sizes and hidden/read-only flags."""
        Util.log("*** UNDO LEAF-COALESCE")
        parent = self.getVDI(parentUuid)
        if not parent:
            # Parent was already renamed to the child's uuid: rename it back.
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException("Neither %s nor %s found" % \
                        (parentUuid, childUuid))
            Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
            parent.rename(parentUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)

        child = self.getVDI(childUuid)
        if not child:
            # Child is still parked under the temporary rename prefix.
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException("Neither %s nor %s found" % \
                        (childUuid, self.TMP_RENAME_PREFIX + childUuid))
            Util.log("Renaming child back to %s" % childUuid)
            child.rename(childUuid)
            Util.log("Updating the VDI record")
            child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)

        # refcount (best effort - assume that it had succeeded if the
        # second rename succeeded; if not, this adjustment will be wrong,
        # leading to a non-deactivation of the LV)
        ns = NS_PREFIX_LVM + self.uuid
        cCnt, cBcnt = RefCounter.check(child.uuid, ns)
        pCnt, pBcnt = RefCounter.check(parent.uuid, ns)
        pCnt = pCnt + cBcnt
        RefCounter.set(parent.uuid, pCnt, 0, ns)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid)

        # Restore LV sizes: the child is the writable leaf again.
        parent.deflate()
        child.inflateFully()
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid)
        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        if not parent.lvReadonly:
            self.lvmCache.setReadonly(parent.fileName, True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
        Util.log("*** leaf-coalesce undo successful")
        if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
            child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Complete a leaf-coalesce whose data/rename phase already
        finished: inflate the survivor, forget the obsolete parent and
        refresh slaves."""
        Util.log("*** FINISH LEAF-COALESCE")
        vdi = self.getVDI(childUuid)
        if not vdi:
            raise util.SMException("VDI %s not found" % childUuid)
        vdi.inflateFully()
        util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid)
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # Best effort: the parent may already be gone from XAPI.
            pass
        self._updateSlavesOnResize(vdi)
        util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
        Util.log("*** finished leaf-coalesce successfully")

    def _checkSlaves(self, vdi):
        """Confirm with all slaves in the pool that 'vdi' is not in use. We
        try to check all slaves, including those that the Agent believes are
        offline, but ignore failures for offline hosts. This is to avoid cases
        where the Agent thinks a host is offline but the host is up."""
        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": vdi.fileName,
                "action2": "cleanupLockAndRefcount",
                "uuid2": vdi.uuid,
                "ns2": NS_PREFIX_LVM + self.uuid}
        onlineHosts = self.xapi.getOnlineHosts()
        abortFlag = IPCFlag(self.uuid)
        for pbdRecord in self.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            if hostRef == self.xapi._hostRef:
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("Checking with slave %s (path %s)" % (
                self.xapi.getRecordHost(hostRef)['hostname'], vdi.path))
            try:
                self.xapi.ensureInactive(hostRef, args)
            except XenAPI.Failure:
                # Only escalate when the host is believed online.
                if hostRef in onlineHosts:
                    raise

    @override
    def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
        """After an undo, make slaves that have the child attached drop the
        temporary LV and refresh both renamed LVs."""
        slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid])
        if not slaves:
            Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \
                    child)
            return

        tmpName = child.vdi_type + self.TMP_RENAME_PREFIX + child.uuid
        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": tmpName,
                "action2": "deactivateNoRefcount",
                "lvName2": child.fileName,
                "action3": "refresh",
                "lvName3": child.fileName,
                "action4": "refresh",
                "lvName4": parent.fileName}
        for slave in slaves:
            Util.log("Updating %s, %s, %s on slave %s" % \
                    (tmpName, child.fileName, parent.fileName,
                     self.xapi.getRecordHost(slave)['hostname']))
            text = self.xapi.session.xenapi.host.call_plugin( \
                    slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
            Util.log("call-plugin returned: '%s'" % text)

    @override
    def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None:
        """After the rename step, make slaves with the VDI attached drop
        the old LV name, refresh the new one, and clean up the forgotten
        parent's lock/refcount state."""
        slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid])
        if not slaves:
            Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi)
            return

        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": oldNameLV,
                "action2": "refresh",
                "lvName2": vdi.fileName,
                "action3": "cleanupLockAndRefcount",
                "uuid3": origParentUuid,
                "ns3": NS_PREFIX_LVM + self.uuid}
        for slave in slaves:
            Util.log("Updating %s to %s on slave %s" % \
                    (oldNameLV, vdi.fileName,
                     self.xapi.getRecordHost(slave)['hostname']))
            text = self.xapi.session.xenapi.host.call_plugin( \
                    slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
            Util.log("call-plugin returned: '%s'" % text)

    @override
    def _updateSlavesOnResize(self, vdi) -> None:
        """After a resize, refresh the LV on every slave that has any leaf
        of this node attached."""
        uuids = [x.uuid for x in vdi.getAllLeaves()]
        slaves = util.get_slaves_attached_on(self.xapi.session, uuids)
        if not slaves:
            util.SMlog("Update-on-resize: %s not attached on any slave" % vdi)
            return
        LvmCowUtil.refreshVolumeOnSlaves(self.xapi.session, self.uuid, self.vgName,
                                         vdi.fileName, vdi.uuid, slaves)

3511 

3512 

class LinstorSR(SR):
    """GC/coalesce support for LINSTOR (DRBD) based SRs.

    Volumes are managed through a LinstorVolumeManager connected to the
    LINSTOR controller; journal entries live in LINSTOR key/value stores
    (LinstorJournaler).
    """

    TYPE = SR.TYPE_LINSTOR

    def __init__(self, uuid, xapi, createLock, force):
        if not LINSTOR_AVAILABLE:
            raise util.SMException(
                'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing'
            )

        SR.__init__(self, uuid, xapi, createLock, force)
        self.path = LinstorVolumeManager.DEV_ROOT_PATH

        class LinstorProxy:
            # Delegates every attribute access to the *current* `_linstor`
            # object, so holders of the proxy stay valid across
            # `_reloadLinstor()` calls that replace the manager instance.
            def __init__(self, sr: LinstorSR) -> None:
                self.sr = sr

            def __getattr__(self, attr: str) -> Any:
                assert self.sr, "Cannot use `LinstorProxy` without valid `LinstorVolumeManager` instance"
                return getattr(self.sr._linstor, attr)

        self._linstor_proxy = LinstorProxy(self)
        # Only the journaler is needed up front; the (expensive) volume
        # manager is (re)created on each scan via _reloadLinstor().
        self._reloadLinstor(journaler_only=True)

    @override
    def deleteVDI(self, vdi) -> None:
        # Refuse to delete a volume still opened by anything but tapdisk.
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

    @override
    def getFreeSpace(self) -> int:
        return self._linstor.max_volume_size_allowed

    @override
    def scan(self, force=False) -> None:
        """Rescan all LINSTOR volumes and rebuild the in-memory VDI tree."""
        all_vdi_info = self._scan(force)
        for uuid, vdiInfo in all_vdi_info.items():
            # When vdiInfo is None, the VDI is RAW.
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = LinstorVDI(self, uuid, vdiInfo.vdiType if vdiInfo else VdiType.RAW)
                self.vdis[uuid] = vdi
            if vdiInfo:
                vdi.load(vdiInfo)
        self._removeStaleVDIs(all_vdi_info.keys())
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

    @override
    def pauseVDIs(self, vdiList) -> None:
        # A locked volume cannot be paused safely; wait for locks to clear.
        self._linstor.ensure_volume_list_is_not_locked(
            vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
        )
        return super(LinstorSR, self).pauseVDIs(vdiList)

    def _reloadLinstor(self, journaler_only=False):
        """(Re)connect the journaler and, unless `journaler_only`, the
        volume manager to the LINSTOR controller.

        Raises util.SMException if this host has no PBD for the SR.
        """
        session = self.xapi.session
        host_ref = util.get_this_host_ref(session)
        sr_ref = session.xenapi.SR.get_by_uuid(self.uuid)

        pbd = util.find_my_pbd(session, host_ref, sr_ref)
        if pbd is None:
            raise util.SMException('Failed to find PBD')

        dconf = session.xenapi.PBD.get_device_config(pbd)
        group_name = dconf['group-name']

        controller_uri = get_controller_uri()
        self.journaler = LinstorJournaler(
            controller_uri, group_name, logger=util.SMlog
        )

        if journaler_only:
            return

        self._linstor = LinstorVolumeManager(
            controller_uri,
            group_name,
            repair=True,
            logger=util.SMlog
        )

    def _scan(self, force):
        """Load VDI info, retrying up to SR.SCAN_RETRY_ATTEMPTS times on
        errors. With `force`, the last (possibly partial) result is
        returned instead of raising."""
        # Initialized up front so the "force" fallback below cannot raise
        # NameError when every attempt failed before assignment.
        all_vdi_info = {}
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            self._reloadLinstor()
            error = False
            try:
                all_vdi_info = self._load_vdi_info()
                for uuid, vdiInfo in all_vdi_info.items():
                    if vdiInfo and vdiInfo.error:
                        error = True
                        break
                if not error:
                    return all_vdi_info
                Util.log('Scan error, retrying ({})'.format(i))
            except Exception as e:
                Util.log('Scan exception, retrying ({}): {}'.format(i, e))
                Util.log(traceback.format_exc())

        if force:
            return all_vdi_info
        raise util.SMException('Scan error')

    def _load_vdi_info(self):
        """Return {vdi_uuid: CowImageInfo | None}: None marks a RAW volume,
        an entry with `.error` set marks a volume we failed to inspect."""
        all_vdi_info = {}

        # TODO: Ensure metadata contains the right info.

        all_volume_info = self._linstor.get_volumes_with_info()
        volumes_metadata = self._linstor.get_volumes_with_metadata()
        for vdi_uuid, volume_info in all_volume_info.items():
            vdi_type = VdiType.RAW
            try:
                volume_metadata = volumes_metadata[vdi_uuid]
                if not volume_info.name and not list(volume_metadata.items()):
                    continue  # Ignore it, probably deleted.

                if vdi_uuid.startswith('DELETED_'):
                    # Assume it's really a RAW volume of a failed snap without COW header/footer.
                    # We must remove this VDI now without adding it in the VDI list.
                    # Otherwise `Relinking` calls and other actions can be launched on it.
                    # We don't want that...
                    Util.log('Deleting bad VDI {}'.format(vdi_uuid))

                    self.lock()
                    try:
                        self._linstor.destroy_volume(vdi_uuid)
                        try:
                            self.forgetVDI(vdi_uuid)
                        except:
                            # Best effort: the XAPI record may already be gone.
                            pass
                    except Exception as e:
                        Util.log('Cannot delete bad VDI: {}'.format(e))
                    finally:
                        self.unlock()
                    continue

                vdi_type = volume_metadata.get(VDI_TYPE_TAG)
                volume_name = self._linstor.get_volume_name(vdi_uuid)
                if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX):
                    # Always RAW!
                    info = None
                elif VdiType.isCowImage(vdi_type):
                    info = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type).get_info(vdi_uuid)
                else:
                    # Ensure it's not a COW image...
                    linstorcowutil = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type)
                    try:
                        info = linstorcowutil.get_info(vdi_uuid)
                    except:
                        # Possibly a damaged image: try a repair pass before
                        # giving up and treating it as RAW (info = None).
                        try:
                            linstorcowutil.force_repair(
                                self._linstor.get_device_path(vdi_uuid)
                            )
                            info = linstorcowutil.get_info(vdi_uuid)
                        except:
                            info = None

            except Exception as e:
                Util.log(
                    ' [VDI {}: failed to load VDI info]: {}'
                    .format(vdi_uuid, e)
                )
                info = CowImageInfo(vdi_uuid)
                info.error = 1

            if info:
                info.vdiType = vdi_type

            all_vdi_info[vdi_uuid] = info

        return all_vdi_info

    @override
    def _prepareCoalesceLeaf(self, vdi) -> None:
        vdi._activateChain()
        vdi.deflate()
        vdi._inflateParentForCoalesce()

    @override
    def _finishCoalesceLeaf(self, parent) -> None:
        # Keep inactive snapshots deflated to save space.
        if not parent.isSnapshot() or parent.isAttachedRW():
            parent.inflateFully()
        else:
            parent.deflate()

    @override
    def _calcExtraSpaceNeeded(self, child, parent) -> int:
        """Extra bytes needed to inflate the parent to hold the child."""
        return LinstorCowUtil(
            self.xapi.session, self._linstor, parent.vdi_type
        ).compute_volume_size(parent.sizeVirt) - parent.getDrbdSize()

    def _hasValidDevicePath(self, uuid):
        try:
            self._linstor.get_device_path(uuid)
        except Exception:
            # TODO: Maybe log exception.
            return False
        return True

    @override
    def _liveLeafCoalesce(self, vdi) -> bool:
        self.lock()
        try:
            self._linstor.ensure_volume_is_not_locked(
                vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
            )
            return super(LinstorSR, self)._liveLeafCoalesce(vdi)
        finally:
            self.unlock()

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Roll back or complete leaf-coalesces left over from a crash,
        based on which volumes still exist."""
        entries = self.journaler.get_all(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            if self._hasValidDevicePath(parentUuid) or \
                    self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid):
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Undo a crashed leaf-coalesce: restore original names, parent
        link and hidden flags."""
        Util.log('*** UNDO LEAF-COALESCE')
        parent = self.getVDI(parentUuid)
        if not parent:
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException(
                    'Neither {} nor {} found'.format(parentUuid, childUuid)
                )
            Util.log(
                'Renaming parent back: {} -> {}'.format(childUuid, parentUuid)
            )
            parent.rename(parentUuid)

        child = self.getVDI(childUuid)
        if not child:
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException(
                    'Neither {} nor {} found'.format(
                        childUuid, self.TMP_RENAME_PREFIX + childUuid
                    )
                )
            Util.log('Renaming child back to {}'.format(childUuid))
            child.rename(childUuid)
            Util.log('Updating the VDI record')
            child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)

        # TODO: Maybe deflate here.

        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        Util.log('*** leaf-coalesce undo successful')

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Complete a crashed leaf-coalesce whose rename already happened:
        drop the stale parent record and refresh slaves."""
        Util.log('*** FINISH LEAF-COALESCE')
        vdi = self.getVDI(childUuid)
        if not vdi:
            raise util.SMException('VDI {} not found'.format(childUuid))
        # TODO: Maybe inflate.
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # Parent record already gone: nothing to forget.
            pass
        self._updateSlavesOnResize(vdi)
        Util.log('*** finished leaf-coalesce successfully')

    def _checkSlaves(self, vdi):
        """Raise if the volume is opened by any process other than tapdisk
        on any host; a missing volume is not an error."""
        try:
            all_openers = self._linstor.get_volume_openers(vdi.uuid)
            for openers in all_openers.values():
                for opener in openers.values():
                    if opener['process-name'] != 'tapdisk':
                        raise util.SMException(
                            'VDI {} is in use: {}'.format(vdi.uuid, all_openers)
                        )
        except LinstorVolumeManagerError as e:
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS:
                raise

3802 

3803 

3804################################################################################ 

3805# 

3806# Helpers 

3807# 

def daemonize():
    """Detach the current process into a daemon via the classic double-fork.

    Returns False in the original (parent) process once the intermediate
    child has been reaped, and True in the final daemon process. The exact
    sequence (fork, setsid, fork, close/reopen fds) is order-critical.
    """
    pid = os.fork()
    if pid:
        # Parent: reap the intermediate child and report we did not detach.
        os.waitpid(pid, 0)
        Util.log("New PID [%d]" % pid)
        return False
    # First child: become session leader, detached from the controlling tty.
    os.chdir("/")
    os.setsid()
    pid = os.fork()
    if pid:
        # Intermediate child exits; the grandchild carries on as the daemon.
        Util.log("Will finish as PID [%d]" % pid)
        os._exit(0)
    for fd in [0, 1, 2]:
        try:
            os.close(fd)
        except OSError:
            pass
    # we need to fill those special fd numbers or pread won't work
    sys.stdin = open("/dev/null", 'r')
    sys.stderr = open("/dev/null", 'w')
    sys.stdout = open("/dev/null", 'w')
    # As we're a new process we need to clear the lock objects
    lock.Lock.clearAll()
    return True

3832 

3833 

def normalizeType(type):
    """Collapse a concrete SR driver name into one of the generic SR.TYPES.

    Raises util.SMException for driver types the GC does not handle.
    """
    if type in LVMSR.SUBTYPES:
        type = SR.TYPE_LVHD
    if type in ("lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"):
        # temporary while LVHD is symlinked as LVM
        type = SR.TYPE_LVHD
    file_backed = (
        "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs",
        "moosefs", "xfs", "zfs", "largeblock",
    )
    if type in file_backed:
        type = SR.TYPE_FILE
    if type == "linstor":
        type = SR.TYPE_LINSTOR
    if type not in SR.TYPES:
        raise util.SMException("Unsupported SR type: %s" % type)
    return type

3850 

3851GCPAUSE_DEFAULT_SLEEP = 5 * 60 

3852 

3853 

def _gc_init_file(sr_uuid):
    """Return the path of the per-SR 'gc_init' marker file."""
    sr_dir = os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))
    return os.path.join(sr_dir, 'gc_init')

3856 

3857 

def _create_init_file(sr_uuid):
    """Create (or refresh) the per-SR 'gc_init' marker file.

    The file's existence tells _gcLoopPause that GC has run at least once
    on this SR since boot.
    """
    util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid)))
    # _gc_init_file already returns a full path; the previous extra
    # os.path.join() wrapper around it was a redundant no-op.
    with open(_gc_init_file(sr_uuid), 'w+') as f:
        f.write('1')

3862 

3863 

def _gcLoopPause(sr, dryRun=False, immediate=False):
    """Observe the GC quiet period before starting work.

    Skipped entirely when `immediate` is set. If the GCPAUSE fist point is
    active it takes over (and returns at once); otherwise, once the SR's
    gc_init marker exists, sleep GCPAUSE_DEFAULT_SLEEP seconds in an
    abortable fashion.
    """
    if immediate:
        return

    # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist
    # point will just return. Otherwise, fall back on an abortable sleep.
    if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT):
        util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT,
                                          lambda *args: None)
        return

    if not os.path.exists(_gc_init_file(sr.uuid)):
        return

    def abortTest():
        return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT)

    # If time.sleep hangs we are in deep trouble, however for
    # completeness we set the timeout of the abort thread to
    # 110% of GCPAUSE_DEFAULT_SLEEP.
    Util.log("GC active, about to go quiet")
    Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP),
                      None, sr.uuid, abortTest, VDI.POLL_INTERVAL,
                      GCPAUSE_DEFAULT_SLEEP * 1.1)
    Util.log("GC active, quiet period ended")

3887 

3888 

def _gcLoop(sr, dryRun=False, immediate=False):
    """Core GC loop: repeatedly scan the SR, collect garbage and coalesce
    until no work remains, a TERM arrives, or the SR detaches.

    Holds lockGCActive for the whole run and lockGCRunning around each
    iteration. Progress is reported through a XAPI task.
    """
    if not lockGCActive.acquireNoblock():
        Util.log("Another GC instance already active, exiting")
        return

    # Check we're still attached after acquiring locks
    if not sr.xapi.isPluggedHere():
        Util.log("SR no longer attached, exiting")
        return

    # Clean up Intellicache files
    sr.cleanupCache()

    # Track how many we do
    coalesced = 0
    task_status = "success"
    try:
        # Check if any work needs to be done
        if not sr.xapi.isPluggedHere():
            Util.log("SR no longer attached, exiting")
            return
        sr.scanLocked()
        if not sr.hasWork():
            Util.log("No work, exiting")
            return
        sr.xapi.create_task(
            "Garbage Collection",
            "Garbage collection for SR %s" % sr.uuid)
        _gcLoopPause(sr, dryRun, immediate=immediate)
        while True:
            if SIGTERM:
                Util.log("Term requested")
                return

            if not sr.xapi.isPluggedHere():
                Util.log("SR no longer attached, exiting")
                break
            sr.scanLocked()
            if not sr.hasWork():
                Util.log("No work, exiting")
                break

            if not lockGCRunning.acquireNoblock():
                Util.log("Unable to acquire GC running lock.")
                return
            try:
                if not sr.gcEnabled():
                    break

                sr.xapi.update_task_progress("done", coalesced)

                sr.cleanupCoalesceJournals()
                # Create the init file here in case startup is waiting on it
                _create_init_file(sr.uuid)
                sr.scanLocked()
                sr.updateBlockInfo()

                howmany = len(sr.findGarbage())
                if howmany > 0:
                    Util.log("Found %d orphaned vdis" % howmany)
                    sr.lock()
                    try:
                        sr.garbageCollect(dryRun)
                    finally:
                        sr.unlock()
                    sr.xapi.srUpdate()

                # One coalesce per iteration, then re-scan: the tree may
                # have changed and produced new garbage or candidates.
                candidate = sr.findCoalesceable()
                if candidate:
                    util.fistpoint.activate(
                        "LVHDRT_finding_a_suitable_pair", sr.uuid)
                    sr.coalesce(candidate, dryRun)
                    sr.xapi.srUpdate()
                    coalesced += 1
                    continue

                candidate = sr.findLeafCoalesceable()
                if candidate:
                    sr.coalesceLeaf(candidate, dryRun)
                    sr.xapi.srUpdate()
                    coalesced += 1
                    continue

            finally:
                lockGCRunning.release()
    except:
        task_status = "failure"
        raise
    finally:
        sr.xapi.set_task_status(task_status)
        Util.log("GC process exiting, no work left")
        _create_init_file(sr.uuid)
        lockGCActive.release()

3982 

3983 

def _gc(session, srUuid, dryRun=False, immediate=False):
    """Set up locks and the SR instance, then run the GC loop.

    `immediate` skips the initial quiet period. Cleanup (no-space
    reporting, SR teardown, XAPI session release) always runs.
    """
    init(srUuid)
    sr = SR.getInstance(srUuid, session)
    if not sr.gcEnabled(False):
        return

    try:
        _gcLoop(sr, dryRun, immediate=immediate)
    finally:
        sr.check_no_space_candidates()
        sr.cleanup()
        sr.logFilter.logState()
        # Drop the XAPI session reference so it gets logged out promptly.
        del sr.xapi

3997 

3998 

def _abort(srUuid, soft=False):
    """Abort a GC/coalesce.

    srUuid: the UUID of the SR whose GC/coalesce must be aborted
    soft: If set to True and there is a pending abort signal, the function
    doesn't do anything. If set to False, a new abort signal is issued.

    returns: If soft is set to False, we return True holding lockGCActive. If
    soft is set to True and an abort signal is pending, we return False
    without holding lockGCActive. An exception is raised in case of error."""
    Util.log("=== SR %s: abort ===" % (srUuid))
    init(srUuid)
    if not lockGCActive.acquireNoblock():
        gotLock = False
        Util.log("Aborting currently-running instance (SR %s)" % srUuid)
        abortFlag = IPCFlag(srUuid)
        # set() returns False for a soft abort when a signal is pending.
        if not abortFlag.set(FLAG_TYPE_ABORT, soft):
            return False
        # Wait for the running instance to notice the flag and let go.
        for i in range(SR.LOCK_RETRY_ATTEMPTS):
            gotLock = lockGCActive.acquireNoblock()
            if gotLock:
                break
            time.sleep(SR.LOCK_RETRY_INTERVAL)
        abortFlag.clear(FLAG_TYPE_ABORT)
        if not gotLock:
            raise util.CommandException(code=errno.ETIMEDOUT,
                    reason="SR %s: error aborting existing process" % srUuid)
    return True

4027 

4028 

def init(srUuid):
    """Lazily create the module-level GC locks for the given SR."""
    global lockGCRunning, lockGCActive
    if not lockGCRunning:
        lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid)
    if not lockGCActive:
        lockGCActive = LockActive(srUuid)

4036 

4037 

class LockActive:
    """
    Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired
    if another process holds the SR lock.
    """
    def __init__(self, srUuid):
        self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid)
        self._srLock = lock.Lock(lock.LOCK_TYPE_SR, srUuid)

    def acquireNoblock(self):
        """Try to take the GC-active lock without blocking.

        The SR lock is held only for the duration of the attempt, so a
        process holding the SR lock excludes us, but we never block it.
        """
        self._srLock.acquire()

        try:
            return self._lock.acquireNoblock()
        finally:
            self._srLock.release()

    def release(self):
        # Only the GC-active lock is held between acquire and release.
        self._lock.release()

4057 

4058 

def usage():
    """Print CLI usage to stdout, log the invalid invocation, and exit 1."""
    output = """Garbage collect and/or coalesce COW images in a COW-based SR

Parameters:
    -u --uuid UUID SR UUID
 and one of:
    -g --gc garbage collect, coalesce, and repeat while there is work
    -G --gc_force garbage collect once, aborting any current operations
    -c --cache-clean <max_age> clean up IntelliCache cache files older than
 max_age hours
    -a --abort abort any currently running operation (GC or coalesce)
    -q --query query the current state (GC'ing, coalescing or not running)
    -x --disable disable GC/coalesce (will be in effect until you exit)
    -t --debug see Debug below

Options:
    -b --background run in background (return immediately) (valid for -g only)
    -f --force continue in the presence of COW images with errors (when doing
 GC, this might cause removal of any such images) (only valid
 for -G) (DANGEROUS)

Debug:
    The --debug parameter enables manipulation of LVHD VDIs for debugging
    purposes. ** NEVER USE IT ON A LIVE VM **
    The following parameters are required:
    -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate",
 "deflate".
    -v --vdi_uuid VDI UUID
    """
    #-d --dry-run don't actually perform any SR-modifying operations
    print(output)
    Util.log("(Invalid usage)")
    sys.exit(1)

4092 

4093 

4094############################################################################## 

4095# 

4096# API 

4097# 

def abort(srUuid, soft=False):
    """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair.

    Returns True (after releasing the process lock _abort acquired) when
    the abort went through, False for a soft abort that was already pending.
    """
    if not _abort(srUuid, soft):
        return False
    Util.log("abort: releasing the process lock")
    lockGCActive.release()
    return True

4107 

4108 

def gc(session, srUuid, inBackground, dryRun=False):
    """Garbage collect all deleted VDIs in SR "srUuid". Fork & return
    immediately if inBackground=True.

    The following algorithm is used:
    1. If we are already GC'ing in this SR, return
    2. If we are already coalescing a VDI pair:
        a. Scan the SR and determine if the VDI pair is GC'able
        b. If the pair is not GC'able, return
        c. If the pair is GC'able, abort coalesce
    3. Scan the SR
    4. If there is nothing to collect, nor to coalesce, return
    5. If there is something to collect, GC all, then goto 3
    6. If there is something to coalesce, coalesce one pair, then goto 3
    """
    Util.log("=== SR %s: gc ===" % srUuid)

    # Allow a graceful stop: the loop checks SIGTERM between iterations.
    signal.signal(signal.SIGTERM, receiveSignal)

    if inBackground:
        if daemonize():
            # we are now running in the background. Catch & log any errors
            # because there is no other way to propagate them back at this
            # point

            try:
                # No XAPI session can cross the fork; _gc builds its own.
                _gc(None, srUuid, dryRun)
            except AbortException:
                Util.log("Aborted")
            except Exception:
                Util.logException("gc")
                Util.log("* * * * * SR %s: ERROR\n" % srUuid)
            os._exit(0)
    else:
        _gc(session, srUuid, dryRun, immediate=True)

4144 

4145 

def start_gc(session, sr_uuid):
    """
    This function is used to try to start a backgrounded GC session by forking
    the current process. If using the systemd version, call start_gc_service() instead.
    """
    # don't bother if an instance already running (this is just an
    # optimization to reduce the overhead of forking a new process if we
    # don't have to, but the process will check the lock anyways)
    lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid)
    if lockRunning.acquireNoblock():
        lockRunning.release()
    elif should_preempt(session, sr_uuid):
        util.SMlog("Aborting currently-running coalesce of garbage VDI")
        try:
            if not abort(sr_uuid, soft=True):
                util.SMlog("The GC has already been scheduled to re-start")
        except util.CommandException as e:
            if e.code != errno.ETIMEDOUT:
                raise
            util.SMlog('failed to abort the GC')
    else:
        util.SMlog("A GC instance already running, not kicking")
        return

    util.SMlog(f"Starting GC file is {__file__}")
    subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'],
                   stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)

4174 

def start_gc_service(sr_uuid, wait=False):
    """
    This starts the templated systemd service which runs GC on the given SR UUID.
    If the service was already started, this is a no-op.

    Because the service is a one-shot with RemainAfterExit=no, when called with
    wait=True this will run the service synchronously and will not return until the
    run has finished. This is used to force a run of the GC instead of just kicking it
    in the background.
    """
    # systemd instance names need the dashes escaped.
    sr_uuid_esc = sr_uuid.replace("-", "\\x2d")
    util.SMlog(f"Kicking SMGC@{sr_uuid}...")
    cmd = ["/usr/bin/systemctl", "--quiet"]
    if not wait:
        cmd.append("--no-block")
    cmd.extend(["start", f"SMGC@{sr_uuid_esc}"])
    subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)

4192 

4193 

def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False):
    """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure
    the SR lock is held.
    The following algorithm is used:
    1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce
    2. Scan the SR
    3. GC
    4. return
    """
    Util.log("=== SR %s: gc_force ===" % srUuid)
    init(srUuid)
    sr = SR.getInstance(srUuid, session, lockSR, True)
    if not lockGCActive.acquireNoblock():
        # Another instance is running: abort it (this leaves us holding
        # lockGCActive on success).
        abort(srUuid)
    else:
        Util.log("Nothing was running, clear to proceed")

    if force:
        Util.log("FORCED: will continue even if there are COW image errors")
    sr.scanLocked(force)
    sr.cleanupCoalesceJournals()

    try:
        sr.cleanupCache()
        sr.garbageCollect(dryRun)
    finally:
        sr.cleanup()
        sr.logFilter.logState()
        lockGCActive.release()

4223 

4224 

def get_state(srUuid):
    """Return whether GC/coalesce is currently running or not. This asks systemd for
    the state of the templated SMGC service and will return True if it is "activating"
    or "running" (for completeness, as in practice it will never achieve the latter state)
    """
    sr_uuid_esc = srUuid.replace("-", "\\x2d")
    result = subprocess.run(
        ["/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
    state = result.stdout.decode('utf-8').rstrip()
    return state in ("activating", "running")

4237 

4238 

def should_preempt(session, srUuid):
    """Return True when the VDI currently being coalesced (per the journal)
    has itself become garbage, i.e. the running coalesce is wasted work."""
    sr = SR.getInstance(srUuid, session)
    entries = sr.journaler.getAll(VDI.JRN_COALESCE)
    if len(entries) == 0:
        return False
    elif len(entries) > 1:
        raise util.SMException("More than one coalesce entry: " + str(entries))
    sr.scanLocked()
    coalescedUuid = entries.popitem()[0]
    return any(vdi.uuid == coalescedUuid for vdi in sr.findGarbage())

4253 

4254 

def get_coalesceable_leaves(session, srUuid, vdiUuids):
    """Return the subset of vdiUuids that are leaf-coalesceable on the SR.

    Raises util.SMException if any of the UUIDs is unknown.
    """
    sr = SR.getInstance(srUuid, session)
    sr.scanLocked()
    coalesceable = []
    for vdi_uuid in vdiUuids:
        vdi = sr.getVDI(vdi_uuid)
        if not vdi:
            raise util.SMException("VDI %s not found" % vdi_uuid)
        if vdi.isLeafCoalesceable():
            coalesceable.append(vdi_uuid)
    return coalesceable

4266 

4267 

def cache_cleanup(session, srUuid, maxAge):
    """Remove IntelliCache cache files older than maxAge hours on the SR."""
    sr = SR.getInstance(srUuid, session)
    return sr.cleanupCache(maxAge)

4271 

4272 

def debug(sr_uuid, cmd, vdi_uuid):
    """Run a debug manipulation (activate/deactivate/inflate/deflate) on a
    single VDI of an LVHD SR. ** NEVER USE IT ON A LIVE VM **
    """
    Util.log("Debug command: %s" % cmd)
    sr = SR.getInstance(sr_uuid, None)
    if not isinstance(sr, LVMSR):
        print("Error: not an LVHD SR")
        return
    sr.scanLocked()
    vdi = sr.getVDI(vdi_uuid)
    if not vdi:
        # Bug fix: the placeholder was never interpolated before, so the
        # message printed a literal "%s" instead of the UUID.
        print("Error: VDI %s not found" % vdi_uuid)
        return
    print("Running %s on SR %s" % (cmd, sr))
    print("VDI before: %s" % vdi)
    if cmd == "activate":
        vdi._activate()
        print("VDI file: %s" % vdi.path)
    if cmd == "deactivate":
        ns = NS_PREFIX_LVM + sr.uuid
        sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False)
    if cmd == "inflate":
        vdi.inflateFully()
        sr.cleanup()
    if cmd == "deflate":
        vdi.deflate()
        sr.cleanup()
    sr.scanLocked()
    print("VDI after: %s" % vdi)

4300 

4301 

def abort_optional_reenable(uuid):
    """Disable GC/coalesce on the SR until the operator presses enter,
    then release the locks taken while disabling."""
    print("Disabling GC/coalesce for %s" % uuid)
    aborted = _abort(uuid)
    input("Press enter to re-enable...")
    print("GC/coalesce re-enabled")
    lockGCRunning.release()
    if aborted:
        # _abort left us holding lockGCActive; give it back.
        lockGCActive.release()

4310 

4311 

4312############################################################################## 

4313# 

4314# CLI 

4315# 

def main():
    """Parse command-line options and dispatch to the requested action
    (gc, gc_force, clean_cache, abort, query, disable or debug)."""
    action = ""
    maxAge = 0
    uuid = ""
    background = False
    force = False
    dryRun = False
    debug_cmd = ""
    vdi_uuid = ""
    shortArgs = "gGc:aqxu:bfdt:v:"
    longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable",
            "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="]

    try:
        opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs)
    except getopt.GetoptError:
        # usage() exits, so `opts` is always bound below.
        usage()
    for o, a in opts:
        if o in ("-g", "--gc"):
            action = "gc"
        if o in ("-G", "--gc_force"):
            action = "gc_force"
        if o in ("-c", "--clean_cache"):
            action = "clean_cache"
            maxAge = int(a)
        if o in ("-a", "--abort"):
            action = "abort"
        if o in ("-q", "--query"):
            action = "query"
        if o in ("-x", "--disable"):
            action = "disable"
        if o in ("-u", "--uuid"):
            uuid = a
        if o in ("-b", "--background"):
            background = True
        if o in ("-f", "--force"):
            force = True
        if o in ("-d", "--dry-run"):
            Util.log("Dry run mode")
            dryRun = True
        if o in ("-t", "--debug"):
            action = "debug"
            debug_cmd = a
        if o in ("-v", "--vdi_uuid"):
            vdi_uuid = a

    if not action or not uuid:
        usage()
    # debug requires both -t and -v; no other action accepts either.
    if action == "debug" and not (debug_cmd and vdi_uuid) or \
            action != "debug" and (debug_cmd or vdi_uuid):
        usage()

    if action != "query" and action != "debug":
        print("All output goes to log")

    if action == "gc":
        gc(None, uuid, background, dryRun)
    elif action == "gc_force":
        gc_force(None, uuid, force, dryRun, True)
    elif action == "clean_cache":
        cache_cleanup(None, uuid, maxAge)
    elif action == "abort":
        abort(uuid)
    elif action == "query":
        print("Currently running: %s" % get_state(uuid))
    elif action == "disable":
        abort_optional_reenable(uuid)
    elif action == "debug":
        debug(uuid, debug_cmd, vdi_uuid)

4386 

# Script entry point (e.g. invoked by the SMGC systemd unit or start_gc).
if __name__ == '__main__':
    main()