Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect COW-based SR's in the background 

19# 

20 

21from sm_typing import Any, Optional, List, override 

22 

23import os 

24import os.path 

25import sys 

26import time 

27import signal 

28import subprocess 

29import getopt 

30import datetime 

31import traceback 

32import base64 

33import zlib 

34import errno 

35import stat 

36 

37import XenAPI # pylint: disable=import-error 

38import util 

39import lvutil 

40import lvmcache 

41import journaler 

42import fjournaler 

43import lock 

44import blktap2 

45import xs_errors 

46from refcounter import RefCounter 

47from ipc import IPCFlag 

48from lvmanager import LVActivator 

49from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

50from functools import reduce 

51from time import monotonic as _time 

52 

53from constants import NS_PREFIX_LVM, VG_LOCATION, VG_PREFIX 

54from cowutil import CowImageInfo, CowUtil, getCowUtil 

55from lvmcowutil import LV_PREFIX, LvmCowUtil 

56from vditype import VdiType, VdiTypeExtension, VDI_COW_TYPES, VDI_TYPE_TO_EXTENSION 

57 

58try: 

59 from linstorcowutil import LinstorCowUtil 

60 from linstorjournaler import LinstorJournaler 

61 from linstorvolumemanager import get_controller_uri 

62 from linstorvolumemanager import LinstorVolumeManager 

63 from linstorvolumemanager import LinstorVolumeManagerError 

64 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX 

65 

66 LINSTOR_AVAILABLE = True 

67except ImportError: 

68 LINSTOR_AVAILABLE = False 

69 

# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not
# possible due to lvhd_stop_using_() not working correctly. However, we leave
# this option available through the explicit LEAFCLSC_FORCE flag in the VDI
# record for use by the offline tool (which makes the operation safe by pausing
# the VM first)
# NOTE(review): the comment above says "Disable" but the value is True; the
# comment looks stale -- confirm against project history.
AUTO_ONLINE_LEAF_COALESCE_ENABLED = True

FLAG_TYPE_ABORT = "abort"  # flag to request aborting of GC/coalesce

# process "lock", used simply as an indicator that a process already exists
# that is doing GC/coalesce on this SR (such a process holds the lock, and we
# check for the fact by trying the lock).
lockGCRunning = None

# process "lock" to indicate that the GC process has been activated but may not
# yet be running, stops a second process from being started.
LOCK_TYPE_GC_ACTIVE = "gc_active"
lockGCActive = None

# Default coalesce error rate limit, in messages per minute. A zero value
# disables throttling, and a negative value disables error reporting.
DEFAULT_COALESCE_ERR_RATE = 1.0 / 60

# sm-config tags used to rate-limit coalesce-error messages to XenCenter
COALESCE_LAST_ERR_TAG = 'last-coalesce-error'
COALESCE_ERR_RATE_TAG = 'coalesce-error-rate'
VAR_RUN = "/var/run/"
# per-SR file recording coalesce speed samples (see N_RUNNING_AVERAGE)
SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log"

# number of samples kept for the coalesce-speed running average
N_RUNNING_AVERAGE = 10

NON_PERSISTENT_DIR = '/run/nonpersistent/sm'

# Signal Handler
SIGTERM = False  # set by receiveSignal() when SIGTERM is delivered

104 

105 

class AbortException(util.SMException):
    """Raised when a GC/coalesce operation is aborted (abort flag or signal)."""
    pass

108 

109 

def receiveSignal(signalNumber, frame):
    """SIGTERM handler: record the request in the global SIGTERM flag.

    The flag is polled by long-running loops (e.g. Util.runAbortable) so
    the GC can stop at a safe point rather than being killed mid-operation.
    """
    global SIGTERM

    util.SMlog("GC: received SIGTERM")  # message typo fixed ("recieved")
    SIGTERM = True

116 

117 

118################################################################################ 

119# 

120# Util 

121# 

class Util:
    """Generic helpers: logging, subprocess execution, abortable background
    execution and small bit/number formatting utilities."""

    # doexec() return-value selectors
    RET_RC = 1
    RET_STDOUT = 2
    RET_STDERR = 4

    UUID_LEN = 36

    # multipliers used by num2str()
    PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024}

    @staticmethod
    def log(text) -> None:
        """Log one line under the SMGC identity."""
        util.SMlog(text, ident="SMGC")

    @staticmethod
    def logException(tag):
        """Log the exception currently being handled, prefixed with *tag*.

        SystemExit is converted into an immediate clean exit instead of
        being logged as an error.
        """
        info = sys.exc_info()
        if info[0] is SystemExit:
            # this should not be happening when catching "Exception", but it is
            sys.exit(0)
        # "".join is the idiomatic (and faster) equivalent of the old reduce()
        tb = "".join(traceback.format_tb(info[2]))
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
        Util.log(" ***********************")
        Util.log(" * E X C E P T I O N *")
        Util.log(" ***********************")
        Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1]))
        Util.log(tb)
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")

    @staticmethod
    def doexec(args, expectedRC, inputtext=None, ret=None, log=True):
        """Execute a subprocess, then return its return code, stdout or stderr.

        Raises util.CommandException when the return code is not in
        expectedRC (an int or a list of ints).
        NOTE(review): under Python 3, communicate() yields bytes, so
        str(stdout) produces the "b'...'" repr; callers may depend on the
        current behaviour, so it is deliberately left unchanged.
        """
        proc = subprocess.Popen(args,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                close_fds=True)
        (stdout, stderr) = proc.communicate(inputtext)
        stdout = str(stdout)
        stderr = str(stderr)
        rc = proc.returncode
        if log:
            Util.log("`%s`: %s" % (args, rc))
        if not isinstance(expectedRC, list):
            expectedRC = [expectedRC]
        if rc not in expectedRC:
            reason = stderr.strip()
            if stdout.strip():
                reason = "%s (stdout: %s)" % (reason, stdout.strip())
            Util.log("Failed: %s" % reason)
            raise util.CommandException(rc, args, reason)

        if ret == Util.RET_RC:
            return rc
        if ret == Util.RET_STDERR:
            return stderr
        return stdout

    @staticmethod
    def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut):
        """Execute func in a forked child and kill it if abortTest signals so.

        The child reports completion through an IPCFlag in namespace *ns*:
        "success" when func() == ret, "failure" otherwise. The parent polls
        the flag every pollInterval seconds and aborts on abortTest(), a
        pending SIGTERM, or after timeOut seconds (0 disables the timeout).
        """
        abortSignaled = abortTest()  # check now before we clear resultFlag
        resultFlag = IPCFlag(ns)
        resultFlag.clearAll()
        pid = os.fork()
        if pid:
            # parent: poll for the child's result or an abort condition
            startTime = _time()
            try:
                while True:
                    if resultFlag.test("success"):
                        Util.log(" Child process completed successfully")
                        resultFlag.clear("success")
                        return
                    if resultFlag.test("failure"):
                        resultFlag.clear("failure")
                        raise util.SMException("Child process exited with error")
                    if abortTest() or abortSignaled or SIGTERM:
                        os.killpg(pid, signal.SIGKILL)
                        raise AbortException("Aborting due to signal")
                    if timeOut and _time() - startTime > timeOut:
                        os.killpg(pid, signal.SIGKILL)
                        resultFlag.clearAll()
                        raise util.SMException("Timed out")
                    time.sleep(pollInterval)
            finally:
                # reap the child, but give up after ~20s if it never exits
                wait_pid = 0
                rc = -1
                count = 0
                while wait_pid == 0 and count < 10:
                    wait_pid, rc = os.waitpid(pid, os.WNOHANG)
                    if wait_pid == 0:
                        time.sleep(2)
                        count += 1

                if wait_pid == 0:
                    Util.log("runAbortable: wait for process completion timed out")
        else:
            # child: new process group so the parent's killpg() reaches us
            os.setpgrp()
            try:
                if func() == ret:
                    resultFlag.set("success")
                else:
                    resultFlag.set("failure")
            except Exception as e:
                Util.log("Child process failed with : (%s)" % e)
                resultFlag.set("failure")
                Util.logException("This exception has occurred")
            os._exit(0)

    @staticmethod
    def num2str(number):
        """Format a byte count using the largest fitting G/M/K suffix."""
        for prefix in ("G", "M", "K"):
            if number >= Util.PREFIX[prefix]:
                return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix)
        return "%s" % number

    @staticmethod
    def numBits(val):
        """Return the number of set bits in the non-negative integer val.

        (Replaces the old shift loop, which never terminated for negative
        input; bitmap bytes are always 0..255 so behaviour is unchanged.)
        """
        return bin(val).count("1")

    @staticmethod
    def countBits(bitmap1, bitmap2):
        """Return bit count in the bitmap produced by ORing the two bitmaps.

        The bitmaps (byte-indexable) may differ in length; the tail of the
        longer one is counted as-is. Fixed: the original indexed the tail
        with a leftover loop variable, raising NameError whenever the
        shorter bitmap was empty.
        """
        if len(bitmap2) > len(bitmap1):
            bitmap1, bitmap2 = bitmap2, bitmap1
        # bitmap1 is now the longer (or equal-length) bitmap
        count = 0
        for i in range(len(bitmap2)):
            count += Util.numBits(bitmap1[i] | bitmap2[i])
        for i in range(len(bitmap2), len(bitmap1)):
            count += Util.numBits(bitmap1[i])
        return count

    @staticmethod
    def getThisScript():
        """Return the absolute path of this script (.py, never .pyc)."""
        thisScript = util.get_real_path(__file__)
        if thisScript.endswith(".pyc"):
            thisScript = thisScript[:-1]
        return thisScript

276 

277 

278################################################################################ 

279# 

280# XAPI 

281# 

class XAPI:
    """Thin wrapper around a XenAPI session, scoped to a single SR."""
    USER = "root"
    PLUGIN_ON_SLAVE = "on-slave"

    # config "kinds": which XAPI field a given config key lives in
    CONFIG_SM = 0
    CONFIG_OTHER = 1
    CONFIG_ON_BOOT = 2
    CONFIG_ALLOW_CACHING = 3

    CONFIG_NAME = {
        CONFIG_SM: "sm-config",
        CONFIG_OTHER: "other-config",
        CONFIG_ON_BOOT: "on-boot",
        CONFIG_ALLOW_CACHING: "allow_caching"
    }

    class LookupError(util.SMException):
        pass

    @staticmethod
    def getSession():
        """Open and return a new local XenAPI session."""
        session = XenAPI.xapi_local()
        session.xenapi.login_with_password(XAPI.USER, '', '', 'SM')
        return session

    def __init__(self, session, srUuid):
        # if no session is supplied, create a private one (logged out in __del__)
        self.sessionPrivate = False
        self.session = session
        if self.session is None:
            self.session = self.getSession()
            self.sessionPrivate = True
        self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid)
        self.srRecord = self.session.xenapi.SR.get_record(self._srRef)
        self.hostUuid = util.get_this_host()
        self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid)
        # XAPI task used to report GC/coalesce progress (see create_task)
        self.task = None
        self.task_progress = {"coalescable": 0, "done": 0}

    def __del__(self):
        # only log out sessions we created ourselves
        if self.sessionPrivate:
            self.session.xenapi.session.logout()

    @property
    def srRef(self):
        return self._srRef

    def isPluggedHere(self):
        """True if this SR has a PBD currently attached on this host."""
        pbds = self.getAttachedPBDs()
        for pbdRec in pbds:
            if pbdRec["host"] == self._hostRef:
                return True
        return False

    def poolOK(self):
        """True if every host in the pool is enabled."""
        host_recs = self.session.xenapi.host.get_all_records()
        for host_ref, host_rec in host_recs.items():
            if not host_rec["enabled"]:
                Util.log("Host %s not enabled" % host_rec["uuid"])
                return False
        return True

    def isMaster(self):
        """True if this host is the pool master (shared SR) or the one host
        a local SR is attached to.

        Raises util.SMException if a local SR is not attached exactly once.
        """
        if self.srRecord["shared"]:
            pool = list(self.session.xenapi.pool.get_all_records().values())[0]
            return pool["master"] == self._hostRef
        else:
            pbds = self.getAttachedPBDs()
            if len(pbds) < 1:
                raise util.SMException("Local SR not attached")
            elif len(pbds) > 1:
                raise util.SMException("Local SR multiply attached")
            return pbds[0]["host"] == self._hostRef

    def getAttachedPBDs(self):
        """Return PBD records for all PBDs of this SR that are currently
        attached"""
        attachedPBDs = []
        pbds = self.session.xenapi.PBD.get_all_records()
        for pbdRec in pbds.values():
            if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]:
                attachedPBDs.append(pbdRec)
        return attachedPBDs

    def getOnlineHosts(self):
        return util.get_online_hosts(self.session)

    def ensureInactive(self, hostRef, args):
        """Run the on-slave "multi" plugin on hostRef with the given args."""
        text = self.session.xenapi.host.call_plugin( \
                hostRef, self.PLUGIN_ON_SLAVE, "multi", args)
        Util.log("call-plugin returned: '%s'" % text)

    def getRecordHost(self, hostRef):
        return self.session.xenapi.host.get_record(hostRef)

    def _getRefVDI(self, uuid):
        return self.session.xenapi.VDI.get_by_uuid(uuid)

    def getRefVDI(self, vdi):
        return self._getRefVDI(vdi.uuid)

    def getRecordVDI(self, uuid):
        """Return the VDI record for uuid, or None if the VDI is unknown."""
        try:
            ref = self._getRefVDI(uuid)
            return self.session.xenapi.VDI.get_record(ref)
        except XenAPI.Failure:
            return None

    def singleSnapshotVDI(self, vdi):
        """Take an "internal" (single) snapshot of the given VDI."""
        return self.session.xenapi.VDI.snapshot(vdi.getRef(),
                {"type": "internal"})

    def forgetVDI(self, srUuid, vdiUuid):
        """Forget the VDI, but handle the case where the VDI has already been
        forgotten (i.e. ignore errors)"""
        try:
            vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid)
            self.session.xenapi.VDI.forget(vdiRef)
        except XenAPI.Failure:
            pass

    def getConfigVDI(self, vdi, key):
        """Fetch the config container that *key* lives in for *vdi*.

        Returns a dict for sm-config/other-config, a scalar for
        on-boot/allow-caching.
        """
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef())
        elif kind == self.CONFIG_OTHER:
            cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef())
        elif kind == self.CONFIG_ON_BOOT:
            cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef())
        elif kind == self.CONFIG_ALLOW_CACHING:
            cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef())
        else:
            assert(False)
        Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg)))
        return cfg

    def removeFromConfigVDI(self, vdi, key):
        """Remove *key* from the appropriate config map of *vdi*."""
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key)
        else:
            assert(False)

    def addToConfigVDI(self, vdi, key, val):
        """Add key=val to the appropriate config map of *vdi*."""
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val)
        else:
            assert(False)

    def isSnapshot(self, vdi):
        return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef())

    def markCacheSRsDirty(self):
        """Set the dirty flag on every SR that has local caching enabled."""
        sr_refs = self.session.xenapi.SR.get_all_records_where( \
                'field "local_cache_enabled" = "true"')
        for sr_ref in sr_refs:
            Util.log("Marking SR %s dirty" % sr_ref)
            util.set_dirty(self.session, sr_ref)

    def srUpdate(self):
        """Start an async SR.update and wait for it (up to ~60s); the task is
        cancelled on timeout or when the abort flag is raised."""
        Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"])
        abortFlag = IPCFlag(self.srRecord["uuid"])
        task = self.session.xenapi.Async.SR.update(self._srRef)
        cancelTask = True
        try:
            for i in range(60):
                status = self.session.xenapi.task.get_status(task)
                if not status == "pending":
                    Util.log("SR.update_asynch status changed to [%s]" % status)
                    cancelTask = False
                    return
                if abortFlag.test(FLAG_TYPE_ABORT):
                    Util.log("Abort signalled during srUpdate, cancelling task...")
                    try:
                        self.session.xenapi.task.cancel(task)
                        cancelTask = False
                        Util.log("Task cancelled")
                    except:
                        pass
                    return
                time.sleep(1)
        finally:
            if cancelTask:
                self.session.xenapi.task.cancel(task)
            self.session.xenapi.task.destroy(task)
        Util.log("Asynch srUpdate still running, but timeout exceeded.")

    def update_task(self):
        """Push the current progress counters into the XAPI task."""
        self.session.xenapi.task.set_other_config(
                self.task,
                {
                    "applies_to": self._srRef
                })
        total = self.task_progress['coalescable'] + self.task_progress['done']
        if (total > 0):
            self.session.xenapi.task.set_progress(
                    self.task, float(self.task_progress['done']) / total)

    def create_task(self, label, description):
        """Create the XAPI task used to report GC progress."""
        self.task = self.session.xenapi.task.create(label, description)
        self.update_task()

    def update_task_progress(self, key, value):
        """Update one progress counter ("coalescable" or "done")."""
        self.task_progress[key] = value
        if self.task:
            self.update_task()

    def set_task_status(self, status):
        if self.task:
            self.session.xenapi.task.set_status(self.task, status)

496 

497 

498################################################################################ 

499# 

500# VDI 

501# 

class VDI(object):
    """Object representing a VDI of a COW-based SR"""

    POLL_INTERVAL = 1
    POLL_TIMEOUT = 30
    DEVICE_MAJOR = 202

    # config keys & values
    DB_VDI_PARENT = "vhd-parent"
    DB_VDI_TYPE = "vdi_type"
    DB_VDI_BLOCKS = "vhd-blocks"
    DB_VDI_PAUSED = "paused"
    DB_VDI_RELINKING = "relinking"
    DB_VDI_ACTIVATING = "activating"
    DB_GC = "gc"
    DB_COALESCE = "coalesce"
    DB_LEAFCLSC = "leaf-coalesce"  # config key
    DB_GC_NO_SPACE = "gc_no_space"
    LEAFCLSC_DISABLED = "false"  # set by user; means do not leaf-coalesce
    LEAFCLSC_FORCE = "force"  # set by user; means skip snap-coalesce
    LEAFCLSC_OFFLINE = "offline"  # set here for informational purposes: means
                                  # no space to snap-coalesce or unable to keep
                                  # up with VDI. This is not used by the SM, it
                                  # might be used by external components.
    DB_ONBOOT = "on-boot"
    ONBOOT_RESET = "reset"
    DB_ALLOW_CACHING = "allow_caching"

    # maps each config key to the XAPI field it is stored in
    CONFIG_TYPE = {
        DB_VDI_PARENT: XAPI.CONFIG_SM,
        DB_VDI_TYPE: XAPI.CONFIG_SM,
        DB_VDI_BLOCKS: XAPI.CONFIG_SM,
        DB_VDI_PAUSED: XAPI.CONFIG_SM,
        DB_VDI_RELINKING: XAPI.CONFIG_SM,
        DB_VDI_ACTIVATING: XAPI.CONFIG_SM,
        DB_GC: XAPI.CONFIG_OTHER,
        DB_COALESCE: XAPI.CONFIG_OTHER,
        DB_LEAFCLSC: XAPI.CONFIG_OTHER,
        DB_ONBOOT: XAPI.CONFIG_ON_BOOT,
        DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING,
        DB_GC_NO_SPACE: XAPI.CONFIG_SM
    }

    LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024  # bytes
    LIVE_LEAF_COALESCE_TIMEOUT = 10  # seconds
    TIMEOUT_SAFETY_MARGIN = 0.5  # extra margin when calculating
                                 # feasibility of leaf coalesce

    JRN_RELINK = "relink"  # journal entry type for relinking children
    JRN_COALESCE = "coalesce"  # to communicate which VDI is being coalesced
    JRN_LEAF = "leaf"  # used in coalesce-leaf

    STR_TREE_INDENT = 4

555 

    def __init__(self, sr, uuid, vdi_type):
        self.sr = sr
        # assume a scan error until load() succeeds
        self.scanError = True
        self.uuid = uuid
        self.vdi_type = vdi_type
        self.fileName = ""
        self.parentUuid = ""
        # sizes are unknown (-1) until load() fills them in
        self.sizeVirt = -1
        self._sizePhys = -1
        self._sizeAllocated = -1
        self._hidden = False
        # tree links, populated when the SR builds its VDI trees
        self.parent = None
        self.children = []
        self._vdiRef = None
        self.cowutil = getCowUtil(vdi_type)
        self._clearRef()

572 

    @staticmethod
    def extractUuid(path):
        """Derive the VDI UUID from a backing-image path (subclass-specific)."""
        raise NotImplementedError("Implement in sub class")

576 

    def load(self, info=None) -> None:
        """Load VDI info (no-op here; subclasses fill in size/parent/etc.)."""
        pass

580 

    def getDriverName(self) -> str:
        """Return the tapdisk driver name for this VDI (its vdi_type)."""
        return self.vdi_type

583 

    def getRef(self):
        """Return the XenAPI VDI ref, looking it up lazily and caching it."""
        if self._vdiRef is None:
            self._vdiRef = self.sr.xapi.getRefVDI(self)
        return self._vdiRef

588 

589 def getConfig(self, key, default=None): 

590 config = self.sr.xapi.getConfigVDI(self, key) 

591 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING: 591 ↛ 592line 591 didn't jump to line 592, because the condition on line 591 was never true

592 val = config 

593 else: 

594 val = config.get(key) 

595 if val: 

596 return val 

597 return default 

598 

    def setConfig(self, key, val):
        """Replace the value of a config key for this VDI (remove, then add)."""
        self.sr.xapi.removeFromConfigVDI(self, key)
        self.sr.xapi.addToConfigVDI(self, key, val)
        Util.log("Set %s = %s for %s" % (key, val, self))

603 

    def delConfig(self, key):
        """Remove a config key from this VDI."""
        self.sr.xapi.removeFromConfigVDI(self, key)
        Util.log("Removed %s from %s" % (key, self))

607 

    def ensureUnpaused(self):
        """Unpause the VDI if its 'paused' config flag says it is paused."""
        if self.getConfig(self.DB_VDI_PAUSED) == "true":
            Util.log("Unpausing VDI %s" % self)
            self.unpause()

612 

    def pause(self, failfast=False) -> None:
        """Pause the tapdisk for this VDI; raise util.SMException on failure."""
        if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid,
                self.uuid, failfast):
            raise util.SMException("Failed to pause VDI %s" % self)

617 

    def _report_tapdisk_unpause_error(self):
        """Best-effort XenCenter message when tapdisk unpause fails; never
        raises (message creation errors are only logged)."""
        try:
            xapi = self.sr.xapi.session.xenapi
            sr_ref = xapi.SR.get_by_uuid(self.sr.uuid)
            msg_name = "failed to unpause tapdisk"
            msg_body = "Failed to unpause tapdisk for VDI %s, " \
                    "VMs using this tapdisk have lost access " \
                    "to the corresponding disk(s)" % self.uuid
            xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body)
        except Exception as e:
            util.SMlog("failed to generate message: %s" % e)

629 

    def unpause(self):
        """Unpause the tapdisk; report to XenCenter and raise on failure."""
        if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid,
                self.uuid):
            self._report_tapdisk_unpause_error()
            raise util.SMException("Failed to unpause VDI %s" % self)

635 

    def refresh(self, ignoreNonexistent=True):
        """Pause-unpause in one step"""
        # hold the SR lock around the tap refresh
        self.sr.lock()
        try:
            try:
                if not blktap2.VDI.tap_refresh(self.sr.xapi.session,
                        self.sr.uuid, self.uuid):
                    self._report_tapdisk_unpause_error()
                    raise util.SMException("Failed to refresh %s" % self)
            except XenAPI.Failure as e:
                # the VDI may have vanished since we scanned; optionally ignore
                if util.isInvalidVDI(e) and ignoreNonexistent:
                    Util.log("VDI %s not found, ignoring" % self)
                    return
                raise
        finally:
            self.sr.unlock()

652 

    def isSnapshot(self):
        """True if XAPI marks this VDI as a snapshot."""
        return self.sr.xapi.isSnapshot(self)

655 

    def isAttachedRW(self):
        """True if the VDI is currently attached read-write (per sm-config)."""
        return util.is_attached_rw(
                self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef()))

659 

660 def getVDIBlocks(self): 

661 val = self.updateBlockInfo() 

662 bitmap = zlib.decompress(base64.b64decode(val)) 

663 return bitmap 

664 

665 def isCoalesceable(self): 

666 """A VDI is coalesceable if it has no siblings and is not a leaf""" 

667 return not self.scanError and \ 

668 self.parent and \ 

669 len(self.parent.children) == 1 and \ 

670 self.isHidden() and \ 

671 len(self.children) > 0 

672 

673 def isLeafCoalesceable(self): 

674 """A VDI is leaf-coalesceable if it has no siblings and is a leaf""" 

675 return not self.scanError and \ 

676 self.parent and \ 

677 len(self.parent.children) == 1 and \ 

678 not self.isHidden() and \ 

679 len(self.children) == 0 

680 

681 def canLiveCoalesce(self, speed): 

682 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

683 isLeafCoalesceable() already""" 

684 feasibleSize = False 

685 allowedDownTime = \ 

686 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

687 allocated_size = self.getAllocatedSize() 

688 if speed: 

689 feasibleSize = \ 

690 allocated_size // speed < allowedDownTime 

691 else: 

692 feasibleSize = \ 

693 allocated_size < self.LIVE_LEAF_COALESCE_MAX_SIZE 

694 

695 return (feasibleSize or 

696 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 

697 

    def getAllPrunable(self):
        """Return the VDIs in this subtree that can be garbage collected."""
        if len(self.children) == 0:  # base case
            # it is possible to have a hidden leaf that was recently coalesced
            # onto its parent, its children already relinked but not yet
            # reloaded - in which case it may not be garbage collected yet:
            # some tapdisks could still be using the file.
            if self.sr.journaler.get(self.JRN_RELINK, self.uuid):
                return []
            if not self.scanError and self.isHidden():
                return [self]
            return []

        thisPrunable = True
        vdiList = []
        for child in self.children:
            childList = child.getAllPrunable()
            vdiList.extend(childList)
            # if any child survives, this node must survive too
            if child not in childList:
                thisPrunable = False

        # We can destroy the current VDI if all children are prunable BUT the
        # current VDI must be hidden too to do that!
        # Example in this case (after a failed live leaf coalesce):
        #
        # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees):
        # SMGC: [32436]         b5458d61(1.000G/4.127M)
        # SMGC: [32436]         *OLD_b545(1.000G/4.129M)
        #
        # OLD_b545 is hidden and must be removed, but b5458d61 not.
        # Normally we are not in this function when the delete action is
        # executed but in `_liveLeafCoalesce`.

        if not self.scanError and not self.isHidden() and thisPrunable:
            vdiList.append(self)
        return vdiList

733 

    def getSizePhys(self) -> int:
        """Physical size in bytes, or -1 if not yet loaded."""
        return self._sizePhys

736 

    def getAllocatedSize(self) -> int:
        """Allocated size in bytes, or -1 if not yet loaded."""
        return self._sizeAllocated

739 

740 def getTreeRoot(self): 

741 "Get the root of the tree that self belongs to" 

742 root = self 

743 while root.parent: 

744 root = root.parent 

745 return root 

746 

747 def getTreeHeight(self): 

748 "Get the height of the subtree rooted at self" 

749 if len(self.children) == 0: 

750 return 1 

751 

752 maxChildHeight = 0 

753 for child in self.children: 

754 childHeight = child.getTreeHeight() 

755 if childHeight > maxChildHeight: 

756 maxChildHeight = childHeight 

757 

758 return maxChildHeight + 1 

759 

760 def getAllLeaves(self) -> List["VDI"]: 

761 "Get all leaf nodes in the subtree rooted at self" 

762 if len(self.children) == 0: 

763 return [self] 

764 

765 leaves = [] 

766 for child in self.children: 

767 leaves.extend(child.getAllLeaves()) 

768 return leaves 

769 

    def updateBlockInfo(self) -> Optional[str]:
        """Query the image's block bitmap, store it (base64) in sm-config and
        return the encoded value."""
        val = base64.b64encode(self._queryCowBlocks()).decode()
        try:
            self.setConfig(VDI.DB_VDI_BLOCKS, val)
        except Exception:
            if self.vdi_type != VdiType.QCOW2:
                raise
            # Sometime with QCOW2, our allocation table is too big to be stored in XAPI, in this case we do not store it
            # and we write `skipped` instead so that hasWork is happy (and the GC doesn't run in loop indefinitely).
            self.setConfig(VDI.DB_VDI_BLOCKS, "skipped")

        return val

782 

    def rename(self, uuid) -> None:
        "Rename the VDI file"
        # the target UUID must not already exist in this SR
        assert(not self.sr.vdis.get(uuid))
        self._clearRef()
        oldUuid = self.uuid
        self.uuid = uuid
        self.children = []
        # updating the children themselves is the responsibility of the caller
        del self.sr.vdis[oldUuid]
        self.sr.vdis[self.uuid] = self

793 

    def delete(self) -> None:
        "Physically delete the VDI"
        # drop any per-VDI locks before clearing our state
        lock.Lock.cleanup(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
        lock.Lock.cleanupAll(self.uuid)
        self._clear()

799 

    def getParent(self) -> str:
        """Return the parent path recorded in the COW image header."""
        return self.cowutil.getParent(self.path, lambda x: x.strip())

802 

    def repair(self, parent) -> None:
        """Repair the given parent image via the COW utility."""
        self.cowutil.repair(parent)

805 

806 @override 

807 def __str__(self) -> str: 

808 strHidden = "" 

809 if self.isHidden(): 809 ↛ 810line 809 didn't jump to line 810, because the condition on line 809 was never true

810 strHidden = "*" 

811 strSizeVirt = "?" 

812 if self.sizeVirt > 0: 812 ↛ 813line 812 didn't jump to line 813, because the condition on line 812 was never true

813 strSizeVirt = Util.num2str(self.sizeVirt) 

814 strSizePhys = "?" 

815 if self._sizePhys > 0: 815 ↛ 816line 815 didn't jump to line 816, because the condition on line 815 was never true

816 strSizePhys = "/%s" % Util.num2str(self._sizePhys) 

817 strSizeAllocated = "?" 

818 if self._sizeAllocated >= 0: 

819 strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated) 

820 strType = "[{}]".format(self.vdi_type) 

821 

822 return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, 

823 strSizePhys, strSizeAllocated, strType) 

824 

    def validate(self, fast=False) -> None:
        """Check the COW image; raise util.SMException if it is corrupted."""
        if self.cowutil.check(self.path, fast=fast) != CowUtil.CheckResult.Success:
            raise util.SMException("COW image %s corrupted" % self)

828 

    def _clear(self):
        """Reset identity/links after the VDI has been physically deleted."""
        self.uuid = ""
        self.path = ""
        self.parentUuid = ""
        self.parent = None
        self._clearRef()

835 

    def _clearRef(self):
        # invalidate the cached XenAPI ref (re-fetched lazily by getRef())
        self._vdiRef = None

838 

    def _call_plug_cancel(self, hostRef):
        """Ask the on-slave plugin on hostRef to cancel a running coalesce."""
        args = {"path": self.path, "vdi_type": self.vdi_type}
        self.sr.xapi.session.xenapi.host.call_plugin( \
                hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_cancel", args)

843 

    def _call_plugin_coalesce(self, hostRef):
        """Run the coalesce (commit) of this VDI remotely on hostRef via the
        on-slave plugin."""
        args = {"path": self.path, "vdi_type": self.vdi_type}
        util.SMlog("DAMS: Calling remote coalesce with: {}".format(args))
        ret = self.sr.xapi.session.xenapi.host.call_plugin( \
                hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_tapdisk", args)
        util.SMlog("DAMS: Remote coalesce returned {}".format(ret))

850 

    def _doCoalesceOnHost(self, hostRef):
        """Coalesce this VDI onto its parent remotely on hostRef, aborting if
        the GC-running file disappears or is emptied."""
        self.validate()
        self.parent.validate(True)
        self.parent._increaseSizeVirt(self.sizeVirt)
        self.sr._updateSlavesOnResize(self.parent)
        #TODO: We might need to make the LV RW on the slave directly for coalesce?
        # Children and parent need to be RW for QCOW2 coalesce, otherwise tapdisk(libqcow) will crash trying to access them

        def abortTest():
            # abort when the gc-running file is empty (cancel requested),
            # missing, or unreadable
            file = self.sr._gc_running_file(self)
            try:
                with open(file, "r") as f:
                    if not f.read():
                        util.SMlog("DAMS: abortTest: Cancelling coalesce")
                        self._call_plug_cancel(hostRef)
                        return True
            except OSError as e:
                if e.errno == errno.ENOENT:
                    util.SMlog("File {} does not exist".format(file))
                else:
                    util.SMlog("IOError: {}".format(e))
                    return True
            except Exception as e2:
                util.SMlog(f"DAMS: Error in AbortTest for _doCoalesceOnHost: {e2}")
                return True
            return False

        #TODO: Add exception handling here like when calling in a runAbortable situation _doCoalesceCOWImage
        Util.runAbortable(lambda: self._call_plugin_coalesce(hostRef),
                None, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)

        self.parent.validate(True)
        #self._verifyContents(0)
        self.parent.updateBlockInfo()

885 

    def _isOpenOnHosts(self) -> Optional[str]:
        """Return the ref of a host that still has this image open, else None."""
        for pbdRecord in self.sr.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            args = {"path": self.path}
            is_openers = util.strtobool(self.sr.xapi.session.xenapi.host.call_plugin( \
                    hostRef, XAPI.PLUGIN_ON_SLAVE, "is_openers", args))
            if is_openers:
                return hostRef
        return None

895 

    def _doCoalesce(self) -> None:
        """Coalesce self onto parent. Only perform the actual coalescing of
        an image, but not the subsequent relinking. We'll do that as the next step,
        after reloading the entire SR in case things have changed while we
        were coalescing"""
        self.validate()
        self.parent.validate(True)
        # parent must be able to hold everything this child may contain
        self.parent._increaseSizeVirt(self.sizeVirt)
        self.sr._updateSlavesOnResize(self.parent)
        self._coalesceCowImage(0)
        self.parent.validate(True)
        #self._verifyContents(0)
        self.parent.updateBlockInfo()

909 

    def _verifyContents(self, timeOut):
        """Verify the coalesce result with tapdisk-diff, abortable via the
        SR abort flag and bounded by timeOut."""
        Util.log("  Coalesce verification on %s" % self)
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        Util.runAbortable(lambda: self._runTapdiskDiff(), True,
                self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        Util.log("  Coalesce verification succeeded")

916 

    def _runTapdiskDiff(self):
        """Diff this image against its parent with tapdisk-diff; raises via
        Util.doexec on mismatch, returns True on success."""
        cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \
                (self.getDriverName(), self.path, \
                self.parent.getDriverName(), self.parent.path)
        Util.doexec(cmd, 0)
        return True

923 

    @staticmethod
    def _reportCoalesceError(vdi, ce):
        """Reports a coalesce error to XenCenter.

        vdi: the VDI object on which the coalesce error occured
        ce: the CommandException that was raised"""

        msg_name = os.strerror(ce.code)
        if ce.code == errno.ENOSPC:
            # TODO We could add more information here, e.g. exactly how much
            # space is required for the particular coalesce, as well as actions
            # to be taken by the user and consequences of not taking these
            # actions.
            msg_body = 'Run out of space while coalescing.'
        elif ce.code == errno.EIO:
            msg_body = 'I/O error while coalescing.'
        else:
            msg_body = ''
        util.SMlog('Coalesce failed on SR %s: %s (%s)'
                % (vdi.sr.uuid, msg_name, msg_body))

        # Create a XenCenter message, but don't spam.
        xapi = vdi.sr.xapi.session.xenapi
        sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid)
        oth_cfg = xapi.SR.get_other_config(sr_ref)
        # rate may be overridden per-SR; negative disables reporting entirely
        if COALESCE_ERR_RATE_TAG in oth_cfg:
            coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG])
        else:
            coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE
        xcmsg = False
        if coalesce_err_rate == 0:
            xcmsg = True
        elif coalesce_err_rate > 0:
            now = datetime.datetime.now()
            sm_cfg = xapi.SR.get_sm_config(sr_ref)
            if COALESCE_LAST_ERR_TAG in sm_cfg:
                # seconds per message (minimum distance in time between two
                # messages in seconds)
                spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60)
                last = datetime.datetime.fromtimestamp(
                        float(sm_cfg[COALESCE_LAST_ERR_TAG]))
                if now - last >= spm:
                    xapi.SR.remove_from_sm_config(sr_ref,
                            COALESCE_LAST_ERR_TAG)
                    xcmsg = True
            else:
                xcmsg = True
            if xcmsg:
                xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG,
                        str(now.strftime('%s')))
        if xcmsg:
            xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body)

977 

    def coalesce(self) -> int:
        """Coalesce this image onto its parent; returns the coalesced size
        as reported by the underlying cowutil implementation."""
        return self.cowutil.coalesce(self.path)

980 

981 @staticmethod 

982 def _doCoalesceCowImage(vdi: "VDI"): 

983 try: 

984 startTime = time.time() 

985 allocated_size = vdi.getAllocatedSize() 

986 coalesced_size = vdi.coalesce() 

987 endTime = time.time() 

988 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size) 

989 except util.CommandException as ce: 

990 # We use try/except for the following piece of code because it runs 

991 # in a separate process context and errors will not be caught and 

992 # reported by anyone. 

993 try: 

994 # Report coalesce errors back to user via XC 

995 VDI._reportCoalesceError(vdi, ce) 

996 except Exception as e: 

997 util.SMlog('failed to create XenCenter message: %s' % e) 

998 raise ce 

999 except: 

1000 raise 

1001 

1002 def _vdi_is_raw(self, vdi_path): 

1003 """ 

1004 Given path to vdi determine if it is raw 

1005 """ 

1006 uuid = self.extractUuid(vdi_path) 

1007 return self.sr.vdis[uuid].vdi_type == VdiType.RAW 

1008 

    def _coalesceCowImage(self, timeOut):
        """Run the COW coalesce of self onto its parent as an abortable
        operation; on failure or timeout, attempt a repair of the parent
        (best effort) and re-raise."""
        Util.log(" Running COW coalesce on %s" % self)

        def abortTest():
            # For remote coalesce, abort if the gc_running file is empty,
            # missing or unreadable; otherwise fall through to the normal
            # abort flag check.
            if self.cowutil.isCoalesceableOnRemote():
                file = self.sr._gc_running_file(self)
                try:
                    with open(file, "r") as f:
                        if not f.read():
                            return True
                except OSError as e:
                    if e.errno == errno.ENOENT:
                        util.SMlog("File {} does not exist".format(file))
                    else:
                        util.SMlog("IOError: {}".format(e))
                    return True
            return IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)

        try:
            # Test hook: optionally inject a failure at this point.
            util.fistpoint.activate_custom_fn(
                    "cleanup_coalesceVHD_inject_failure",
                    util.inject_failure)
            Util.runAbortable(lambda: VDI._doCoalesceCowImage(self), None,
                    self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        except:
            # Exception at this phase could indicate a failure in COW coalesce
            # or a kill of COW coalesce by runAbortable due to timeOut
            # Try a repair and reraise the exception
            parent = ""
            try:
                parent = self.getParent()
                if not self._vdi_is_raw(parent):
                    # Repair error is logged and ignored. Error reraised later
                    util.SMlog('Coalesce failed on %s, attempting repair on ' \
                            'parent %s' % (self.uuid, parent))
                    self.repair(parent)
            except Exception as e:
                util.SMlog('(error ignored) Failed to repair parent %s ' \
                        'after failed coalesce on %s, err: %s' %
                        (parent, self.path, e))
            raise

        util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid)

1051 

    def _relinkSkip(self) -> None:
        """Relink children of this VDI to point to the parent of this VDI"""
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log(" Relinking %s from %s to %s" % \
                    (child, self, self.parent))
            util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid)
            child._setParent(self.parent)
        # All children are now attached to our parent; we no longer have any.
        self.children = []

1063 

    def _reloadChildren(self, vdiSkip):
        """Pause & unpause all VDIs in the subtree to cause blktap to reload
        the COW image metadata for this file in any online VDI.

        `vdiSkip` is excluded from the reload; aborts (via the IPC abort
        flag) raise AbortException."""
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if child == vdiSkip:
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log(" Reloading VDI %s" % child)
            child._reload()

1075 

    def _reload(self):
        """Pause & unpause to cause blktap to reload the image metadata"""
        # Depth-first: reload all descendants before self.
        for child in self.children:
            child._reload()

        # only leaves can be attached
        if len(self.children) == 0:
            try:
                # Relinking is complete for this leaf: clear the flag.
                self.delConfig(VDI.DB_VDI_RELINKING)
            except XenAPI.Failure as e:
                # A VDI that disappeared meanwhile is not an error here.
                if not util.isInvalidVDI(e):
                    raise
            self.refresh()

1089 

    def _tagChildrenForRelink(self):
        """Set the DB_VDI_RELINKING flag on every leaf of this VDI's subtree.

        Retries (up to 15 times, 2s apart) while a leaf is concurrently
        activating; raises SMException if tagging never succeeds. XenAPI
        failures for VDIs that became invalid are ignored."""
        if len(self.children) == 0:
            retries = 0
            try:
                while retries < 15:
                    retries += 1
                    if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None:
                        Util.log("VDI %s is activating, wait to relink" %
                                self.uuid)
                    else:
                        self.setConfig(VDI.DB_VDI_RELINKING, "True")

                        # Re-check: activation may have started between the
                        # check above and setting the flag; back off if so.
                        if self.getConfig(VDI.DB_VDI_ACTIVATING):
                            self.delConfig(VDI.DB_VDI_RELINKING)
                            Util.log("VDI %s started activating while tagging" %
                                    self.uuid)
                        else:
                            return
                    time.sleep(2)

                raise util.SMException("Failed to tag vdi %s for relink" % self)
            except XenAPI.Failure as e:
                if not util.isInvalidVDI(e):
                    raise

        for child in self.children:
            child._tagChildrenForRelink()

1117 

1118 def _loadInfoParent(self): 

1119 ret = self.cowutil.getParent(self.path, LvmCowUtil.extractUuid) 

1120 if ret: 

1121 self.parentUuid = ret 

1122 

    def _setParent(self, parent) -> None:
        """Rewrite this image's parent pointer to `parent`, update the
        in-memory tree links, and record the parent UUID in the VDI config
        (config update failure is only logged)."""
        self.cowutil.setParent(self.path, parent.path, False)
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
            Util.log("Updated the vhd-parent field for child %s with %s" % \
                    (self.uuid, self.parentUuid))
        except:
            # Best effort: the on-disk pointer is authoritative.
            Util.log("Failed to update %s with vhd-parent field %s" % \
                    (self.uuid, self.parentUuid))

1135 

    def isHidden(self) -> bool:
        """Return the hidden flag, loading it from the image on first use."""
        if self._hidden is None:
            self._loadInfoHidden()
        return self._hidden

1140 

1141 def _loadInfoHidden(self) -> None: 

1142 hidden = self.cowutil.getHidden(self.path) 

1143 self._hidden = (hidden != 0) 

1144 

    def _setHidden(self, hidden=True) -> None:
        """Set the image's hidden flag; the cache is cleared first so a
        failure in setHidden leaves it unknown rather than stale."""
        self._hidden = None
        self.cowutil.setHidden(self.path, hidden)
        self._hidden = hidden

1149 

    def _increaseSizeVirt(self, size, atomic=True) -> None:
        """ensure the virtual size of 'self' is at least 'size'. Note that
        resizing a COW image must always be offline and atomically: the file must
        not be open by anyone and no concurrent operations may take place.
        Thus we use the Agent API call for performing paused atomic
        operations. If the caller is already in the atomic context, it must
        call with atomic = False"""
        if self.sizeVirt >= size:
            return
        Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \
                (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

        # Fast path: grow in place if the image format allows it without a
        # full (journaled, paused) resize.
        msize = self.cowutil.getMaxResizeSize(self.path)
        if (size <= msize):
            self.cowutil.setSizeVirtFast(self.path, size)
        else:
            if atomic:
                # Pause the entire subtree under the SR lock so nothing has
                # the chain open during the offline resize.
                vdiList = self._getAllSubtree()
                self.sr.lock()
                try:
                    self.sr.pauseVDIs(vdiList)
                    try:
                        self._setSizeVirt(size)
                    finally:
                        self.sr.unpauseVDIs(vdiList)
                finally:
                    self.sr.unlock()
            else:
                self._setSizeVirt(size)

        # Re-read the authoritative value from the image.
        self.sizeVirt = self.cowutil.getSizeVirt(self.path)

1181 

    def _setSizeVirt(self, size) -> None:
        """WARNING: do not call this method directly unless all VDIs in the
        subtree are guaranteed to be unplugged (and remain so for the duration
        of the operation): this operation is only safe for offline COW images"""
        # Journal file enables crash recovery of the resize.
        jFile = os.path.join(self.sr.path, self.uuid)
        self.cowutil.setSizeVirt(self.path, size, jFile)

1188 

    def _queryCowBlocks(self) -> bytes:
        """Return the raw block-allocation bitmap of this image."""
        return self.cowutil.getBlockBitmap(self.path)

1191 

    def _getCoalescedSizeData(self):
        """Get the data size of the resulting image if we coalesce self onto
        parent. We calculate the actual size by using the image block allocation
        information (as opposed to just adding up the two image sizes to get an
        upper bound)"""
        # make sure we don't use stale BAT info from vdi_rec since the child
        # was writable all this time
        self.delConfig(VDI.DB_VDI_BLOCKS)
        blocksChild = self.getVDIBlocks()
        blocksParent = self.parent.getVDIBlocks()
        # Union of both bitmaps = blocks the coalesced image will contain.
        numBlocks = Util.countBits(blocksChild, blocksParent)
        Util.log("Num combined blocks = %d" % numBlocks)
        sizeData = numBlocks * self.cowutil.getBlockSize(self.path)
        assert(sizeData <= self.sizeVirt)
        return sizeData

1207 

    def _calcExtraSpaceForCoalescing(self) -> int:
        """Extra SR space (bytes) needed to coalesce self onto its parent:
        projected coalesced size (data + format overheads) minus the
        parent's current physical size. May be negative."""
        sizeData = self._getCoalescedSizeData()
        sizeCoalesced = sizeData + self.cowutil.calcOverheadBitmap(sizeData) + \
                self.cowutil.calcOverheadEmpty(self.sizeVirt)
        Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
        return sizeCoalesced - self.parent.getSizePhys()

1214 

    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        """How much extra space in the SR will be required to
        [live-]leaf-coalesce this VDI"""
        # the space requirements are the same as for inline coalesce
        return self._calcExtraSpaceForCoalescing()

1220 

    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        """How much extra space in the SR will be required to
        snapshot-coalesce this VDI"""
        return self._calcExtraSpaceForCoalescing() + \
                self.cowutil.calcOverheadEmpty(self.sizeVirt)  # extra snap leaf

1226 

1227 def _getAllSubtree(self): 

1228 """Get self and all VDIs in the subtree of self as a flat list""" 

1229 vdiList = [self] 

1230 for child in self.children: 

1231 vdiList.extend(child._getAllSubtree()) 

1232 return vdiList 

1233 

1234 

class FileVDI(VDI):
    """Object representing a VDI in a file-based SR (EXT or NFS)"""

    @override
    @staticmethod
    def extractUuid(path):
        # The VDI UUID is the file name without its type extension.
        fileName = os.path.basename(path)
        return os.path.splitext(fileName)[0]

    def __init__(self, sr, uuid, vdi_type):
        VDI.__init__(self, sr, uuid, vdi_type)
        # File name is "<uuid><ext>", extension derived from the VDI type.
        self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])

    @override
    def load(self, info=None) -> None:
        """Populate this VDI's state from `info`, reading the on-disk COW
        image metadata when no info record is supplied. On a metadata read
        failure the VDI is left unloaded (failure logged, not raised)."""
        if not info:
            if not util.pathexists(self.path):
                raise util.SMException("%s not found" % self.path)
            try:
                info = self.cowutil.getInfo(self.path, self.extractUuid)
            except util.SMException:
                Util.log(" [VDI %s: failed to read COW image metadata]" % self.uuid)
                return
        self.parent = None
        self.children = []
        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        self._sizePhys = info.sizePhys
        self._sizeAllocated = info.sizeAllocated
        self._hidden = info.hidden
        self.scanError = False
        self.path = os.path.join(self.sr.path, "%s%s" % \
                (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type]))

    @override
    def rename(self, uuid) -> None:
        """Rename the VDI to `uuid` and move its backing file accordingly;
        asserts that the target path does not already exist."""
        oldPath = self.path
        VDI.rename(self, uuid)
        self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])
        self.path = os.path.join(self.sr.path, self.fileName)
        assert(not util.pathexists(self.path))
        Util.log("Renaming %s -> %s" % (oldPath, self.path))
        os.rename(oldPath, self.path)

    @override
    def delete(self) -> None:
        """Delete the backing file and forget the VDI, under the SR lock.
        Refuses to delete a VDI that still has children."""
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" % \
                    self.uuid)
        try:
            self.sr.lock()
            try:
                os.unlink(self.path)
                self.sr.forgetVDI(self.uuid)
            finally:
                self.sr.unlock()
        except OSError:
            raise util.SMException("os.unlink(%s) failed" % self.path)
        VDI.delete(self)

    @override
    def getAllocatedSize(self) -> int:
        """Return the allocated size, computed lazily from the image and
        cached (-1 means not yet known)."""
        if self._sizeAllocated == -1:
            self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)
        return self._sizeAllocated

1300 

1301 

class LVMVDI(VDI):
    """Object representing a VDI in an LVM SR"""

    JRN_ZERO = "zero"  # journal entry type for zeroing out end of parent

    @override
    def load(self, info=None) -> None:
        """Populate this VDI's state from an LV scan `info` record."""
        # `info` is always set. `None` default value is only here to match parent method.
        assert info, "No info given to LVMVDI.load"
        self.parent = None
        self.children = []
        # -1 means "not known yet"; computed lazily (see getSizePhys &co).
        self._sizePhys = -1
        self._sizeAllocated = -1
        self.scanError = info.scanError
        self.sizeLV = info.sizeLV
        self.sizeVirt = info.sizeVirt
        self.fileName = info.lvName
        self.lvActive = info.lvActive
        self.lvOpen = info.lvOpen
        self.lvReadonly = info.lvReadonly
        self._hidden = info.hidden
        self.parentUuid = info.parentUuid
        self.path = os.path.join(self.sr.path, self.fileName)
        self.lvmcowutil = LvmCowUtil(self.cowutil)

    @override
    @staticmethod
    def extractUuid(path):
        # Delegate to the LVM naming convention helper.
        return LvmCowUtil.extractUuid(path)

    def inflate(self, size):
        """inflate the LV containing the COW image to 'size'"""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self.sr.lock()
        try:
            self.lvmcowutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size)
            util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid)
        finally:
            self.sr.unlock()
        # Refresh/invalidate cached sizes after the LV changed.
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizePhys = -1
        self._sizeAllocated = -1

    def deflate(self):
        """deflate the LV containing the image to minimum"""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self.sr.lock()
        try:
            self.lvmcowutil.deflate(self.sr.lvmCache, self.fileName, self.getSizePhys())
        finally:
            self.sr.unlock()
        # Refresh/invalidate cached sizes after the LV changed.
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizePhys = -1
        self._sizeAllocated = -1

    def inflateFully(self):
        """Inflate the LV to the full size required by the virtual size."""
        self.inflate(self.lvmcowutil.calcVolumeSize(self.sizeVirt))

    def inflateParentForCoalesce(self):
        """Inflate the parent only as much as needed for the purposes of
        coalescing"""
        if not VdiType.isCowImage(self.parent.vdi_type):
            return
        inc = self._calcExtraSpaceForCoalescing()
        if inc > 0:
            util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid)
            self.parent.inflate(self.parent.sizeLV + inc)

    @override
    def updateBlockInfo(self) -> Optional[str]:
        # Only COW images carry block bitmaps; raw LVs have none.
        if VdiType.isCowImage(self.vdi_type):
            return VDI.updateBlockInfo(self)
        return None

    @override
    def rename(self, uuid) -> None:
        """Rename the VDI to `uuid`: rename the LV, migrate the LV-activator
        entry and the refcounter state to the new UUID."""
        oldUuid = self.uuid
        oldLVName = self.fileName
        VDI.rename(self, uuid)
        self.fileName = LV_PREFIX[self.vdi_type] + self.uuid
        self.path = os.path.join(self.sr.path, self.fileName)
        assert(not self.sr.lvmCache.checkLV(self.fileName))

        self.sr.lvmCache.rename(oldLVName, self.fileName)
        if self.sr.lvActivator.get(oldUuid, False):
            self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False)

        # Move the refcount from the old UUID to the new one.
        ns = NS_PREFIX_LVM + self.sr.uuid
        (cnt, bcnt) = RefCounter.check(oldUuid, ns)
        RefCounter.set(self.uuid, cnt, bcnt, ns)
        RefCounter.reset(oldUuid, ns)

    @override
    def delete(self) -> None:
        """Remove the LV and forget the VDI, under the SR lock; refuses to
        delete a VDI that still has children."""
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" % \
                    self.uuid)
        self.sr.lock()
        try:
            self.sr.lvmCache.remove(self.fileName)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        RefCounter.reset(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
        VDI.delete(self)

    @override
    def getSizePhys(self) -> int:
        """Return the physical utilization, computed lazily (-1 = unknown)."""
        if self._sizePhys == -1:
            self._loadInfoSizePhys()
        return self._sizePhys

    def _loadInfoSizePhys(self):
        """Get the physical utilization of the COW image file. We do it individually
        (and not using the COW batch scanner) as an optimization: this info is
        relatively expensive and we need it only for VDI's involved in
        coalescing."""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self._sizePhys = self.cowutil.getSizePhys(self.path)
        if self._sizePhys <= 0:
            raise util.SMException("phys size of %s = %d" % \
                    (self, self._sizePhys))

    @override
    def getAllocatedSize(self) -> int:
        """Return the allocated size, computed lazily (-1 = unknown)."""
        if self._sizeAllocated == -1:
            self._loadInfoSizeAllocated()
        return self._sizeAllocated

    def _loadInfoSizeAllocated(self):
        """
        Get the allocated size of the COW volume.
        """
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)

    @override
    def _loadInfoHidden(self) -> None:
        # Raw LVs keep the hidden flag in LVM metadata, COW images inside
        # the image itself.
        if not VdiType.isCowImage(self.vdi_type):
            self._hidden = self.sr.lvmCache.getHidden(self.fileName)
        else:
            VDI._loadInfoHidden(self)

    @override
    def _setHidden(self, hidden=True) -> None:
        # Raw LVs store the hidden flag in LVM metadata (see _loadInfoHidden).
        if not VdiType.isCowImage(self.vdi_type):
            self._hidden = None
            self.sr.lvmCache.setHidden(self.fileName, hidden)
            self._hidden = hidden
        else:
            VDI._setHidden(self, hidden)

    @override
    def __str__(self) -> str:
        # Compact one-line summary: hidden marker, short UUID, type, sizes
        # (virt/phys/allocated/LV) and LV activation state.
        strType = self.vdi_type
        if self.vdi_type == VdiType.RAW:
            strType = "RAW"
        strHidden = ""
        if self.isHidden():
            strHidden = "*"
        strSizePhys = ""
        if self._sizePhys > 0:
            strSizePhys = Util.num2str(self._sizePhys)
        strSizeAllocated = ""
        if self._sizeAllocated >= 0:
            strSizeAllocated = Util.num2str(self._sizeAllocated)
        strActive = "n"
        if self.lvActive:
            strActive = "a"
        if self.lvOpen:
            strActive += "o"
        return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType,
                Util.num2str(self.sizeVirt), strSizePhys, strSizeAllocated,
                Util.num2str(self.sizeLV), strActive)

    @override
    def validate(self, fast=False) -> None:
        # Raw LVs have no image metadata to validate.
        if VdiType.isCowImage(self.vdi_type):
            VDI.validate(self, fast)

    def _setChainRw(self) -> List[str]:
        """
        Set the readonly LV and children writable.
        It's needed because the coalesce can be done by tapdisk directly
        and it will need to write parent information for children.
        The VDI we want to coalesce into it's parent need to be writable for libqcow coalesce part.
        Return a list of the LV that were previously readonly to be made RO again after the coalesce.
        """
        was_ro = []
        if self.lvReadonly:
            self.sr.lvmCache.setReadonly(self.fileName, False)
            was_ro.append(self.fileName)

        for child in self.children:
            if child.lvReadonly:
                self.sr.lvmCache.setReadonly(child.fileName, False)
                was_ro.append(child.fileName)

        return was_ro

    def _setChainRo(self, was_ro: List[str]) -> None:
        """Set the list of LV in parameters to readonly"""
        for lvName in was_ro:
            self.sr.lvmCache.setReadonly(lvName, True)

    @override
    def _doCoalesce(self) -> None:
        """LVMVDI parents must first be activated, inflated, and made writable"""
        was_ro = []
        try:
            self._activateChain()
            self.sr.lvmCache.setReadonly(self.parent.fileName, False)
            self.parent.validate()
            self.inflateParentForCoalesce()
            was_ro = self._setChainRw()
            VDI._doCoalesce(self)
        finally:
            # Always restore: re-read parent phys size, shrink it back and
            # re-apply the read-only flags saved above.
            self.parent._loadInfoSizePhys()
            self.parent.deflate()
            self.sr.lvmCache.setReadonly(self.parent.fileName, True)
            self._setChainRo(was_ro)

    @override
    def _setParent(self, parent) -> None:
        """Rewrite this image's parent pointer to `parent`, temporarily
        making the LV writable; then update in-memory links and the VDI
        config (config failure only logged)."""
        self._activate()
        if self.lvReadonly:
            self.sr.lvmCache.setReadonly(self.fileName, False)

        try:
            self.cowutil.setParent(self.path, parent.path, parent.vdi_type == VdiType.RAW)
        finally:
            if self.lvReadonly:
                self.sr.lvmCache.setReadonly(self.fileName, True)
        self._deactivate()
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
            Util.log("Updated the VDI-parent field for child %s with %s" % \
                    (self.uuid, self.parentUuid))
        except:
            Util.log("Failed to update the VDI-parent with %s for child %s" % \
                    (self.parentUuid, self.uuid))

    def _activate(self):
        # Refcounted activation of the LV.
        self.sr.lvActivator.activate(self.uuid, self.fileName, False)

    def _activateChain(self):
        """Activate this VDI and all its ancestors up to the root."""
        vdi = self
        while vdi:
            vdi._activate()
            vdi = vdi.parent

    def _deactivate(self):
        # Refcounted deactivation of the LV.
        self.sr.lvActivator.deactivate(self.uuid, False)

    @override
    def _increaseSizeVirt(self, size, atomic=True) -> None:
        "ensure the virtual size of 'self' is at least 'size'"
        self._activate()
        if VdiType.isCowImage(self.vdi_type):
            VDI._increaseSizeVirt(self, size, atomic)
            return

        # raw VDI case
        offset = self.sizeLV
        if self.sizeVirt < size:
            oldSize = self.sizeLV
            self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size)
            Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV))
            self.sr.lvmCache.setSize(self.fileName, self.sizeLV)
            offset = oldSize
        # A pending "zero" journal entry means a previous zeroing run was
        # interrupted; resume from the recorded offset.
        unfinishedZero = False
        jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid)
        if jval:
            unfinishedZero = True
            offset = int(jval)
        length = self.sizeLV - offset
        if not length:
            return

        if unfinishedZero:
            Util.log(" ==> Redoing unfinished zeroing out")
        else:
            self.sr.journaler.create(self.JRN_ZERO, self.uuid, \
                    str(offset))
        Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        func = lambda: util.zeroOut(self.path, offset, length)
        Util.runAbortable(func, True, self.sr.uuid, abortTest,
                VDI.POLL_INTERVAL, 0)
        self.sr.journaler.remove(self.JRN_ZERO, self.uuid)

    @override
    def _setSizeVirt(self, size) -> None:
        """WARNING: do not call this method directly unless all VDIs in the
        subtree are guaranteed to be unplugged (and remain so for the duration
        of the operation): this operation is only safe for offline COW images."""
        self._activate()
        # The resize journal lives in an LV (not a plain file as in the base
        # class) and is cleaned up even on failure.
        jFile = self.lvmcowutil.createResizeJournal(self.sr.lvmCache, self.uuid)
        try:
            self.lvmcowutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size, jFile)
        finally:
            self.lvmcowutil.destroyResizeJournal(self.sr.lvmCache, self.uuid)

    @override
    def _queryCowBlocks(self) -> bytes:
        # The LV must be active before the image can be read.
        self._activate()
        return VDI._queryCowBlocks(self)

    @override
    def _calcExtraSpaceForCoalescing(self) -> int:
        if not VdiType.isCowImage(self.parent.vdi_type):
            return 0  # raw parents are never deflated in the first place
        sizeCoalesced = self.lvmcowutil.calcVolumeSize(self._getCoalescedSizeData())
        Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
        return sizeCoalesced - self.parent.sizeLV

    @override
    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        """How much extra space in the SR will be required to
        [live-]leaf-coalesce this VDI"""
        # we can deflate the leaf to minimize the space requirements
        deflateDiff = self.sizeLV - lvutil.calcSizeLV(self.getSizePhys())
        return self._calcExtraSpaceForCoalescing() - deflateDiff

    @override
    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        # Coalesce requirement plus room for the new snapshot leaf.
        return self._calcExtraSpaceForCoalescing() + \
                lvutil.calcSizeLV(self.getSizePhys())

1641 

1642 

1643class LinstorVDI(VDI): 

1644 """Object representing a VDI in a LINSTOR SR""" 

1645 

1646 VOLUME_LOCK_TIMEOUT = 30 

1647 

    @override
    def load(self, info=None) -> None:
        """Populate this VDI's state from a scan `info` record.

        NOTE(review): `info` is dereferenced (info.parentUuid, info.vdiType)
        before the `if not info` fallback below, so the fallback can never
        run with info=None without raising first — confirm callers always
        pass info, or move those accesses after the fallback.
        """
        self.parentUuid = info.parentUuid
        # Pessimistically flag a scan error until load completes.
        self.scanError = True
        self.parent = None
        self.children = []

        self.fileName = self.sr._linstor.get_volume_name(self.uuid)
        self.path = self.sr._linstor.build_device_path(self.fileName)
        self.linstorcowutil = LinstorCowUtil(self.sr.xapi.session, self.sr._linstor, info.vdiType)

        if not info:
            try:
                info = self.linstorcowutil.get_info(self.uuid)
            except util.SMException:
                Util.log(
                    ' [VDI {}: failed to read COW image metadata]'.format(self.uuid)
                )
                return

        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        # -1 means "not known yet"; computed lazily.
        self._sizePhys = -1
        self._sizeAllocated = -1
        self.drbd_size = -1
        self._hidden = info.hidden
        self.scanError = False

1675 

    @override
    def getSizePhys(self, fetch=False) -> int:
        """Return the physical size; queried from LINSTOR when still unknown
        (negative cache) or when `fetch` forces a refresh."""
        if self._sizePhys < 0 or fetch:
            self._sizePhys = self.linstorcowutil.get_size_phys(self.uuid)
        return self._sizePhys

1681 

    def getDrbdSize(self, fetch=False):
        """Return the DRBD volume size; queried when still unknown (negative
        cache) or when `fetch` forces a refresh."""
        if self.drbd_size < 0 or fetch:
            self.drbd_size = self.linstorcowutil.get_drbd_size(self.uuid)
        return self.drbd_size

1686 

    @override
    def getAllocatedSize(self) -> int:
        """Return the allocated size, queried lazily for COW images; remains
        -1 for raw (non-COW) VDIs."""
        if self._sizeAllocated == -1:
            if VdiType.isCowImage(self.vdi_type):
                self._sizeAllocated = self.linstorcowutil.get_allocated_size(self.uuid)
        return self._sizeAllocated

1693 

    def inflate(self, size):
        """Inflate the volume backing this COW image to `size` (no-op for
        raw VDIs); cached size fields are invalidated afterwards."""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self.sr.lock()
        try:
            # Ensure we use the real DRBD size and not the cached one.
            # Why? Because this attribute can be changed if volume is resized by user.
            self.drbd_size = self.getDrbdSize(fetch=True)
            self.linstorcowutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size)
        finally:
            self.sr.unlock()
        self.drbd_size = -1
        self._sizePhys = -1
        self._sizeAllocated = -1

1708 

    def deflate(self):
        """Deflate the volume backing this COW image to its physical size
        (no-op for raw VDIs); cached size fields are invalidated."""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self.sr.lock()
        try:
            # Ensure we use the real sizes and not the cached info.
            self.drbd_size = self.getDrbdSize(fetch=True)
            self._sizePhys = self.getSizePhys(fetch=True)
            self.linstorcowutil.force_deflate(self.path, self._sizePhys, self.drbd_size, zeroize=False)
        finally:
            self.sr.unlock()
        self.drbd_size = -1
        self._sizePhys = -1
        self._sizeAllocated = -1

1723 

    def inflateFully(self):
        """Inflate the volume to the full size needed for its virtual size
        (COW images only)."""
        if VdiType.isCowImage(self.vdi_type):
            self.inflate(self.linstorcowutil.compute_volume_size(self.sizeVirt))

1727 

    @override
    def rename(self, uuid) -> None:
        """Rename the VDI to `uuid`, updating the LINSTOR volume UUID first."""
        Util.log('Renaming {} -> {} (path={})'.format(
            self.uuid, uuid, self.path
        ))
        self.sr._linstor.update_volume_uuid(self.uuid, uuid)
        VDI.rename(self, uuid)

1735 

    @override
    def delete(self) -> None:
        """Destroy the LINSTOR volume and forget the VDI, under the SR lock;
        refuses to delete a VDI that still has children."""
        if len(self.children) > 0:
            raise util.SMException(
                'VDI {} has children, can\'t delete'.format(self.uuid)
            )
        self.sr.lock()
        try:
            self.sr._linstor.destroy_volume(self.uuid)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        VDI.delete(self)

1749 

    @override
    def validate(self, fast=False) -> None:
        """Check COW image integrity; raises SMException when corrupted.
        Raw VDIs have nothing to validate."""
        if VdiType.isCowImage(self.vdi_type) and self.linstorcowutil.check(self.uuid, fast=fast) != CowUtil.CheckResult.Success:
            raise util.SMException('COW image {} corrupted'.format(self))

1754 

    @override
    def pause(self, failfast=False) -> None:
        # Wait for any LINSTOR-level lock on the volume to clear before the
        # base-class pause.
        self.sr._linstor.ensure_volume_is_not_locked(
            self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT
        )
        return super(LinstorVDI, self).pause(failfast)

1761 

    @override
    def coalesce(self) -> int:
        # Note: We raise `SMException` here to skip the current coalesce in case of failure.
        # Using another exception we can't execute the next coalesce calls.
        return self.linstorcowutil.force_coalesce(self.path)

1767 

    @override
    def getParent(self) -> str:
        """Return the parent of this image, resolved through the LINSTOR
        volume UUID of our device path."""
        return self.linstorcowutil.get_parent(
            self.sr._linstor.get_volume_uuid_from_device_path(self.path)
        )

1773 

    @override
    def repair(self, parent_uuid) -> None:
        """Run a forced repair on the parent image identified by
        `parent_uuid`."""
        self.linstorcowutil.force_repair(
            self.sr._linstor.get_device_path(parent_uuid)
        )

1779 

    @override
    def _relinkSkip(self) -> None:
        """Relink children of this VDI to point to our parent. Each child is
        tap-paused around the reparent (and its LINSTOR lock awaited) so the
        datapath picks up the new chain."""
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException('Aborting due to signal')
            Util.log(
                ' Relinking {} from {} to {}'.format(
                    child, self, self.parent
                )
            )

            session = child.sr.xapi.session
            sr_uuid = child.sr.uuid
            vdi_uuid = child.uuid
            try:
                self.sr._linstor.ensure_volume_is_not_locked(
                    vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT
                )
                blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid)
                child._setParent(self.parent)
            finally:
                # Unpause even if the pause or reparent failed.
                blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid)
        self.children = []

1804 

1805 @override 

1806 def _setParent(self, parent) -> None: 

1807 self.sr._linstor.get_device_path(self.uuid) 

1808 self.linstorcowutil.force_parent(self.path, parent.path) 

1809 self.parent = parent 

1810 self.parentUuid = parent.uuid 

1811 parent.children.append(self) 

1812 try: 

1813 self.setConfig(self.DB_VDI_PARENT, self.parentUuid) 

1814 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1815 (self.uuid, self.parentUuid)) 

1816 except: 

1817 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1818 (self.uuid, self.parentUuid)) 

1819 

    @override
    def _doCoalesce(self) -> None:
        # Activate the whole chain, validate the parent and grow it so the
        # coalesced data will fit, then run the generic coalesce step. The
        # parent is deflated again even when coalescing fails.
        try:
            self._activateChain()
            self.parent.validate()
            self._inflateParentForCoalesce()
            VDI._doCoalesce(self)
        finally:
            self.parent.deflate()

1829 

1830 def _activateChain(self): 

1831 vdi = self 

1832 while vdi: 

1833 try: 

1834 p = self.sr._linstor.get_device_path(vdi.uuid) 

1835 except Exception as e: 

1836 # Use SMException to skip coalesce. 

1837 # Otherwise the GC is stopped... 

1838 raise util.SMException(str(e)) 

1839 vdi = vdi.parent 

1840 

    @override
    def _setHidden(self, hidden=True) -> None:
        HIDDEN_TAG = 'hidden'

        if not VdiType.isCowImage(self.vdi_type):
            # Non-COW (RAW) volumes have no image header to carry the hidden
            # flag, so it is stored in the LINSTOR volume metadata instead.
            # NOTE(review): _hidden is cleared to None before the metadata
            # update — presumably "unknown while in flight"; confirm intent.
            self._hidden = None
            self.sr._linstor.update_volume_metadata(self.uuid, {
                HIDDEN_TAG: hidden
            })
            self._hidden = hidden
        else:
            # COW images store the flag in the image itself via the base class.
            VDI._setHidden(self, hidden)

1853 

    @override
    def _increaseSizeVirt(self, size, atomic=True):
        # Grow the virtual size of this VDI to at least `size`.
        if self.vdi_type == VdiType.RAW:
            # RAW volume: growing means resizing the DRBD volume and zeroing
            # the newly exposed region. A ZERO journal entry records the
            # zeroing start offset so an interrupted run can be redone.
            offset = self.drbd_size
            if self.sizeVirt < size:
                oldSize = self.drbd_size
                self.drbd_size = LinstorVolumeManager.round_up_volume_size(size)
                Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size))
                self.sr._linstor.resize_volume(self.uuid, self.drbd_size)
                offset = oldSize
            unfinishedZero = False
            jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid)
            if jval:
                # A previous zeroing was interrupted: restart from its offset.
                unfinishedZero = True
                offset = int(jval)
            length = self.drbd_size - offset
            if not length:
                return

            if unfinishedZero:
                Util.log(" ==> Redoing unfinished zeroing out")
            else:
                self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset))
            Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
            # Zero in an abortable helper so the GC abort flag is honoured.
            abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
            func = lambda: util.zeroOut(self.path, offset, length)
            Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)
            self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid)
            return

        if self.sizeVirt >= size:
            return
        Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \
                 (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

        # get_max_resize_size returns MiB — converted to bytes here.
        msize = self.linstorcowutil.get_max_resize_size(self.uuid) * 1024 * 1024
        if (size <= msize):
            # Fast path: in-place resize, no journal needed.
            self.linstorcowutil.set_size_virt_fast(self.path, size)
        else:
            # Slow path: the whole subtree must be paused (unless the caller
            # already guarantees quiescence via atomic=False).
            if atomic:
                vdiList = self._getAllSubtree()
                self.sr.lock()
                try:
                    self.sr.pauseVDIs(vdiList)
                    try:
                        self._setSizeVirt(size)
                    finally:
                        self.sr.unpauseVDIs(vdiList)
                finally:
                    self.sr.unlock()
            else:
                self._setSizeVirt(size)

        # Re-read the authoritative value from the image.
        self.sizeVirt = self.linstorcowutil.get_size_virt(self.uuid)

1908 

    @override
    def _setSizeVirt(self, size) -> None:
        # A transient journal volume protects the resize: if the operation is
        # interrupted, the journal allows recovery. The volume is created
        # non-persistent, so leaking it on destroy failure is harmless.
        jfile = self.uuid + '-jvhd'
        self.sr._linstor.create_volume(
            jfile, self.cowutil.getResizeJournalSize(), persistent=False, volume_name=jfile
        )
        try:
            # Grow the physical volume first, then the virtual size.
            self.inflate(self.linstorcowutil.compute_volume_size(size))
            self.linstorcowutil.set_size_virt(self.path, size, jfile)
        finally:
            try:
                self.sr._linstor.destroy_volume(jfile)
            except Exception:
                # We can ignore it, in any case this volume is not persistent.
                pass

1924 

1925 @override 

1926 def _queryCowBlocks(self) -> bytes: 

1927 return self.linstorcowutil.get_block_bitmap(self.uuid) 

1928 

1929 def _inflateParentForCoalesce(self): 

1930 if not VdiType.isCowImage(self.parent.vdi_type): 

1931 return 

1932 inc = self._calcExtraSpaceForCoalescing() 

1933 if inc > 0: 

1934 self.parent.inflate(self.parent.getDrbdSize() + inc) 

1935 

    @override
    def _calcExtraSpaceForCoalescing(self) -> int:
        # Extra bytes the parent needs to absorb this image's data; can be
        # negative when the parent is already large enough.
        if not VdiType.isCowImage(self.parent.vdi_type):
            # RAW parents are pre-sized; nothing extra is needed.
            return 0
        size_coalesced = self.linstorcowutil.compute_volume_size(self._getCoalescedSizeData())
        Util.log("Coalesced size = %s" % Util.num2str(size_coalesced))
        return size_coalesced - self.parent.getDrbdSize()

1943 

1944 @override 

1945 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1946 assert self.getDrbdSize() > 0 

1947 assert self.getSizePhys() > 0 

1948 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizePhys()) 

1949 assert deflate_diff >= 0 

1950 return self._calcExtraSpaceForCoalescing() - deflate_diff 

1951 

1952 @override 

1953 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1954 assert self.getSizePhys() > 0 

1955 return self._calcExtraSpaceForCoalescing() + \ 

1956 LinstorVolumeManager.round_up_volume_size(self.getSizePhys()) 

1957 

1958################################################################################ 

1959# 

1960# SR 

1961# 

1962class SR(object): 

    class LogFilter:
        """Logs the SR's VDI-tree state, printing only the trees that changed
        since the previous call so repeated GC passes stay readable."""

        def __init__(self, sr):
            self.sr = sr
            # Whether logState() has run at least once.
            self.stateLogged = False
            # tree-root uuid -> rendered tree string, previous / current pass.
            self.prevState = {}
            self.currState = {}

        def logState(self):
            """Render every VDI tree and log only new or changed ones."""
            changes = ""
            self.currState.clear()
            for vdi in self.sr.vdiTrees:
                self.currState[vdi.uuid] = self._getTreeStr(vdi)
                if not self.prevState.get(vdi.uuid) or \
                        self.prevState[vdi.uuid] != self.currState[vdi.uuid]:
                    changes += self.currState[vdi.uuid]

            # Report trees that disappeared since the last pass.
            for uuid in self.prevState:
                if not self.currState.get(uuid):
                    changes += "Tree %s gone\n" % uuid

            result = "SR %s (%d VDIs in %d COW trees): " % \
                     (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees))

            if len(changes) > 0:
                if self.stateLogged:
                    result += "showing only COW trees that changed:"
                result += "\n%s" % changes
            else:
                result += "no changes"

            for line in result.split("\n"):
                Util.log("%s" % line)
            # Current state becomes the baseline for the next call.
            self.prevState.clear()
            for key, val in self.currState.items():
                self.prevState[key] = val
            self.stateLogged = True

        def logNewVDI(self, uuid):
            # Only interesting after the initial full state dump.
            if self.stateLogged:
                Util.log("Found new VDI when scanning: %s" % uuid)

        def _getTreeStr(self, vdi, indent=8):
            """Recursively render a VDI subtree as an indented string."""
            treeStr = "%s%s\n" % (" " * indent, vdi)
            for child in vdi.children:
                treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT)
            return treeStr

2009 

    # Recognized SR backend types (see getInstance()).
    TYPE_FILE = "file"
    TYPE_LVHD = "lvhd"
    TYPE_LINSTOR = "linstor"
    TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR]

    # SR-lock acquisition retry policy (interval in seconds, attempt counts).
    LOCK_RETRY_INTERVAL = 3
    LOCK_RETRY_ATTEMPTS = 20
    LOCK_RETRY_ATTEMPTS_LOCK = 100

    SCAN_RETRY_ATTEMPTS = 3

    JRN_CLONE = "clone"  # journal entry type for the clone operation (from SM)
    TMP_RENAME_PREFIX = "OLD_"

    # other-config keys controlling offline leaf-coalesce behaviour.
    KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline"
    KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override"

2027 @staticmethod 

2028 def getInstance(uuid, xapiSession, createLock=True, force=False): 

2029 xapi = XAPI(xapiSession, uuid) 

2030 type = normalizeType(xapi.srRecord["type"]) 

2031 if type == SR.TYPE_FILE: 

2032 return FileSR(uuid, xapi, createLock, force) 

2033 elif type == SR.TYPE_LVHD: 

2034 return LVMSR(uuid, xapi, createLock, force) 

2035 elif type == SR.TYPE_LINSTOR: 

2036 return LinstorSR(uuid, xapi, createLock, force) 

2037 raise util.SMException("SR type %s not recognized" % type) 

2038 

    def __init__(self, uuid, xapi, createLock, force):
        """Bind to the SR identified by `uuid`.

        Raises SMException when the SR is not attached on this host (unless
        `force`, or the SR becomes plugged while waiting), or when this host
        is not the pool master (unless `force`).
        """
        self.logFilter = self.LogFilter(self)
        self.uuid = uuid
        self.path = ""
        self.name = ""
        self.vdis = {}  # uuid -> VDI object
        self.vdiTrees = []  # root VDIs only
        self.journaler = None
        self.xapi = xapi
        self._locked = 0  # nesting depth of lock()
        self._srLock = None
        if createLock:
            self._srLock = lock.Lock(lock.LOCK_TYPE_SR, self.uuid)
        else:
            Util.log("Requested no SR locking")
        self.name = self.xapi.srRecord["name_label"]
        self._failedCoalesceTargets = []

        if not self.xapi.isPluggedHere():
            if force:
                Util.log("SR %s not attached on this host, ignoring" % uuid)
            else:
                # Give the SR a chance to come up before giving up.
                if not self.wait_for_plug():
                    raise util.SMException("SR %s not attached on this host" % uuid)

        if force:
            Util.log("Not checking if we are Master (SR %s)" % uuid)
        elif not self.xapi.isMaster():
            raise util.SMException("This host is NOT master, will not run")

        # VDIs whose coalesce was skipped for lack of space (uuid -> VDI).
        self.no_space_candidates = {}

2070 

2071 def msg_cleared(self, xapi_session, msg_ref): 

2072 try: 

2073 msg = xapi_session.xenapi.message.get_record(msg_ref) 

2074 except XenAPI.Failure: 

2075 return True 

2076 

2077 return msg is None 

2078 

    def check_no_space_candidates(self):
        """Raise or clear the SM_GC_NO_SPACE XAPI alert, depending on whether
        any coalesce was skipped this pass for lack of space."""
        xapi_session = self.xapi.getSession()

        msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE)
        if self.no_space_candidates:
            # Only create a new message when none is recorded or the recorded
            # one was dismissed by the user.
            if msg_id is None or self.msg_cleared(xapi_session, msg_id):
                util.SMlog("Could not coalesce due to a lack of space "
                           f"in SR {self.uuid}")
                msg_body = ("Unable to perform data coalesce due to a lack "
                            f"of space in SR {self.uuid}")
                msg_id = xapi_session.xenapi.message.create(
                    'SM_GC_NO_SPACE',
                    3,
                    "SR",
                    self.uuid,
                    msg_body)
                # Replace the stale sm_config entry with the new message id.
                xapi_session.xenapi.SR.remove_from_sm_config(
                    self.xapi.srRef, VDI.DB_GC_NO_SPACE)
                xapi_session.xenapi.SR.add_to_sm_config(
                    self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id)

            # Remember the message id on every starved VDI so it can be
            # cleared later (see clear_no_space_msg).
            for candidate in self.no_space_candidates.values():
                candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id)
        elif msg_id is not None:
            # Everything was coalescable, remove the message
            xapi_session.xenapi.message.destroy(msg_id)

2105 

2106 def clear_no_space_msg(self, vdi): 

2107 msg_id = None 

2108 try: 

2109 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE) 

2110 except XenAPI.Failure: 

2111 pass 

2112 

2113 self.no_space_candidates.pop(vdi.uuid, None) 

2114 if msg_id is not None: 2114 ↛ exitline 2114 didn't return from function 'clear_no_space_msg', because the condition on line 2114 was never false

2115 vdi.delConfig(VDI.DB_GC_NO_SPACE) 

2116 

2117 

2118 def wait_for_plug(self): 

2119 for _ in range(1, 10): 

2120 time.sleep(2) 

2121 if self.xapi.isPluggedHere(): 

2122 return True 

2123 return False 

2124 

2125 def gcEnabled(self, refresh=True): 

2126 if refresh: 

2127 self.xapi.srRecord = \ 

2128 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

2129 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

2130 Util.log("GC is disabled for this SR, abort") 

2131 return False 

2132 return True 

2133 

    def scan(self, force=False) -> None:
        """Scan the SR and load VDI info for each VDI. If called repeatedly,
        update VDI objects if they already exist.

        Base implementation does nothing; subclasses provide the real scan.
        """
        pass

2138 

    def scanLocked(self, force=False):
        # Run scan() while holding the SR lock; always release it.
        self.lock()
        try:
            self.scan(force)
        finally:
            self.unlock()

2145 

    def getVDI(self, uuid):
        """Return the VDI object for `uuid`, or None if unknown to this SR."""
        return self.vdis.get(uuid)

2148 

2149 def hasWork(self): 

2150 if len(self.findGarbage()) > 0: 

2151 return True 

2152 if self.findCoalesceable(): 

2153 return True 

2154 if self.findLeafCoalesceable(): 

2155 return True 

2156 if self.needUpdateBlockInfo(): 

2157 return True 

2158 return False 

2159 

    def findCoalesceable(self):
        """Find a coalesceable VDI. Return a vdi that should be coalesced
        (choosing one among all coalesceable candidates according to some
        criteria) or None if there is no VDI that could be coalesced"""

        candidates = []

        srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE)
        if srSwitch == "false":
            Util.log("Coalesce disabled for this SR")
            return candidates

        # finish any VDI for which a relink journal entry exists first
        journals = self.journaler.getAll(VDI.JRN_RELINK)
        for uuid in journals:
            vdi = self.getVDI(uuid)
            if vdi and vdi not in self._failedCoalesceTargets:
                return vdi

        for vdi in self.vdis.values():
            if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets:
                candidates.append(vdi)
                Util.log("%s is coalescable" % vdi.uuid)

        self.xapi.update_task_progress("coalescable", len(candidates))

        # pick one in the tallest tree
        treeHeight = dict()  # height -> [candidates at that height]
        for c in candidates:
            height = c.getTreeRoot().getTreeHeight()
            if treeHeight.get(height):
                treeHeight[height].append(c)
            else:
                treeHeight[height] = [c]

        freeSpace = self.getFreeSpace()
        heights = list(treeHeight.keys())
        heights.sort(reverse=True)
        for h in heights:
            for c in treeHeight[h]:
                spaceNeeded = c._calcExtraSpaceForCoalescing()
                if spaceNeeded <= freeSpace:
                    Util.log("Coalesce candidate: %s (tree height %d)" % (c, h))
                    self.clear_no_space_msg(c)
                    return c
                else:
                    # Remember starved candidates so the no-space XAPI alert
                    # can be raised later (see check_no_space_candidates).
                    self.no_space_candidates[c.uuid] = c
                    Util.log("No space to coalesce %s (free space: %d)" % \
                             (c, freeSpace))
        return None

2210 

    def getSwitch(self, key):
        """Return the SR other-config value for `key` (None when unset)."""
        return self.xapi.srRecord["other_config"].get(key)

2213 

2214 def forbiddenBySwitch(self, switch, condition, fail_msg): 

2215 srSwitch = self.getSwitch(switch) 

2216 ret = False 

2217 if srSwitch: 

2218 ret = srSwitch == condition 

2219 

2220 if ret: 

2221 Util.log(fail_msg) 

2222 

2223 return ret 

2224 

2225 def leafCoalesceForbidden(self): 

2226 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

2227 "false", 

2228 "Coalesce disabled for this SR") or 

2229 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

2230 VDI.LEAFCLSC_DISABLED, 

2231 "Leaf-coalesce disabled for this SR")) 

2232 

    def findLeafCoalesceable(self):
        """Find leaf-coalesceable VDIs in each COW tree"""

        candidates = []
        if self.leafCoalesceForbidden():
            return candidates

        self.gatherLeafCoalesceable(candidates)

        self.xapi.update_task_progress("coalescable", len(candidates))

        freeSpace = self.getFreeSpace()
        for candidate in candidates:
            # check the space constraints to see if leaf-coalesce is actually
            # feasible for this candidate
            spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing()
            spaceNeededLive = spaceNeeded
            if spaceNeeded > freeSpace:
                # Snapshot-coalesce doesn't fit; a live leaf-coalesce needs
                # less space and may still be possible.
                spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing()
                if candidate.canLiveCoalesce(self.getStorageSpeed()):
                    spaceNeeded = spaceNeededLive

            if spaceNeeded <= freeSpace:
                Util.log("Leaf-coalesce candidate: %s" % candidate)
                self.clear_no_space_msg(candidate)
                return candidate
            else:
                Util.log("No space to leaf-coalesce %s (free space: %d)" % \
                         (candidate, freeSpace))
                if spaceNeededLive <= freeSpace:
                    # Mark the VDI for offline leaf-coalesce, which would fit.
                    Util.log("...but enough space if skip snap-coalesce")
                    candidate.setConfig(VDI.DB_LEAFCLSC,
                                        VDI.LEAFCLSC_OFFLINE)
                self.no_space_candidates[candidate.uuid] = candidate

        return None

2269 

    def gatherLeafCoalesceable(self, candidates):
        """Append to `candidates` every VDI eligible for leaf-coalesce."""
        for vdi in self.vdis.values():
            if not vdi.isLeafCoalesceable():
                continue
            if vdi in self._failedCoalesceTargets:
                continue
            if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET:
                Util.log("Skipping reset-on-boot %s" % vdi)
                continue
            if vdi.getConfig(vdi.DB_ALLOW_CACHING):
                Util.log("Skipping allow_caching=true %s" % vdi)
                continue
            if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED:
                Util.log("Leaf-coalesce disabled for %s" % vdi)
                continue
            # Leaf-coalesce must be globally enabled or explicitly forced on
            # this VDI.
            if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or
                    vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE):
                continue
            candidates.append(vdi)

2289 

    def coalesce(self, vdi, dryRun=False):
        """Coalesce vdi onto parent"""
        Util.log("Coalescing %s -> %s" % (vdi, vdi.parent))
        if dryRun:
            return

        try:
            self._coalesce(vdi)
        except util.SMException as e:
            # The isinstance check distinguishes aborts (presumably
            # AbortException subclasses util.SMException — confirm): aborts
            # propagate after cleanup, other SM failures just blacklist the
            # VDI for this run.
            if isinstance(e, AbortException):
                self.cleanup()
                raise
            else:
                self._failedCoalesceTargets.append(vdi)
                Util.logException("coalesce")
                Util.log("Coalesce failed, skipping")
        self.cleanup()

2307 

    def coalesceLeaf(self, vdi, dryRun=False):
        """Leaf-coalesce vdi onto parent"""
        Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent))
        if dryRun:
            return

        try:
            uuid = vdi.uuid
            try:
                # "vdi" object will no longer be valid after this call
                self._coalesceLeaf(vdi)
            finally:
                # Re-fetch by uuid since the object may have been replaced,
                # then clear the leaf-coalesce config flag.
                vdi = self.getVDI(uuid)
                if vdi:
                    vdi.delConfig(vdi.DB_LEAFCLSC)
        except AbortException:
            self.cleanup()
            raise
        except (util.SMException, XenAPI.Failure) as e:
            self._failedCoalesceTargets.append(vdi)
            Util.logException("leaf-coalesce")
            Util.log("Leaf-coalesce failed on %s, skipping" % vdi)
            self.cleanup()

2331 

2332 def garbageCollect(self, dryRun=False): 

2333 vdiList = self.findGarbage() 

2334 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

2335 for vdi in vdiList: 

2336 Util.log(" %s" % vdi) 

2337 if not dryRun: 

2338 self.deleteVDIs(vdiList) 

2339 self.cleanupJournals(dryRun) 

2340 

2341 def findGarbage(self): 

2342 vdiList = [] 

2343 for vdi in self.vdiTrees: 

2344 vdiList.extend(vdi.getAllPrunable()) 

2345 return vdiList 

2346 

    def deleteVDIs(self, vdiList) -> None:
        """Delete each VDI in `vdiList`, honouring the GC abort flag."""
        for vdi in vdiList:
            if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("Deleting unlinked VDI %s" % vdi)
            self.deleteVDI(vdi)

2353 

    def deleteVDI(self, vdi) -> None:
        """Remove `vdi` from in-memory state and destroy its backing storage.

        Only childless VDIs may be deleted.
        """
        assert(len(vdi.children) == 0)
        del self.vdis[vdi.uuid]
        if vdi.parent:
            vdi.parent.children.remove(vdi)
        if vdi in self.vdiTrees:
            # Root VDIs also appear in the tree list.
            self.vdiTrees.remove(vdi)
        vdi.delete()

2362 

    def forgetVDI(self, vdiUuid) -> None:
        """Remove the VDI record from the XAPI database."""
        self.xapi.forgetVDI(self.uuid, vdiUuid)

2365 

2366 def pauseVDIs(self, vdiList) -> None: 

2367 paused = [] 

2368 failed = False 

2369 for vdi in vdiList: 

2370 try: 

2371 vdi.pause() 

2372 paused.append(vdi) 

2373 except: 

2374 Util.logException("pauseVDIs") 

2375 failed = True 

2376 break 

2377 

2378 if failed: 

2379 self.unpauseVDIs(paused) 

2380 raise util.SMException("Failed to pause VDIs") 

2381 

2382 def unpauseVDIs(self, vdiList): 

2383 failed = False 

2384 for vdi in vdiList: 

2385 try: 

2386 vdi.unpause() 

2387 except: 

2388 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

2389 failed = True 

2390 if failed: 

2391 raise util.SMException("Failed to unpause VDIs") 

2392 

    def getFreeSpace(self) -> int:
        """Free space available for coalescing, in bytes.

        Base implementation reports none; subclasses override.
        """
        return 0

2395 

2396 def cleanup(self): 

2397 Util.log("In cleanup") 

2398 return 

2399 

2400 @override 

2401 def __str__(self) -> str: 

2402 if self.name: 

2403 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

2404 else: 

2405 ret = "%s" % self.uuid 

2406 return ret 

2407 

    def lock(self):
        """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort
        signal to avoid deadlocking (trying to acquire the SR lock while the
        lock is held by a process that is trying to abort us)"""
        if not self._srLock:
            # Locking was disabled at construction time.
            return

        if self._locked == 0:
            abortFlag = IPCFlag(self.uuid)
            # Retry non-blocking acquisition, checking the abort flag between
            # attempts so an aborter holding the lock can't deadlock us.
            for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK):
                if self._srLock.acquireNoblock():
                    self._locked += 1
                    return
                if abortFlag.test(FLAG_TYPE_ABORT):
                    raise AbortException("Abort requested")
                time.sleep(SR.LOCK_RETRY_INTERVAL)
            raise util.SMException("Unable to acquire the SR lock")

        # Already held by us: just bump the nesting count.
        self._locked += 1

2427 

2428 def unlock(self): 

2429 if not self._srLock: 2429 ↛ 2431line 2429 didn't jump to line 2431, because the condition on line 2429 was never false

2430 return 

2431 assert(self._locked > 0) 

2432 self._locked -= 1 

2433 if self._locked == 0: 

2434 self._srLock.release() 

2435 

2436 def needUpdateBlockInfo(self) -> bool: 

2437 for vdi in self.vdis.values(): 

2438 if vdi.scanError or len(vdi.children) == 0: 

2439 continue 

2440 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

2441 return True 

2442 return False 

2443 

2444 def updateBlockInfo(self) -> None: 

2445 for vdi in self.vdis.values(): 

2446 if vdi.scanError or len(vdi.children) == 0: 

2447 continue 

2448 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

2449 vdi.updateBlockInfo() 

2450 

2451 def cleanupCoalesceJournals(self): 

2452 """Remove stale coalesce VDI indicators""" 

2453 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2454 for uuid, jval in entries.items(): 

2455 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2456 

    def cleanupJournals(self, dryRun=False):
        """delete journal entries for non-existing VDIs"""
        for t in [LVMVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]:
            entries = self.journaler.getAll(t)
            for uuid, jval in entries.items():
                if self.getVDI(uuid):
                    continue
                if t == SR.JRN_CLONE:
                    # Clone journal values are "<base>_<clone>"; keep the
                    # entry while the base VDI still exists.
                    baseUuid, clonUuid = jval.split("_")
                    if self.getVDI(baseUuid):
                        continue
                Util.log(" Deleting stale '%s' journal entry for %s "
                         "(%s)" % (t, uuid, jval))
                if not dryRun:
                    self.journaler.remove(t, uuid)

2472 

    def cleanupCache(self, maxAge=-1) -> int:
        """Clean up cache files; base implementation is a no-op returning 0.

        Subclasses override with real cache handling.
        """
        return 0

2475 

2476 def _hasLeavesAttachedOn(self, vdi: VDI): 

2477 leaves = vdi.getAllLeaves() 

2478 leaves_vdi = [leaf.uuid for leaf in leaves] 

2479 return util.get_hosts_attached_on(self.xapi.session, leaves_vdi) 

2480 

2481 def _gc_running_file(self, vdi: VDI): 

2482 run_file = "gc_running_{}".format(vdi.uuid) 

2483 return os.path.join(NON_PERSISTENT_DIR, str(self.uuid), run_file) 

2484 

2485 def _create_running_file(self, vdi: VDI): 

2486 with open(self._gc_running_file(vdi), "w") as f: 

2487 f.write("1") 

2488 

    def _delete_running_file(self, vdi: VDI):
        """Remove the per-VDI 'GC running' marker file."""
        os.unlink(self._gc_running_file(vdi))

2491 

    def _coalesce(self, vdi: VDI):
        # Full coalesce of `vdi` into its parent: data copy, then relinking
        # of vdi's children onto the parent, then deletion of vdi itself.
        if self.journaler.get(vdi.JRN_RELINK, vdi.uuid):
            # this means we had done the actual coalescing already and just
            # need to finish relinking and/or refreshing the children
            Util.log("==> Coalesce apparently already done: skipping")
        else:
            # JRN_COALESCE is used to check which VDI is being coalesced in
            # order to decide whether to abort the coalesce. We remove the
            # journal as soon as the COW coalesce step is done, because we
            # don't expect the rest of the process to take long
            if os.path.exists(self._gc_running_file(vdi)):
                util.SMlog("gc_running already exist for {}. Ignoring...".format(self.uuid))

            self._create_running_file(vdi)

            self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1")
            host_refs = self._hasLeavesAttachedOn(vdi)
            #TODO: this check of multiple host_refs should be done earlier in `is_coalesceable` to avoid stopping this late every time
            if len(host_refs) > 1:
                util.SMlog("Not coalesceable, chain activated more than once")
                raise Exception("Not coalesceable, chain activated more than once") #TODO: Use correct error

            try:
                if host_refs and vdi.cowutil.isCoalesceableOnRemote():
                    #Leaf opened on another host, we need to call online coalesce
                    util.SMlog("Remote coalesce for {}".format(vdi.path))
                    vdi._doCoalesceOnHost(list(host_refs)[0])
                else:
                    util.SMlog("Offline coalesce for {}".format(vdi.path))
                    vdi._doCoalesce()
            except Exception as e:
                # Remove the marker before propagating so a later run is not
                # fooled into thinking GC is still active for this VDI.
                util.SMlog("EXCEPTION while coalescing: {}".format(e))
                self._delete_running_file(vdi)
                raise

            self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid)
            self._delete_running_file(vdi)

        util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid)

        # we now need to relink the children: lock the SR to prevent ops
        # like SM.clone from manipulating the VDIs we'll be relinking and
        # rescan the SR first in case the children changed since the last
        # scan
        self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1")

        self.lock()
        try:
            vdi.parent._tagChildrenForRelink()
            self.scan()
            vdi._relinkSkip() #TODO: We could check if the parent is already the right one before doing the relink, or we could do the relink a second time, it doesn't seem to cause issues
        finally:
            self.unlock()
        # Reload the children to leave things consistent
        vdi.parent._reloadChildren(vdi)
        self.journaler.remove(vdi.JRN_RELINK, vdi.uuid)

        self.deleteVDI(vdi)

2551 

2552 class CoalesceTracker: 

2553 GRACE_ITERATIONS = 2 

2554 MAX_ITERATIONS_NO_PROGRESS = 3 

2555 MAX_ITERATIONS = 10 

2556 MAX_INCREASE_FROM_MINIMUM = 1.2 

2557 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2558 " --> Final size {finSize}" 

2559 

2560 def __init__(self, sr): 

2561 self.itsNoProgress = 0 

2562 self.its = 0 

2563 self.minSize = float("inf") 

2564 self.history = [] 

2565 self.reason = "" 

2566 self.startSize = None 

2567 self.finishSize = None 

2568 self.sr = sr 

2569 self.grace_remaining = self.GRACE_ITERATIONS 

2570 

2571 def abortCoalesce(self, prevSize, curSize): 

2572 self.its += 1 

2573 self.history.append(self.HISTORY_STRING.format(its=self.its, 

2574 initSize=prevSize, 

2575 finSize=curSize)) 

2576 

2577 self.finishSize = curSize 

2578 

2579 if self.startSize is None: 

2580 self.startSize = prevSize 

2581 

2582 if curSize < self.minSize: 

2583 self.minSize = curSize 

2584 

2585 if prevSize < self.minSize: 

2586 self.minSize = prevSize 

2587 

2588 if self.its == 1: 

2589 # Skip evaluating conditions on first iteration 

2590 return False 

2591 

2592 if prevSize < curSize: 

2593 self.itsNoProgress += 1 

2594 Util.log("No progress, attempt:" 

2595 " {attempt}".format(attempt=self.itsNoProgress)) 

2596 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2597 else: 

2598 # We made progress 

2599 return False 

2600 

2601 if self.its > self.MAX_ITERATIONS: 

2602 max = self.MAX_ITERATIONS 

2603 self.reason = \ 

2604 "Max iterations ({max}) exceeded".format(max=max) 

2605 return True 

2606 

2607 if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS: 

2608 max = self.MAX_ITERATIONS_NO_PROGRESS 

2609 self.reason = \ 

2610 "No progress made for {max} iterations".format(max=max) 

2611 return True 

2612 

2613 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2614 if curSize > maxSizeFromMin: 

2615 self.grace_remaining -= 1 

2616 if self.grace_remaining == 0: 

2617 self.reason = "Unexpected bump in size," \ 

2618 " compared to minimum achieved" 

2619 

2620 return True 

2621 

2622 return False 

2623 

2624 def printSizes(self): 

2625 Util.log("Starting size was {size}" 

2626 .format(size=self.startSize)) 

2627 Util.log("Final size was {size}" 

2628 .format(size=self.finishSize)) 

2629 Util.log("Minimum size achieved was {size}" 

2630 .format(size=self.minSize)) 

2631 

2632 def printReasoning(self): 

2633 Util.log("Aborted coalesce") 

2634 for hist in self.history: 

2635 Util.log(hist) 

2636 Util.log(self.reason) 

2637 self.printSizes() 

2638 

2639 def printSummary(self): 

2640 if self.its == 0: 

2641 return 

2642 

2643 if self.reason: 2643 ↛ 2644line 2643 didn't jump to line 2644, because the condition on line 2643 was never true

2644 Util.log("Aborted coalesce") 

2645 Util.log(self.reason) 

2646 else: 

2647 Util.log("Coalesce summary") 

2648 

2649 Util.log(f"Performed {self.its} iterations") 

2650 self.printSizes() 

2651 

2652 

    def _coalesceLeaf(self, vdi):
        """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot
        complete due to external changes, namely vdi_delete and vdi_snapshot
        that alter leaf-coalescibility of vdi"""
        tracker = self.CoalesceTracker(self)
        # Shrink the leaf via repeated snapshot-coalesce until it is small
        # enough to coalesce live; the tracker aborts on lack of progress.
        while not vdi.canLiveCoalesce(self.getStorageSpeed()):
            prevSizePhys = vdi.getSizePhys()
            if not self._snapshotCoalesce(vdi):
                return False
            if tracker.abortCoalesce(prevSizePhys, vdi.getSizePhys()):
                tracker.printReasoning()
                raise util.SMException("VDI {uuid} could not be coalesced"
                                       .format(uuid=vdi.uuid))
        tracker.printSummary()
        return self._liveLeafCoalesce(vdi)

2668 

2669 def calcStorageSpeed(self, startTime, endTime, coalescedSize): 

2670 speed = None 

2671 total_time = endTime - startTime 

2672 if total_time > 0: 

2673 speed = float(coalescedSize) / float(total_time) 

2674 return speed 

2675 

2676 def writeSpeedToFile(self, speed): 

2677 content = [] 

2678 speedFile = None 

2679 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2680 self.lock() 

2681 try: 

2682 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2683 lines = "" 

2684 if not os.path.isfile(path): 

2685 lines = str(speed) + "\n" 

2686 else: 

2687 speedFile = open(path, "r+") 

2688 content = speedFile.readlines() 

2689 content.append(str(speed) + "\n") 

2690 if len(content) > N_RUNNING_AVERAGE: 

2691 del content[0] 

2692 lines = "".join(content) 

2693 

2694 util.atomicFileWrite(path, VAR_RUN, lines) 

2695 finally: 

2696 if speedFile is not None: 

2697 speedFile.close() 

2698 Util.log("Closing file: {myfile}".format(myfile=path)) 

2699 self.unlock() 

2700 

2701 def recordStorageSpeed(self, startTime, endTime, coalescedSize): 

2702 speed = self.calcStorageSpeed(startTime, endTime, coalescedSize) 

2703 if speed is None: 

2704 return 

2705 

2706 self.writeSpeedToFile(speed) 

2707 

2708 def getStorageSpeed(self): 

2709 speedFile = None 

2710 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2711 self.lock() 

2712 try: 

2713 speed = None 

2714 if os.path.isfile(path): 

2715 speedFile = open(path) 

2716 content = speedFile.readlines() 

2717 try: 

2718 content = [float(i) for i in content] 

2719 except ValueError: 

2720 Util.log("Something bad in the speed log:{log}". 

2721 format(log=speedFile.readlines())) 

2722 return speed 

2723 

2724 if len(content): 

2725 speed = sum(content) / float(len(content)) 

2726 if speed <= 0: 2726 ↛ 2728line 2726 didn't jump to line 2728, because the condition on line 2726 was never true

2727 # Defensive, should be impossible. 

2728 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2729 format(speed=speed, uuid=self.uuid)) 

2730 speed = None 

2731 else: 

2732 Util.log("Speed file empty for SR: {uuid}". 

2733 format(uuid=self.uuid)) 

2734 else: 

2735 Util.log("Speed log missing for SR: {uuid}". 

2736 format(uuid=self.uuid)) 

2737 return speed 

2738 finally: 

2739 if not (speedFile is None): 

2740 speedFile.close() 

2741 self.unlock() 

2742 

2743 def _snapshotCoalesce(self, vdi): 

2744 # Note that because we are not holding any locks here, concurrent SM 

2745 # operations may change this tree under our feet. In particular, vdi 

2746 # can be deleted, or it can be snapshotted. 

2747 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2748 Util.log("Single-snapshotting %s" % vdi) 

2749 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2750 try: 

2751 ret = self.xapi.singleSnapshotVDI(vdi) 

2752 Util.log("Single-snapshot returned: %s" % ret) 

2753 except XenAPI.Failure as e: 

2754 if util.isInvalidVDI(e): 

2755 Util.log("The VDI appears to have been concurrently deleted") 

2756 return False 

2757 raise 

2758 self.scanLocked() 

2759 tempSnap = vdi.parent 

2760 if not tempSnap.isCoalesceable(): 

2761 Util.log("The VDI appears to have been concurrently snapshotted") 

2762 return False 

2763 Util.log("Coalescing parent %s" % tempSnap) 

2764 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2765 sizePhys = vdi.getSizePhys() 

2766 self._coalesce(tempSnap) 

2767 if not vdi.isLeafCoalesceable(): 

2768 Util.log("The VDI tree appears to have been altered since") 

2769 return False 

2770 return True 

2771 

    def _liveLeafCoalesce(self, vdi: VDI) -> bool:
        """Coalesce a leaf into its parent while the VDI remains attached.

        Re-validates the VDI under the SR lock, pauses its I/O, runs the
        leaf-coalesce and finally deletes the renamed obsolete leaf. Returns
        False if the VDI disappeared or stopped being leaf-coalesceable.
        """
        util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid)
        self.lock()
        try:
            self.scan()
            if not self.getVDI(vdi.uuid):
                Util.log("The VDI appears to have been deleted meanwhile")
                return False
            if not vdi.isLeafCoalesceable():
                Util.log("The VDI is no longer leaf-coalesceable")
                return False

            uuid = vdi.uuid
            vdi.pause(failfast=True)
            try:
                try:
                    # "vdi" object will no longer be valid after this call
                    self._create_running_file(vdi)
                    self._doCoalesceLeaf(vdi)
                except:
                    Util.logException("_doCoalesceLeaf")
                    # attempt journal-driven recovery before propagating
                    self._handleInterruptedCoalesceLeaf()
                    raise
            finally:
                # re-resolve: _doCoalesceLeaf rebuilt the object graph
                vdi = self.getVDI(uuid)
                if vdi:
                    vdi.ensureUnpaused()
                    self._delete_running_file(vdi)
                vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid)
                if vdiOld:
                    util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
                    self.deleteVDI(vdiOld)
                    util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
        finally:
            self.cleanup()
            self.unlock()
            self.logFilter.logState()
        return True

2810 

    def _doCoalesceLeaf(self, vdi: VDI):
        """Actual coalescing of a leaf VDI onto parent. Must be called in an
        offline/atomic context"""
        # journal first so an interruption at any later point can be recovered
        self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid)
        self._prepareCoalesceLeaf(vdi)
        vdi.parent._setHidden(False)
        vdi.parent._increaseSizeVirt(vdi.sizeVirt, False)
        vdi.validate(True)
        vdi.parent.validate(True)
        util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid)
        timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT
        if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE:
            Util.log("Leaf-coalesce forced, will not use timeout")
            timeout = 0
        vdi._coalesceCowImage(timeout)
        util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid)
        vdi.parent.validate(True)
        #vdi._verifyContents(timeout / 2)

        # rename: the old leaf gets a temporary name, the parent takes over
        # the leaf's uuid so clients keep seeing the same VDI
        vdiUuid = vdi.uuid
        oldName = vdi.fileName
        origParentUuid = vdi.parent.uuid
        vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid)
        vdi.parent.rename(vdiUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid)
        self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid)

        # Note that "vdi.parent" is now the single remaining leaf and "vdi" is
        # garbage

        # update the VDI record
        if vdi.parent.vdi_type == VdiType.RAW:
            vdi.parent.setConfig(VDI.DB_VDI_TYPE, VdiType.RAW)
            vdi.parent.delConfig(VDI.DB_VDI_BLOCKS)
        util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid)

        self._updateNode(vdi)

        # delete the obsolete leaf & inflate the parent (in that order, to
        # minimize free space requirements)
        parent = vdi.parent
        vdi._setHidden(True)
        vdi.parent.children = []
        vdi.parent = None

        if parent.parent is None:
            # the remaining node is now a root: no parent pointer to keep
            parent.delConfig(VDI.DB_VDI_PARENT)

        extraSpace = self._calcExtraSpaceNeeded(vdi, parent)
        freeSpace = self.getFreeSpace()
        if freeSpace < extraSpace:
            # don't delete unless we need the space: deletion is time-consuming
            # because it requires contacting the slaves, and we're paused here
            util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
            self.deleteVDI(vdi)
            util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)

        util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid)
        self.journaler.remove(VDI.JRN_LEAF, vdiUuid)

        self.forgetVDI(origParentUuid)
        self._finishCoalesceLeaf(parent)
        self._updateSlavesOnResize(parent)

2876 

2877 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

2878 assert(VdiType.isCowImage(parent.vdi_type)) 

2879 extra = child.getSizePhys() - parent.getSizePhys() 

2880 if extra < 0: 2880 ↛ 2881line 2880 didn't jump to line 2881, because the condition on line 2880 was never true

2881 extra = 0 

2882 return extra 

2883 

    def _prepareCoalesceLeaf(self, vdi) -> None:
        # SR-type-specific hook run before a leaf-coalesce; no-op by default
        pass

2886 

    def _updateNode(self, vdi) -> None:
        # SR-type-specific hook to fix up node state after rename; no-op by default
        pass

2889 

    def _finishCoalesceLeaf(self, parent) -> None:
        # SR-type-specific hook run after a leaf-coalesce; no-op by default
        pass

2892 

    def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
        # hook to propagate an undone leaf-coalesce to slave hosts; no-op by default
        pass

2895 

    def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None:
        # hook to propagate a VDI rename to slave hosts; no-op by default
        pass

2898 

    def _updateSlavesOnResize(self, vdi) -> None:
        # hook to propagate a VDI resize to slave hosts; no-op by default
        pass

2901 

2902 def _removeStaleVDIs(self, uuidsPresent) -> None: 

2903 for uuid in list(self.vdis.keys()): 

2904 if not uuid in uuidsPresent: 

2905 Util.log("VDI %s disappeared since last scan" % \ 

2906 self.vdis[uuid]) 

2907 del self.vdis[uuid] 

2908 

    def _handleInterruptedCoalesceLeaf(self) -> None:
        """An interrupted leaf-coalesce operation may leave the COW tree in an
        inconsistent state. If the old-leaf VDI is still present, we revert the
        operation (in case the original error is persistent); otherwise we must
        finish the operation"""
        # overridden by SR subclasses that support leaf-coalesce recovery
        pass

2915 

2916 def _buildTree(self, force): 

2917 self.vdiTrees = [] 

2918 for vdi in self.vdis.values(): 

2919 if vdi.parentUuid: 

2920 parent = self.getVDI(vdi.parentUuid) 

2921 if not parent: 

2922 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2923 self.vdiTrees.append(vdi) 

2924 continue 

2925 if force: 

2926 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2927 (vdi.parentUuid, vdi.uuid)) 

2928 self.vdiTrees.append(vdi) 

2929 continue 

2930 else: 

2931 raise util.SMException("Parent VDI %s of %s not " \ 

2932 "found" % (vdi.parentUuid, vdi.uuid)) 

2933 vdi.parent = parent 

2934 parent.children.append(vdi) 

2935 else: 

2936 self.vdiTrees.append(vdi) 

2937 

2938 

class FileSR(SR):
    """SR backed by a mounted directory (ext/NFS): one image file per VDI."""
    TYPE = SR.TYPE_FILE
    CACHE_FILE_EXT = ".vhdcache"
    # cache cleanup actions
    CACHE_ACTION_KEEP = 0
    CACHE_ACTION_REMOVE = 1
    CACHE_ACTION_REMOVE_IF_INACTIVE = 2

    def __init__(self, uuid, xapi, createLock, force):
        SR.__init__(self, uuid, xapi, createLock, force)
        # standard mount point for file-based SRs
        self.path = "/var/run/sr-mount/%s" % self.uuid
        self.journaler = fjournaler.Journaler(self.path)

2951 

    @override
    def scan(self, force=False) -> None:
        """Rebuild self.vdis from the image files present in the SR directory.

        Scans every supported COW image type plus raw files, drops records of
        VDIs that disappeared, rebuilds the parent/child trees and finally
        recovers any journalled interrupted leaf-coalesce.
        """
        if not util.pathexists(self.path):
            raise util.SMException("directory %s not found!" % self.uuid)

        uuidsPresent: List[str] = []

        for vdi_type in VDI_COW_TYPES:
            scan_result = self._scan(vdi_type, force)
            for uuid, image_info in scan_result.items():
                vdi = self.getVDI(uuid)
                if not vdi:
                    self.logFilter.logNewVDI(uuid)
                    vdi = FileVDI(self, uuid, vdi_type)
                    self.vdis[uuid] = vdi
                vdi.load(image_info)
            uuidsPresent.extend(scan_result.keys())

        # raw images carry no COW metadata to load: just record their presence
        rawList = [x for x in os.listdir(self.path) if x.endswith(VdiTypeExtension.RAW)]
        for rawName in rawList:
            uuid = FileVDI.extractUuid(rawName)
            uuidsPresent.append(uuid)
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = FileVDI(self, uuid, VdiType.RAW)
                self.vdis[uuid] = vdi
        self._removeStaleVDIs(uuidsPresent)
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

2983 

2984 @override 

2985 def getFreeSpace(self) -> int: 

2986 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) 

2987 

2988 @override 

2989 def deleteVDIs(self, vdiList) -> None: 

2990 rootDeleted = False 

2991 for vdi in vdiList: 

2992 if not vdi.parent: 

2993 rootDeleted = True 

2994 break 

2995 SR.deleteVDIs(self, vdiList) 

2996 if self.xapi.srRecord["type"] == "nfs" and rootDeleted: 

2997 self.xapi.markCacheSRsDirty() 

2998 

    @override
    def cleanupCache(self, maxAge=-1) -> int:
        """Clean up IntelliCache cache files. Caches for leaf nodes are
        removed when the leaf node no longer exists or its allow-caching
        attribute is not set. Caches for parent nodes are removed when the
        parent node no longer exists or it hasn't been used in more than
        <maxAge> hours.
        Return number of caches removed.
        """
        numRemoved = 0
        cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)]
        Util.log("Found %d cache files" % len(cacheFiles))
        # with the default maxAge=-1 the age-based branch below is disabled
        cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge)
        for cacheFile in cacheFiles:
            uuid = cacheFile[:-len(self.CACHE_FILE_EXT)]
            action = self.CACHE_ACTION_KEEP
            rec = self.xapi.getRecordVDI(uuid)
            if not rec:
                Util.log("Cache %s: VDI doesn't exist" % uuid)
                action = self.CACHE_ACTION_REMOVE
            elif rec["managed"] and not rec["allow_caching"]:
                Util.log("Cache %s: caching disabled" % uuid)
                action = self.CACHE_ACTION_REMOVE
            elif not rec["managed"] and maxAge >= 0:
                # unmanaged record => parent-node cache: expire by last access
                lastAccess = datetime.datetime.fromtimestamp( \
                        os.path.getatime(os.path.join(self.path, cacheFile)))
                if lastAccess < cutoff:
                    Util.log("Cache %s: older than %d hrs" % (uuid, maxAge))
                    action = self.CACHE_ACTION_REMOVE_IF_INACTIVE

            if action == self.CACHE_ACTION_KEEP:
                Util.log("Keeping cache %s" % uuid)
                continue

            # leaf caches are locked via their parent's uuid when one exists
            lockId = uuid
            parentUuid = None
            if rec and rec["managed"]:
                parentUuid = rec["sm_config"].get("vhd-parent")
            if parentUuid:
                lockId = parentUuid

            cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId)
            cacheLock.acquire()
            try:
                if self._cleanupCache(uuid, action):
                    numRemoved += 1
            finally:
                cacheLock.release()
        return numRemoved

3048 

    def _cleanupCache(self, uuid, action):
        """Remove a single cache file (caller holds the cache-setup lock).
        Returns True if the file was actually deleted."""
        assert(action != self.CACHE_ACTION_KEEP)
        rec = self.xapi.getRecordVDI(uuid)
        if rec and rec["allow_caching"]:
            # re-check under the lock: caching may have been re-enabled
            Util.log("Cache %s appears to have become valid" % uuid)
            return False

        fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT)
        tapdisk = blktap2.Tapdisk.find_by_path(fullPath)
        if tapdisk:
            if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE:
                Util.log("Cache %s still in use" % uuid)
                return False
            Util.log("Shutting down tapdisk for %s" % fullPath)
            tapdisk.shutdown()

        Util.log("Deleting file %s" % fullPath)
        os.unlink(fullPath)
        return True

3068 

3069 def _isCacheFileName(self, name): 

3070 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \ 

3071 name.endswith(self.CACHE_FILE_EXT) 

3072 

    def _scan(self, vdi_type, force):
        """Scan the SR directory for images of vdi_type, retrying up to
        SR.SCAN_RETRY_ATTEMPTS times when any per-image error is reported.

        Returns {uuid: image info}; with force=True the last (possibly
        erroneous) result is returned instead of raising SMException."""
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            error = False
            pattern = os.path.join(self.path, "*%s" % VDI_TYPE_TO_EXTENSION[vdi_type])
            scan_result = getCowUtil(vdi_type).getAllInfoFromVG(pattern, FileVDI.extractUuid)
            for uuid, vdiInfo in scan_result.items():
                if vdiInfo.error:
                    error = True
                    break
            if not error:
                return scan_result
            Util.log("Scan error on attempt %d" % i)
        if force:
            return scan_result
        raise util.SMException("Scan error")

3088 

    @override
    def deleteVDI(self, vdi) -> None:
        """Delete a VDI, first verifying no slave host still has its file in use."""
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

3093 

    def _checkSlaves(self, vdi):
        """Ask every slave with this SR attached to confirm vdi's file is not
        in use; failures from hosts believed offline are tolerated."""
        onlineHosts = self.xapi.getOnlineHosts()
        abortFlag = IPCFlag(self.uuid)
        for pbdRecord in self.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            if hostRef == self.xapi._hostRef:
                # the local host is checked by other means
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            try:
                self._checkSlave(hostRef, vdi)
            except util.CommandException:
                # ignore failures from hosts the pool believes are offline
                if hostRef in onlineHosts:
                    raise

3108 

3109 def _checkSlave(self, hostRef, vdi): 

3110 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path}) 

3111 Util.log("Checking with slave: %s" % repr(call)) 

3112 _host = self.xapi.session.xenapi.host 

3113 text = _host.call_plugin( * call) 

3114 

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Recover journalled leaf-coalesce operations: undo when the parent
        image (or the temporarily-renamed child) still exists on disk,
        otherwise finish the interrupted operation."""
        entries = self.journaler.getAll(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            fileList = os.listdir(self.path)
            childName = uuid + VdiTypeExtension.VHD
            tmpChildName = self.TMP_RENAME_PREFIX + uuid + VdiTypeExtension.VHD
            parentName1 = parentUuid + VdiTypeExtension.VHD
            parentName2 = parentUuid + VdiTypeExtension.RAW
            parentPresent = (parentName1 in fileList or parentName2 in fileList)
            if parentPresent or tmpChildName in fileList:
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

3133 

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Revert an interrupted leaf-coalesce: rename the image files back to
        their original uuids and restore the VDI config and hidden flags."""
        Util.log("*** UNDO LEAF-COALESCE")
        parent = self.getVDI(parentUuid)
        if not parent:
            # parent rename already happened: it is currently named childUuid
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException("Neither %s nor %s found" % \
                        (parentUuid, childUuid))
            Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
            parent.rename(parentUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)

        child = self.getVDI(childUuid)
        if not child:
            # child rename already happened: find it under its temporary name
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException("Neither %s nor %s found" % \
                        (childUuid, self.TMP_RENAME_PREFIX + childUuid))
            Util.log("Renaming child back to %s" % childUuid)
            child.rename(childUuid)
            Util.log("Updating the VDI record")
            child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
            util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)

        # restore visibility: child is the live leaf, parent goes hidden again
        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
        Util.log("*** leaf-coalesce undo successful")
        if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
            child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)

3168 

3169 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3170 Util.log("*** FINISH LEAF-COALESCE") 

3171 vdi = self.getVDI(childUuid) 

3172 if not vdi: 

3173 Util.log(f"_finishInterruptedCoalesceLeaf, vdi {childUuid} not found, aborting") 

3174 raise util.SMException("VDI %s not found" % childUuid) 

3175 try: 

3176 self.forgetVDI(parentUuid) 

3177 except XenAPI.Failure: 

3178 Util.logException('_finishInterruptedCoalesceLeaf') 

3179 pass 

3180 self._updateSlavesOnResize(vdi) 

3181 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3182 Util.log("*** finished leaf-coalesce successfully") 

3183 

3184 

class LVMSR(SR):
    """SR backed by an LVM volume group, one logical volume per VDI."""
    TYPE = SR.TYPE_LVHD
    SUBTYPES = ["lvhdoiscsi", "lvhdohba"]

    def __init__(self, uuid, xapi, createLock, force):
        SR.__init__(self, uuid, xapi, createLock, force)
        self.vgName = "%s%s" % (VG_PREFIX, self.uuid)
        self.path = os.path.join(VG_LOCATION, self.vgName)

        # optional per-SR LVM configuration stored in the SR's other-config
        sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid)
        other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref)
        lvm_conf = other_conf.get('lvm-conf') if other_conf else None
        self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf)

        self.lvActivator = LVActivator(self.uuid, self.lvmCache)
        self.journaler = journaler.Journaler(self.lvmCache)

3201 

    @override
    def deleteVDI(self, vdi) -> None:
        """Delete a VDI: release our own LV activation first, confirm no slave
        has the LV active, then delete."""
        if self.lvActivator.get(vdi.uuid, False):
            self.lvActivator.deactivate(vdi.uuid, False)
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

3208 

    @override
    def forgetVDI(self, vdiUuid) -> None:
        """Forget the VDI in XAPI and remove it from the SR's metadata volume."""
        SR.forgetVDI(self, vdiUuid)
        mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME)
        LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid)

3214 

3215 @override 

3216 def getFreeSpace(self) -> int: 

3217 stats = lvutil._getVGstats(self.vgName) 

3218 return stats['physical_size'] - stats['physical_utilisation'] 

3219 

    @override
    def cleanup(self):
        """Deactivate every LV this GC run activated; log (don't raise) on failure."""
        if not self.lvActivator.deactivateAll():
            Util.log("ERROR deactivating LVs while cleaning up")

3224 

3225 @override 

3226 def needUpdateBlockInfo(self) -> bool: 

3227 for vdi in self.vdis.values(): 

3228 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0: 

3229 continue 

3230 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

3231 return True 

3232 return False 

3233 

    @override
    def updateBlockInfo(self) -> None:
        """Populate missing block-allocation metadata for inner COW nodes."""
        numUpdated = 0
        for vdi in self.vdis.values():
            if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0:
                continue
            if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
                vdi.updateBlockInfo()
                numUpdated += 1
        if numUpdated:
            # deactivate the LVs back sooner rather than later. If we don't
            # now, by the time this thread gets to deactivations, another one
            # might have leaf-coalesced a node and deleted it, making the child
            # inherit the refcount value and preventing the correct decrement
            self.cleanup()

3249 

    @override
    def scan(self, force=False) -> None:
        """Rebuild self.vdis from LVM metadata, rebuild the trees and recover
        any journalled interrupted leaf-coalesce."""
        vdis = self._scan(force)
        for uuid, vdiInfo in vdis.items():
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = LVMVDI(self, uuid, vdiInfo.vdiType)
                self.vdis[uuid] = vdi
            vdi.load(vdiInfo)
        self._removeStaleVDIs(vdis.keys())
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

3264 

    def _scan(self, force):
        """Collect {uuid: info} for all VDIs in the VG, retrying up to
        SR.SCAN_RETRY_ATTEMPTS times on per-VDI scan errors; with force=True
        the last (possibly erroneous) result is returned instead of raising."""
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            error = False
            self.lvmCache.refresh()
            vdis = LvmCowUtil.getVDIInfo(self.lvmCache)
            for uuid, vdiInfo in vdis.items():
                if vdiInfo.scanError:
                    error = True
                    break
            if not error:
                return vdis
            Util.log("Scan error, retrying (%d)" % i)
        if force:
            return vdis
        raise util.SMException("Scan error")

3280 

    @override
    def _removeStaleVDIs(self, uuidsPresent) -> None:
        """Forget VDIs that disappeared since the last scan, also dropping any
        LV activation reference still held for them."""
        for uuid in list(self.vdis.keys()):
            if not uuid in uuidsPresent:
                Util.log("VDI %s disappeared since last scan" % \
                        self.vdis[uuid])
                del self.vdis[uuid]
                if self.lvActivator.get(uuid, False):
                    self.lvActivator.remove(uuid, False)

3290 

    @override
    def _liveLeafCoalesce(self, vdi) -> bool:
        """If the parent is raw and the child was resized (virt. size), then
        we'll need to resize the parent, which can take a while due to zeroing
        out of the extended portion of the LV. Do it before pausing the child
        to avoid a protracted downtime"""
        if not VdiType.isCowImage(vdi.parent.vdi_type) and vdi.sizeVirt > vdi.parent.sizeVirt:
            self.lvmCache.setReadonly(vdi.parent.fileName, False)
            vdi.parent._increaseSizeVirt(vdi.sizeVirt)

        return SR._liveLeafCoalesce(self, vdi)

3302 

    @override
    def _prepareCoalesceLeaf(self, vdi) -> None:
        """Activate the whole chain and make the parent LV writable, then
        trade space: deflate the child and inflate the parent for the coalesce."""
        vdi._activateChain()
        self.lvmCache.setReadonly(vdi.parent.fileName, False)
        vdi.deflate()
        vdi.inflateParentForCoalesce()

3309 

    @override
    def _updateNode(self, vdi) -> None:
        """Transfer RefCounter state from the coalesced leaf to the remaining node."""
        # fix the refcounts: the remaining node should inherit the binary
        # refcount from the leaf (because if it was online, it should remain
        # refcounted as such), but the normal refcount from the parent (because
        # this node is really the parent node) - minus 1 if it is online (since
        # non-leaf nodes increment their normal counts when they are online and
        # we are now a leaf, storing that 1 in the binary refcount).
        ns = NS_PREFIX_LVM + self.uuid
        cCnt, cBcnt = RefCounter.check(vdi.uuid, ns)
        pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns)
        pCnt = pCnt - cBcnt
        assert(pCnt >= 0)
        RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns)

3324 

3325 @override 

3326 def _finishCoalesceLeaf(self, parent) -> None: 

3327 if not parent.isSnapshot() or parent.isAttachedRW(): 

3328 parent.inflateFully() 

3329 else: 

3330 parent.deflate() 

3331 

    @override
    def _calcExtraSpaceNeeded(self, child, parent) -> int:
        """Bytes needed to inflate the parent LV to its full virtual size."""
        return parent.lvmcowutil.calcVolumeSize(parent.sizeVirt) - parent.sizeLV

3335 

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Recover journalled leaf-coalesce operations: undo when the parent
        LV (or the child LV) still exists under any image-type prefix,
        otherwise finish the interrupted operation."""
        entries = self.journaler.getAll(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            undo = False
            for prefix in LV_PREFIX.values():
                parentLV = prefix + parentUuid
                undo = self.lvmCache.checkLV(parentLV)
                if undo:
                    break

            if not undo:
                # NOTE(review): this probes "<prefix><uuid>", not the
                # TMP_RENAME_PREFIX name used during rename — confirm intended
                for prefix in LV_PREFIX.values():
                    tmpChildLV = prefix + uuid
                    undo = self.lvmCache.checkLV(tmpChildLV)
                    if undo:
                        break

            if undo:
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

3362 

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Revert an interrupted leaf-coalesce: rename LVs back to their
        original uuids and restore config, refcounts and allocation state."""
        Util.log("*** UNDO LEAF-COALESCE")
        parent = self.getVDI(parentUuid)
        if not parent:
            # parent rename already happened: it is currently named childUuid
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException("Neither %s nor %s found" % \
                        (parentUuid, childUuid))
            Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
            parent.rename(parentUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)

        child = self.getVDI(childUuid)
        if not child:
            # child rename already happened: find it under its temporary name
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException("Neither %s nor %s found" % \
                        (childUuid, self.TMP_RENAME_PREFIX + childUuid))
            Util.log("Renaming child back to %s" % childUuid)
            child.rename(childUuid)
            Util.log("Updating the VDI record")
            child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
            util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)

        # refcount (best effort - assume that it had succeeded if the
        # second rename succeeded; if not, this adjustment will be wrong,
        # leading to a non-deactivation of the LV)
        ns = NS_PREFIX_LVM + self.uuid
        cCnt, cBcnt = RefCounter.check(child.uuid, ns)
        pCnt, pBcnt = RefCounter.check(parent.uuid, ns)
        pCnt = pCnt + cBcnt
        RefCounter.set(parent.uuid, pCnt, 0, ns)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid)

        # restore allocation: child is the live leaf again, parent goes back
        # to its deflated, hidden, read-only state
        parent.deflate()
        child.inflateFully()
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid)
        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        if not parent.lvReadonly:
            self.lvmCache.setReadonly(parent.fileName, True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
        Util.log("*** leaf-coalesce undo successful")
        if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
            child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)

3412 

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Complete a journalled leaf-coalesce whose rename phase already
        succeeded: inflate the survivor, forget the stale parent, refresh slaves."""
        Util.log("*** FINISH LEAF-COALESCE")
        vdi = self.getVDI(childUuid)
        if not vdi:
            raise util.SMException("VDI %s not found" % childUuid)
        vdi.inflateFully()
        util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid)
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # best-effort: the parent record may already be gone
            pass
        self._updateSlavesOnResize(vdi)
        util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
        Util.log("*** finished leaf-coalesce successfully")

3427 

    def _checkSlaves(self, vdi):
        """Confirm with all slaves in the pool that 'vdi' is not in use. We
        try to check all slaves, including those that the Agent believes are
        offline, but ignore failures for offline hosts. This is to avoid cases
        where the Agent thinks a host is offline but the host is up."""
        # multi-action plugin call: deactivate the LV and clean up its
        # lock/refcount state on each slave
        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": vdi.fileName,
                "action2": "cleanupLockAndRefcount",
                "uuid2": vdi.uuid,
                "ns2": NS_PREFIX_LVM + self.uuid}
        onlineHosts = self.xapi.getOnlineHosts()
        abortFlag = IPCFlag(self.uuid)
        for pbdRecord in self.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            if hostRef == self.xapi._hostRef:
                # local host handled elsewhere
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("Checking with slave %s (path %s)" % (
                self.xapi.getRecordHost(hostRef)['hostname'], vdi.path))
            try:
                self.xapi.ensureInactive(hostRef, args)
            except XenAPI.Failure:
                if hostRef in onlineHosts:
                    raise

3454 

    @override
    def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
        """After undoing a leaf-coalesce, refresh LV state on every slave that
        still has the child attached (drop stale names, refresh both LVs)."""
        slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid])
        if not slaves:
            Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \
                    child)
            return

        # NOTE(review): prefix here is the raw vdi_type string, not an
        # LV_PREFIX lookup — confirm this matches the temporary LV name
        tmpName = child.vdi_type + self.TMP_RENAME_PREFIX + child.uuid
        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": tmpName,
                "action2": "deactivateNoRefcount",
                "lvName2": child.fileName,
                "action3": "refresh",
                "lvName3": child.fileName,
                "action4": "refresh",
                "lvName4": parent.fileName}
        for slave in slaves:
            Util.log("Updating %s, %s, %s on slave %s" % \
                    (tmpName, child.fileName, parent.fileName,
                     self.xapi.getRecordHost(slave)['hostname']))
            text = self.xapi.session.xenapi.host.call_plugin( \
                slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
            Util.log("call-plugin returned: '%s'" % text)

3480 

    @override
    def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None:
        """After the coalesce renames, refresh the LV names on slaves that
        have the VDI attached and clean up the old parent's lock/refcount."""
        slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid])
        if not slaves:
            Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi)
            return

        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": oldNameLV,
                "action2": "refresh",
                "lvName2": vdi.fileName,
                "action3": "cleanupLockAndRefcount",
                "uuid3": origParentUuid,
                "ns3": NS_PREFIX_LVM + self.uuid}
        for slave in slaves:
            Util.log("Updating %s to %s on slave %s" % \
                    (oldNameLV, vdi.fileName,
                     self.xapi.getRecordHost(slave)['hostname']))
            text = self.xapi.session.xenapi.host.call_plugin( \
                slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
            Util.log("call-plugin returned: '%s'" % text)

3503 

    @override
    def _updateSlavesOnResize(self, vdi) -> None:
        """Refresh the resized LV on every slave where any leaf of vdi's tree
        is attached."""
        uuids = [x.uuid for x in vdi.getAllLeaves()]
        slaves = util.get_slaves_attached_on(self.xapi.session, uuids)
        if not slaves:
            util.SMlog("Update-on-resize: %s not attached on any slave" % vdi)
            return
        LvmCowUtil.refreshVolumeOnSlaves(self.xapi.session, self.uuid, self.vgName,
                                         vdi.fileName, vdi.uuid, slaves)

3513 

3514 

class LinstorSR(SR):
    """SR backed by LINSTOR-managed volumes."""
    TYPE = SR.TYPE_LINSTOR

    def __init__(self, uuid, xapi, createLock, force):
        if not LINSTOR_AVAILABLE:
            raise util.SMException(
                'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing'
            )

        SR.__init__(self, uuid, xapi, createLock, force)
        self.path = LinstorVolumeManager.DEV_ROOT_PATH

        # Forwarding proxy: attribute access is delegated to the current
        # self._linstor manager, which _reloadLinstor may replace at runtime.
        class LinstorProxy:
            def __init__(self, sr: LinstorSR) -> None:
                self.sr = sr

            def __getattr__(self, attr: str) -> Any:
                assert self.sr, "Cannot use `LinstorProxy` without valid `LinstorVolumeManager` instance"
                return getattr(self.sr._linstor, attr)

        self._linstor_proxy = LinstorProxy(self)
        self._reloadLinstor(journaler_only=True)

3537 

3538 @override 

3539 def deleteVDI(self, vdi) -> None: 

3540 self._checkSlaves(vdi) 

3541 SR.deleteVDI(self, vdi) 

3542 

3543 @override 

3544 def getFreeSpace(self) -> int: 

3545 return self._linstor.max_volume_size_allowed 

3546 

3547 @override 

3548 def scan(self, force=False) -> None: 

3549 all_vdi_info = self._scan(force) 

3550 for uuid, vdiInfo in all_vdi_info.items(): 

3551 # When vdiInfo is None, the VDI is RAW. 

3552 vdi = self.getVDI(uuid) 

3553 if not vdi: 

3554 self.logFilter.logNewVDI(uuid) 

3555 vdi = LinstorVDI(self, uuid, vdiInfo.vdiType if vdiInfo else VdiType.RAW) 

3556 self.vdis[uuid] = vdi 

3557 if vdiInfo: 

3558 vdi.load(vdiInfo) 

3559 self._removeStaleVDIs(all_vdi_info.keys()) 

3560 self._buildTree(force) 

3561 self.logFilter.logState() 

3562 self._handleInterruptedCoalesceLeaf() 

3563 

3564 @override 

3565 def pauseVDIs(self, vdiList) -> None: 

3566 self._linstor.ensure_volume_list_is_not_locked( 

3567 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3568 ) 

3569 return super(LinstorSR, self).pauseVDIs(vdiList) 

3570 

3571 def _reloadLinstor(self, journaler_only=False): 

3572 session = self.xapi.session 

3573 host_ref = util.get_this_host_ref(session) 

3574 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) 

3575 

3576 pbd = util.find_my_pbd(session, host_ref, sr_ref) 

3577 if pbd is None: 

3578 raise util.SMException('Failed to find PBD') 

3579 

3580 dconf = session.xenapi.PBD.get_device_config(pbd) 

3581 group_name = dconf['group-name'] 

3582 

3583 controller_uri = get_controller_uri() 

3584 self.journaler = LinstorJournaler( 

3585 controller_uri, group_name, logger=util.SMlog 

3586 ) 

3587 

3588 if journaler_only: 

3589 return 

3590 

3591 self._linstor = LinstorVolumeManager( 

3592 controller_uri, 

3593 group_name, 

3594 repair=True, 

3595 logger=util.SMlog 

3596 ) 

3597 

3598 def _scan(self, force): 

3599 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3600 self._reloadLinstor() 

3601 error = False 

3602 try: 

3603 all_vdi_info = self._load_vdi_info() 

3604 for uuid, vdiInfo in all_vdi_info.items(): 

3605 if vdiInfo and vdiInfo.error: 

3606 error = True 

3607 break 

3608 if not error: 

3609 return all_vdi_info 

3610 Util.log('Scan error, retrying ({})'.format(i)) 

3611 except Exception as e: 

3612 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3613 Util.log(traceback.format_exc()) 

3614 

3615 if force: 

3616 return all_vdi_info 

3617 raise util.SMException('Scan error') 

3618 

3619 def _load_vdi_info(self): 

3620 all_vdi_info = {} 

3621 

3622 # TODO: Ensure metadata contains the right info. 

3623 

3624 all_volume_info = self._linstor.get_volumes_with_info() 

3625 volumes_metadata = self._linstor.get_volumes_with_metadata() 

3626 for vdi_uuid, volume_info in all_volume_info.items(): 

3627 vdi_type = VdiType.RAW 

3628 try: 

3629 volume_metadata = volumes_metadata[vdi_uuid] 

3630 if not volume_info.name and not list(volume_metadata.items()): 

3631 continue # Ignore it, probably deleted. 

3632 

3633 if vdi_uuid.startswith('DELETED_'): 

3634 # Assume it's really a RAW volume of a failed snap without COW header/footer. 

3635 # We must remove this VDI now without adding it in the VDI list. 

3636 # Otherwise `Relinking` calls and other actions can be launched on it. 

3637 # We don't want that... 

3638 Util.log('Deleting bad VDI {}'.format(vdi_uuid)) 

3639 

3640 self.lock() 

3641 try: 

3642 self._linstor.destroy_volume(vdi_uuid) 

3643 try: 

3644 self.forgetVDI(vdi_uuid) 

3645 except: 

3646 pass 

3647 except Exception as e: 

3648 Util.log('Cannot delete bad VDI: {}'.format(e)) 

3649 finally: 

3650 self.unlock() 

3651 continue 

3652 

3653 vdi_type = volume_metadata.get(VDI_TYPE_TAG) 

3654 volume_name = self._linstor.get_volume_name(vdi_uuid) 

3655 if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX): 

3656 # Always RAW! 

3657 info = None 

3658 elif VdiType.isCowImage(vdi_type): 

3659 info = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type).get_info(vdi_uuid) 

3660 else: 

3661 # Ensure it's not a COW image... 

3662 linstorcowutil = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type) 

3663 try: 

3664 info = linstorcowutil.get_info(vdi_uuid) 

3665 except: 

3666 try: 

3667 linstorcowutil.force_repair( 

3668 self._linstor.get_device_path(vdi_uuid) 

3669 ) 

3670 info = linstorcowutil.get_info(vdi_uuid) 

3671 except: 

3672 info = None 

3673 

3674 except Exception as e: 

3675 Util.log( 

3676 ' [VDI {}: failed to load VDI info]: {}' 

3677 .format(vdi_uuid, e) 

3678 ) 

3679 info = CowImageInfo(vdi_uuid) 

3680 info.error = 1 

3681 

3682 if info: 

3683 info.vdiType = vdi_type 

3684 

3685 all_vdi_info[vdi_uuid] = info 

3686 

3687 return all_vdi_info 

3688 

3689 @override 

3690 def _prepareCoalesceLeaf(self, vdi) -> None: 

3691 vdi._activateChain() 

3692 vdi.deflate() 

3693 vdi._inflateParentForCoalesce() 

3694 

3695 @override 

3696 def _finishCoalesceLeaf(self, parent) -> None: 

3697 if not parent.isSnapshot() or parent.isAttachedRW(): 

3698 parent.inflateFully() 

3699 else: 

3700 parent.deflate() 

3701 

3702 @override 

3703 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3704 return LinstorCowUtil( 

3705 self.xapi.session, self._linstor, parent.vdi_type 

3706 ).compute_volume_size(parent.sizeVirt) - parent.getDrbdSize() 

3707 

3708 def _hasValidDevicePath(self, uuid): 

3709 try: 

3710 self._linstor.get_device_path(uuid) 

3711 except Exception: 

3712 # TODO: Maybe log exception. 

3713 return False 

3714 return True 

3715 

3716 @override 

3717 def _liveLeafCoalesce(self, vdi) -> bool: 

3718 self.lock() 

3719 try: 

3720 self._linstor.ensure_volume_is_not_locked( 

3721 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3722 ) 

3723 return super(LinstorSR, self)._liveLeafCoalesce(vdi) 

3724 finally: 

3725 self.unlock() 

3726 

3727 @override 

3728 def _handleInterruptedCoalesceLeaf(self) -> None: 

3729 entries = self.journaler.get_all(VDI.JRN_LEAF) 

3730 for uuid, parentUuid in entries.items(): 

3731 if self._hasValidDevicePath(parentUuid) or \ 

3732 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): 

3733 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3734 else: 

3735 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3736 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3737 vdi = self.getVDI(uuid) 

3738 if vdi: 

3739 vdi.ensureUnpaused() 

3740 

3741 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3742 Util.log('*** UNDO LEAF-COALESCE') 

3743 parent = self.getVDI(parentUuid) 

3744 if not parent: 

3745 parent = self.getVDI(childUuid) 

3746 if not parent: 

3747 raise util.SMException( 

3748 'Neither {} nor {} found'.format(parentUuid, childUuid) 

3749 ) 

3750 Util.log( 

3751 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) 

3752 ) 

3753 parent.rename(parentUuid) 

3754 

3755 child = self.getVDI(childUuid) 

3756 if not child: 

3757 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3758 if not child: 

3759 raise util.SMException( 

3760 'Neither {} nor {} found'.format( 

3761 childUuid, self.TMP_RENAME_PREFIX + childUuid 

3762 ) 

3763 ) 

3764 Util.log('Renaming child back to {}'.format(childUuid)) 

3765 child.rename(childUuid) 

3766 Util.log('Updating the VDI record') 

3767 child.setConfig(VDI.DB_VDI_PARENT, parentUuid) 

3768 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type) 

3769 

3770 # TODO: Maybe deflate here. 

3771 

3772 if child.isHidden(): 

3773 child._setHidden(False) 

3774 if not parent.isHidden(): 

3775 parent._setHidden(True) 

3776 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3777 Util.log('*** leaf-coalesce undo successful') 

3778 

3779 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3780 Util.log('*** FINISH LEAF-COALESCE') 

3781 vdi = self.getVDI(childUuid) 

3782 if not vdi: 

3783 raise util.SMException('VDI {} not found'.format(childUuid)) 

3784 # TODO: Maybe inflate. 

3785 try: 

3786 self.forgetVDI(parentUuid) 

3787 except XenAPI.Failure: 

3788 pass 

3789 self._updateSlavesOnResize(vdi) 

3790 Util.log('*** finished leaf-coalesce successfully') 

3791 

3792 def _checkSlaves(self, vdi): 

3793 try: 

3794 all_openers = self._linstor.get_volume_openers(vdi.uuid) 

3795 for openers in all_openers.values(): 

3796 for opener in openers.values(): 

3797 if opener['process-name'] != 'tapdisk': 

3798 raise util.SMException( 

3799 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) 

3800 ) 

3801 except LinstorVolumeManagerError as e: 

3802 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

3803 raise 

3804 

3805 

3806################################################################################ 

3807# 

3808# Helpers 

3809# 

def daemonize():
    """Classic double-fork daemonization.

    Returns False in the original (parent) process and True in the fully
    detached grandchild that should carry on with the work.
    """
    child = os.fork()
    if child:
        # Parent: reap the intermediate child and resume normal operation.
        os.waitpid(child, 0)
        Util.log("New PID [%d]" % child)
        return False

    # Intermediate child: detach from the controlling terminal.
    os.chdir("/")
    os.setsid()

    grandchild = os.fork()
    if grandchild:
        Util.log("Will finish as PID [%d]" % grandchild)
        os._exit(0)

    # Grandchild: drop the inherited std fds.
    for fd in (0, 1, 2):
        try:
            os.close(fd)
        except OSError:
            pass
    # we need to fill those special fd numbers or pread won't work
    sys.stdin = open("/dev/null", 'r')
    sys.stderr = open("/dev/null", 'w')
    sys.stdout = open("/dev/null", 'w')
    # As we're a new process we need to clear the lock objects
    lock.Lock.clearAll()
    return True

3834 

3835 

def normalizeType(type):
    """Collapse a concrete SR driver name into one of the generic SR.TYPES.

    Raises util.SMException for any unsupported type.
    """
    lvm_aliases = ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]
    file_based = [
        "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs",
        "moosefs", "xfs", "zfs", "largeblock"
    ]
    if type in LVMSR.SUBTYPES:
        type = SR.TYPE_LVHD
    elif type in lvm_aliases:
        # temporary while LVHD is symlinked as LVM
        type = SR.TYPE_LVHD
    elif type in file_based:
        type = SR.TYPE_FILE
    elif type == "linstor":
        type = SR.TYPE_LINSTOR
    if type not in SR.TYPES:
        raise util.SMException("Unsupported SR type: %s" % type)
    return type

3852 

# Default quiet period (seconds) observed before GC starts real work;
# consumed by _gcLoopPause() via an abortable sleep.
GCPAUSE_DEFAULT_SLEEP = 5 * 60

3854 

3855 

def _gc_init_file(sr_uuid):
    """Return the path of the per-SR 'gc_init' marker file."""
    sr_dir = os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))
    return os.path.join(sr_dir, 'gc_init')

3858 

3859 

def _create_init_file(sr_uuid):
    """Create (or refresh) the per-SR 'gc_init' marker file, creating the
    SR's non-persistent directory first if needed."""
    util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid)))
    # _gc_init_file() already returns a complete path; the original wrapped
    # it in a redundant single-argument os.path.join().
    with open(_gc_init_file(sr_uuid), 'w+') as f:
        f.write('1')

3864 

3865 

def _gcLoopPause(sr, dryRun=False, immediate=False):
    """Observe an optional quiet period before GC work begins.

    Skipped entirely when immediate=True. If the GCPAUSE fistpoint is armed
    it takes over (and simply returns); otherwise, once the SR's gc_init
    marker exists, wait GCPAUSE_DEFAULT_SLEEP seconds in an abortable sleep.
    """
    if immediate:
        return

    # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist
    # point will just return. Otherwise, fall back on an abortable sleep.
    if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT):
        util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT,
                                          lambda *args: None)
        return

    if not os.path.exists(_gc_init_file(sr.uuid)):
        return

    def abortTest():
        return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT)

    # If time.sleep hangs we are in deep trouble, however for
    # completeness we set the timeout of the abort thread to
    # 110% of GCPAUSE_DEFAULT_SLEEP.
    Util.log("GC active, about to go quiet")
    Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP),
                      None, sr.uuid, abortTest, VDI.POLL_INTERVAL,
                      GCPAUSE_DEFAULT_SLEEP * 1.1)
    Util.log("GC active, quiet period ended")

3889 

3890 

def _gcLoop(sr, dryRun=False, immediate=False):
    """Main GC work loop: repeatedly scan, garbage-collect and coalesce
    until no work remains, a SIGTERM arrives, or the SR is unplugged.

    Holds lockGCActive for the whole run and lockGCRunning around each
    iteration; progress is reported through a XAPI task.
    """
    # Only one GC instance may be active per SR.
    if not lockGCActive.acquireNoblock():
        Util.log("Another GC instance already active, exiting")
        return

    # Check we're still attached after acquiring locks
    if not sr.xapi.isPluggedHere():
        Util.log("SR no longer attached, exiting")
        return

    # Clean up Intellicache files
    sr.cleanupCache()

    # Track how many we do
    coalesced = 0
    task_status = "success"
    try:
        # Check if any work needs to be done
        if not sr.xapi.isPluggedHere():
            Util.log("SR no longer attached, exiting")
            return
        sr.scanLocked()
        if not sr.hasWork():
            Util.log("No work, exiting")
            return
        sr.xapi.create_task(
            "Garbage Collection",
            "Garbage collection for SR %s" % sr.uuid)
        _gcLoopPause(sr, dryRun, immediate=immediate)
        while True:
            if SIGTERM:
                Util.log("Term requested")
                return

            if not sr.xapi.isPluggedHere():
                Util.log("SR no longer attached, exiting")
                break
            sr.scanLocked()
            if not sr.hasWork():
                Util.log("No work, exiting")
                break

            # lockGCRunning is taken per-iteration so an aborter can get in
            # between units of work.
            if not lockGCRunning.acquireNoblock():
                Util.log("Unable to acquire GC running lock.")
                return
            try:
                if not sr.gcEnabled():
                    break

                sr.xapi.update_task_progress("done", coalesced)

                sr.cleanupCoalesceJournals()
                # Create the init file here in case startup is waiting on it
                _create_init_file(sr.uuid)
                sr.scanLocked()
                sr.updateBlockInfo()

                howmany = len(sr.findGarbage())
                if howmany > 0:
                    Util.log("Found %d orphaned vdis" % howmany)
                    sr.lock()
                    try:
                        sr.garbageCollect(dryRun)
                    finally:
                        sr.unlock()
                    sr.xapi.srUpdate()

                # Prefer non-leaf coalesces; handle one candidate per
                # iteration, then rescan from the top of the loop.
                candidate = sr.findCoalesceable()
                if candidate:
                    util.fistpoint.activate(
                        "LVHDRT_finding_a_suitable_pair", sr.uuid)
                    sr.coalesce(candidate, dryRun)
                    sr.xapi.srUpdate()
                    coalesced += 1
                    continue

                candidate = sr.findLeafCoalesceable()
                if candidate:
                    sr.coalesceLeaf(candidate, dryRun)
                    sr.xapi.srUpdate()
                    coalesced += 1
                    continue

            finally:
                lockGCRunning.release()
    except:
        task_status = "failure"
        raise
    finally:
        # Always report task status, refresh the init marker and drop the
        # active lock, whatever way we left the loop.
        sr.xapi.set_task_status(task_status)
        Util.log("GC process exiting, no work left")
        _create_init_file(sr.uuid)
        lockGCActive.release()

3984 

3985 

def _gc(session, srUuid, dryRun=False, immediate=False):
    """Run a single GC session on SR srUuid, always cleaning up afterwards."""
    init(srUuid)
    sr_instance = SR.getInstance(srUuid, session)
    if not sr_instance.gcEnabled(False):
        return

    try:
        _gcLoop(sr_instance, dryRun, immediate=immediate)
    finally:
        # Report no-space candidates, log final state and release the XAPI
        # session reference even when the loop raised.
        sr_instance.check_no_space_candidates()
        sr_instance.cleanup()
        sr_instance.logFilter.logState()
        del sr_instance.xapi

3999 

4000 

def _abort(srUuid, soft=False):
    """Aborts an GC/coalesce.

    srUuid: the UUID of the SR whose GC/coalesce must be aborted
    soft: If set to True and there is a pending abort signal, the function
    doesn't do anything. If set to False, a new abort signal is issued.

    returns: If soft is set to False, we return True holding lockGCActive. If
    soft is set to False and an abort signal is pending, we return False
    without holding lockGCActive. An exception is raised in case of error."""
    Util.log("=== SR %s: abort ===" % (srUuid))
    init(srUuid)
    if lockGCActive.acquireNoblock():
        # Nothing was running: we already hold the lock.
        return True

    Util.log("Aborting currently-running instance (SR %s)" % srUuid)
    abortFlag = IPCFlag(srUuid)
    if not abortFlag.set(FLAG_TYPE_ABORT, soft):
        return False

    gotLock = False
    for _ in range(SR.LOCK_RETRY_ATTEMPTS):
        gotLock = lockGCActive.acquireNoblock()
        if gotLock:
            break
        time.sleep(SR.LOCK_RETRY_INTERVAL)
    abortFlag.clear(FLAG_TYPE_ABORT)
    if not gotLock:
        raise util.CommandException(code=errno.ETIMEDOUT,
                reason="SR %s: error aborting existing process" % srUuid)
    return True

4029 

4030 

def init(srUuid):
    """Lazily instantiate the module-level GC locks for the given SR."""
    global lockGCRunning, lockGCActive
    if not lockGCRunning:
        lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid)
    if not lockGCActive:
        lockGCActive = LockActive(srUuid)

4038 

4039 

class LockActive:
    """
    Wraps LOCK_TYPE_GC_ACTIVE so that it can only be acquired while no
    other process holds the SR lock.
    """

    def __init__(self, srUuid):
        self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid)
        self._srLock = lock.Lock(lock.LOCK_TYPE_SR, srUuid)

    def acquireNoblock(self):
        """Attempt to take the GC-active lock without blocking on it.

        The SR lock is held only for the duration of the attempt.
        """
        self._srLock.acquire()
        try:
            return self._lock.acquireNoblock()
        finally:
            self._srLock.release()

    def release(self):
        """Drop the GC-active lock (the SR lock is not involved)."""
        self._lock.release()

4059 

4060 

def usage():
    """Print CLI usage to stdout, log the invalid invocation, and exit(1)."""
    output = """Garbage collect and/or coalesce COW images in a COW-based SR

Parameters:
    -u --uuid UUID SR UUID
 and one of:
    -g --gc garbage collect, coalesce, and repeat while there is work
    -G --gc_force garbage collect once, aborting any current operations
    -c --cache-clean <max_age> clean up IntelliCache cache files older than
 max_age hours
    -a --abort abort any currently running operation (GC or coalesce)
    -q --query query the current state (GC'ing, coalescing or not running)
    -x --disable disable GC/coalesce (will be in effect until you exit)
    -t --debug see Debug below

Options:
    -b --background run in background (return immediately) (valid for -g only)
    -f --force continue in the presence of COW images with errors (when doing
 GC, this might cause removal of any such images) (only valid
 for -G) (DANGEROUS)

Debug:
    The --debug parameter enables manipulation of LVHD VDIs for debugging
    purposes. ** NEVER USE IT ON A LIVE VM **
    The following parameters are required:
    -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate",
 "deflate".
    -v --vdi_uuid VDI UUID
    """
    #-d --dry-run don't actually perform any SR-modifying operations
    print(output)
    Util.log("(Invalid usage)")
    sys.exit(1)

4094 

4095 

4096############################################################################## 

4097# 

4098# API 

4099# 

def abort(srUuid, soft=False):
    """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair.

    On success, releases the process lock that _abort() returned holding.
    """
    if not _abort(srUuid, soft):
        return False
    Util.log("abort: releasing the process lock")
    lockGCActive.release()
    return True

4109 

4110 

def gc(session, srUuid, inBackground, dryRun=False):
    """Garbage collect all deleted VDIs in SR "srUuid". Fork & return
    immediately if inBackground=True.

    The following algorithm is used:
    1. If we are already GC'ing in this SR, return
    2. If we are already coalescing a VDI pair:
        a. Scan the SR and determine if the VDI pair is GC'able
        b. If the pair is not GC'able, return
        c. If the pair is GC'able, abort coalesce
    3. Scan the SR
    4. If there is nothing to collect, nor to coalesce, return
    5. If there is something to collect, GC all, then goto 3
    6. If there is something to coalesce, coalesce one pair, then goto 3
    """
    Util.log("=== SR %s: gc ===" % srUuid)

    # Arrange for SIGTERM to request a graceful stop of the work loop.
    signal.signal(signal.SIGTERM, receiveSignal)

    if inBackground:
        if daemonize():
            # we are now running in the background. Catch & log any errors
            # because there is no other way to propagate them back at this
            # point

            try:
                _gc(None, srUuid, dryRun)
            except AbortException:
                Util.log("Aborted")
            except Exception:
                Util.logException("gc")
                Util.log("* * * * * SR %s: ERROR\n" % srUuid)
            # Daemon child must never return into the caller's stack.
            os._exit(0)
    else:
        _gc(session, srUuid, dryRun, immediate=True)

4146 

4147 

def start_gc(session, sr_uuid):
    """
    Try to start a backgrounded GC session for this SR by forking the
    current process. With the systemd version, call start_gc_service()
    instead.
    """
    # don't bother if an instance already running (this is just an
    # optimization to reduce the overhead of forking a new process if we
    # don't have to, but the process will check the lock anyways)
    lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid)
    if lockRunning.acquireNoblock():
        lockRunning.release()
    elif should_preempt(session, sr_uuid):
        util.SMlog("Aborting currently-running coalesce of garbage VDI")
        try:
            if not abort(sr_uuid, soft=True):
                util.SMlog("The GC has already been scheduled to re-start")
        except util.CommandException as e:
            if e.code != errno.ETIMEDOUT:
                raise
            util.SMlog('failed to abort the GC')
    else:
        util.SMlog("A GC instance already running, not kicking")
        return

    util.SMlog(f"Starting GC file is {__file__}")
    subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'],
                   stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)

4176 

def start_gc_service(sr_uuid, wait=False):
    """
    Start the templated systemd service that runs GC on the given SR UUID.
    If the service was already started, this is a no-op.

    Because the service is a one-shot with RemainAfterExit=no, when called
    with wait=True this runs the service synchronously and only returns once
    the run has finished — used to force a GC run rather than just kick one
    off in the background.
    """
    # systemd unit instance names need '-' escaped.
    sr_uuid_esc = sr_uuid.replace("-", "\\x2d")
    util.SMlog(f"Kicking SMGC@{sr_uuid}...")
    cmd = ["/usr/bin/systemctl", "--quiet"]
    if not wait:
        cmd.append("--no-block")
    cmd.extend(["start", f"SMGC@{sr_uuid_esc}"])
    subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)

4194 

4195 

def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False):
    """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure
    the SR lock is held.
    The following algorithm is used:
    1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce
    2. Scan the SR
    3. GC
    4. return
    """
    Util.log("=== SR %s: gc_force ===" % srUuid)
    init(srUuid)
    sr = SR.getInstance(srUuid, session, lockSR, True)
    if not lockGCActive.acquireNoblock():
        # NOTE(review): abort() releases lockGCActive after acquiring it, so
        # on this path we proceed WITHOUT holding the lock while the finally
        # below releases it again — confirm this is intended with the
        # underlying lock semantics.
        abort(srUuid)
    else:
        Util.log("Nothing was running, clear to proceed")

    if force:
        Util.log("FORCED: will continue even if there are COW image errors")
    sr.scanLocked(force)
    sr.cleanupCoalesceJournals()

    try:
        sr.cleanupCache()
        sr.garbageCollect(dryRun)
    finally:
        # Always log final state and drop the active lock.
        sr.cleanup()
        sr.logFilter.logState()
        lockGCActive.release()

4225 

4226 

def get_state(srUuid):
    """Return whether GC/coalesce is currently running or not. This asks
    systemd for the state of the templated SMGC service and returns True if
    it is "activating" or "running" (the latter for completeness — in
    practice the one-shot unit never reaches it)."""
    sr_uuid_esc = srUuid.replace("-", "\\x2d")
    cmd = ["/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
    state = result.stdout.decode('utf-8').rstrip()
    return state in ("activating", "running")

4239 

4240 

def should_preempt(session, srUuid):
    """Return True when the VDI currently being coalesced has itself become
    garbage, i.e. the in-flight coalesce is wasted work worth aborting."""
    sr = SR.getInstance(srUuid, session)
    entries = sr.journaler.getAll(VDI.JRN_COALESCE)
    if not entries:
        return False
    if len(entries) > 1:
        raise util.SMException("More than one coalesce entry: " + str(entries))
    sr.scanLocked()
    coalescedUuid = entries.popitem()[0]
    return any(vdi.uuid == coalescedUuid for vdi in sr.findGarbage())

4255 

4256 

def get_coalesceable_leaves(session, srUuid, vdiUuids):
    """Return the subset of vdiUuids that are leaf-coalesceable.

    Raises util.SMException if any UUID is unknown in the SR.
    """
    sr = SR.getInstance(srUuid, session)
    sr.scanLocked()
    coalesceable = []
    for vdi_uuid in vdiUuids:
        vdi = sr.getVDI(vdi_uuid)
        if not vdi:
            raise util.SMException("VDI %s not found" % vdi_uuid)
        if vdi.isLeafCoalesceable():
            coalesceable.append(vdi_uuid)
    return coalesceable

4268 

4269 

def cache_cleanup(session, srUuid, maxAge):
    """Remove IntelliCache files older than maxAge hours on the given SR."""
    return SR.getInstance(srUuid, session).cleanupCache(maxAge)

4273 

4274 

def debug(sr_uuid, cmd, vdi_uuid):
    """Run a debug manipulation (activate/deactivate/inflate/deflate) on a
    VDI of an LVHD SR. For debugging only — NEVER use on a live VM."""
    Util.log("Debug command: %s" % cmd)
    sr = SR.getInstance(sr_uuid, None)
    if not isinstance(sr, LVMSR):
        print("Error: not an LVHD SR")
        return
    sr.scanLocked()
    vdi = sr.getVDI(vdi_uuid)
    if not vdi:
        # Bug fix: the original omitted the format argument and printed a
        # literal "%s".
        print("Error: VDI %s not found" % vdi_uuid)
        return
    print("Running %s on SR %s" % (cmd, sr))
    print("VDI before: %s" % vdi)
    if cmd == "activate":
        vdi._activate()
        print("VDI file: %s" % vdi.path)
    if cmd == "deactivate":
        ns = NS_PREFIX_LVM + sr.uuid
        sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False)
    if cmd == "inflate":
        vdi.inflateFully()
        sr.cleanup()
    if cmd == "deflate":
        vdi.deflate()
        sr.cleanup()
    sr.scanLocked()
    print("VDI after: %s" % vdi)

4302 

4303 

def abort_optional_reenable(uuid):
    """Disable GC/coalesce for the SR until the user presses enter, then
    release the locks taken while disabling."""
    print("Disabling GC/coalesce for %s" % uuid)
    aborted = _abort(uuid)
    input("Press enter to re-enable...")
    print("GC/coalesce re-enabled")
    lockGCRunning.release()
    if aborted:
        # _abort() returned holding lockGCActive; drop it now.
        lockGCActive.release()

4312 

4313 

4314############################################################################## 

4315# 

4316# CLI 

4317# 

def main():
    """CLI entry point: parse options, validate the combination, dispatch."""
    action = ""
    maxAge = 0
    uuid = ""
    background = False
    force = False
    dryRun = False
    debug_cmd = ""
    vdi_uuid = ""
    shortArgs = "gGc:aqxu:bfdt:v:"
    longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable",
            "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="]

    try:
        opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs)
    except getopt.GetoptError:
        usage()
    # Options are mutually exclusive per flag, so an elif chain is safe.
    for o, a in opts:
        if o in ("-g", "--gc"):
            action = "gc"
        elif o in ("-G", "--gc_force"):
            action = "gc_force"
        elif o in ("-c", "--clean_cache"):
            action = "clean_cache"
            maxAge = int(a)
        elif o in ("-a", "--abort"):
            action = "abort"
        elif o in ("-q", "--query"):
            action = "query"
        elif o in ("-x", "--disable"):
            action = "disable"
        elif o in ("-u", "--uuid"):
            uuid = a
        elif o in ("-b", "--background"):
            background = True
        elif o in ("-f", "--force"):
            force = True
        elif o in ("-d", "--dry-run"):
            Util.log("Dry run mode")
            dryRun = True
        elif o in ("-t", "--debug"):
            action = "debug"
            debug_cmd = a
        elif o in ("-v", "--vdi_uuid"):
            vdi_uuid = a

    if not action or not uuid:
        usage()
    # --debug needs both a command and a VDI; no other action accepts them.
    if action == "debug" and not (debug_cmd and vdi_uuid) or \
            action != "debug" and (debug_cmd or vdi_uuid):
        usage()

    if action != "query" and action != "debug":
        print("All output goes to log")

    if action == "gc":
        gc(None, uuid, background, dryRun)
    elif action == "gc_force":
        gc_force(None, uuid, force, dryRun, True)
    elif action == "clean_cache":
        cache_cleanup(None, uuid, maxAge)
    elif action == "abort":
        abort(uuid)
    elif action == "query":
        print("Currently running: %s" % get_state(uuid))
    elif action == "disable":
        abort_optional_reenable(uuid)
    elif action == "debug":
        debug(uuid, debug_cmd, vdi_uuid)

4388 

# Script entry point (e.g. invoked by the SMGC systemd unit or start_gc()).
if __name__ == '__main__':
    main()