#!/usr/bin/python3
#
# Copyright (C) Citrix Systems Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; version 2.1 only.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
#
# Script to coalesce and garbage collect COW-based SRs in the background
#

from sm_typing import Any, Optional, List, override

import os
import os.path
import sys
import time
import signal
import subprocess
import getopt
import datetime
import traceback
import base64
import zlib
import errno
import stat

import XenAPI # pylint: disable=import-error
import util
import lvutil
import lvmcache
import journaler
import fjournaler
import lock
import blktap2
import xs_errors
from refcounter import RefCounter
from ipc import IPCFlag
from lvmanager import LVActivator
from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG
from functools import reduce
from time import monotonic as _time

from constants import NS_PREFIX_LVM, VG_LOCATION, VG_PREFIX
from cowutil import CowImageInfo, CowUtil, getCowUtil
from lvmcowutil import LV_PREFIX, LvmCowUtil
from vditype import VdiType, VdiTypeExtension, VDI_COW_TYPES, VDI_TYPE_TO_EXTENSION

try:
    from linstorcowutil import LinstorCowUtil
    from linstorjournaler import LinstorJournaler
    from linstorvolumemanager import get_controller_uri
    from linstorvolumemanager import LinstorVolumeManager
    from linstorvolumemanager import LinstorVolumeManagerError
    from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX

    LINSTOR_AVAILABLE = True
except ImportError:
    LINSTOR_AVAILABLE = False

# Enable automatic online leaf-coalescing. Set this to False to disable it
# (for example if lvhd_stop_using_() does not work correctly); leaf-coalesce
# then remains available through the explicit LEAFCLSC_FORCE flag in the VDI
# record for use by the offline tool (which makes the operation safe by
# pausing the VM first).
AUTO_ONLINE_LEAF_COALESCE_ENABLED = True

FLAG_TYPE_ABORT = "abort"  # flag to request aborting of GC/coalesce

# process "lock", used simply as an indicator that a process already exists
# that is doing GC/coalesce on this SR (such a process holds the lock, and we
# check for that fact by trying the lock).
lockGCRunning = None

# process "lock" to indicate that the GC process has been activated but may
# not yet be running; stops a second process from being started.
LOCK_TYPE_GC_ACTIVE = "gc_active"
lockGCActive = None

# Default coalesce error rate limit, in messages per minute. A zero value
# disables throttling, and a negative value disables error reporting.
DEFAULT_COALESCE_ERR_RATE = 1.0 / 60
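
# Illustrative arithmetic for the throttling in VDI._reportCoalesceError()
# below: at the default rate of 1.0 / 60 messages per minute, the minimum
# spacing between two XenCenter messages works out to
#     (1.0 / DEFAULT_COALESCE_ERR_RATE) * 60 = 3600 seconds (one hour).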

COALESCE_LAST_ERR_TAG = 'last-coalesce-error'
COALESCE_ERR_RATE_TAG = 'coalesce-error-rate'
VAR_RUN = "/var/run/"
SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log"

N_RUNNING_AVERAGE = 10

NON_PERSISTENT_DIR = '/run/nonpersistent/sm'

# Signal handler flag: set to True once SIGTERM has been received.
SIGTERM = False


class AbortException(util.SMException):
    pass


class CancelException(util.SMException):
    pass


def receiveSignal(signalNumber, frame):
    global SIGTERM

    util.SMlog("GC: received SIGTERM")
    SIGTERM = True


################################################################################
#
#  Util
#
class Util:
    RET_RC = 1
    RET_STDOUT = 2
    RET_STDERR = 4

    UUID_LEN = 36

    PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024}

    @staticmethod
    def log(text) -> None:
        util.SMlog(text, ident="SMGC")

    @staticmethod
    def logException(tag):
        info = sys.exc_info()
        if info[0] == SystemExit:
            # this should not be happening when catching "Exception", but it is
            sys.exit(0)
        tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2]))
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
        Util.log("         ***********************")
        Util.log("         *  E X C E P T I O N  *")
        Util.log("         ***********************")
        Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1]))
        Util.log(tb)
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")

    @staticmethod
    def doexec(args, expectedRC, inputtext=None, ret=None, log=True):
        "Execute a subprocess, then return its return code, stdout, stderr"
        proc = subprocess.Popen(args,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                close_fds=True)
        (stdout, stderr) = proc.communicate(inputtext)
        stdout = str(stdout)
        stderr = str(stderr)
        rc = proc.returncode
        if log:
            Util.log("`%s`: %s" % (args, rc))
        if not isinstance(expectedRC, list):
            expectedRC = [expectedRC]
        if rc not in expectedRC:
            reason = stderr.strip()
            if stdout.strip():
                reason = "%s (stdout: %s)" % (reason, stdout.strip())
            Util.log("Failed: %s" % reason)
            raise util.CommandException(rc, args, reason)

        if ret == Util.RET_RC:
            return rc
        if ret == Util.RET_STDERR:
            return stderr
        return stdout
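
    # A minimal usage sketch for doexec() (mirroring _runTapdiskDiff() further
    # down in this file); the command is illustrative:
    #
    #     out = Util.doexec("ls /var/run", 0)
    #
    # Passing ret=Util.RET_RC returns the exit code instead, and
    # ret=Util.RET_STDERR returns the captured stderr.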

    @staticmethod
    def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut, prefSig=signal.SIGKILL):
        """execute func in a separate process and kill it if abortTest signals
        so"""
        abortSignaled = abortTest()  # check now before we clear resultFlag
        resultFlag = IPCFlag(ns)
        resultFlag.clearAll()
        pid = os.fork()
        if pid:
            startTime = _time()
            try:
                while True:
                    if resultFlag.test("success"):
                        Util.log("  Child process completed successfully")
                        resultFlag.clear("success")
                        return
                    if resultFlag.test("failure"):
                        resultFlag.clear("failure")
                        raise util.SMException("Child process exited with error")
                    if abortTest() or abortSignaled or SIGTERM:
                        os.killpg(pid, prefSig)
                        raise AbortException("Aborting due to signal")
                    if timeOut and _time() - startTime > timeOut:
                        os.killpg(pid, prefSig)
                        resultFlag.clearAll()
                        raise util.SMException("Timed out")
                    time.sleep(pollInterval)
            finally:
                wait_pid = 0
                rc = -1
                count = 0
                while wait_pid == 0 and count < 10:
                    wait_pid, rc = os.waitpid(pid, os.WNOHANG)
                    if wait_pid == 0:
                        time.sleep(2)
                        count += 1

                if wait_pid == 0:
                    Util.log("runAbortable: wait for process completion timed out")
        else:
            os.setpgrp()
            try:
                if func() == ret:
                    resultFlag.set("success")
                else:
                    resultFlag.set("failure")
            except Exception as e:
                Util.log("Child process failed with: (%s)" % e)
                resultFlag.set("failure")
                Util.logException("This exception has occurred")
            os._exit(0)
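
    # A usage sketch for runAbortable(), mirroring how it is invoked further
    # down in this file (the callable, namespace and abort test are
    # illustrative):
    #
    #     abortTest = lambda: IPCFlag(sr_uuid).test(FLAG_TYPE_ABORT)
    #     Util.runAbortable(lambda: util.zeroOut(path, offset, length), True,
    #                       sr_uuid, abortTest, VDI.POLL_INTERVAL, 0)
    #
    # The parent polls the "success"/"failure" IPC flags set by the forked
    # child and kills the child's process group on abort or timeout (a
    # timeOut of 0 disables the timeout check).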

    @staticmethod
    def num2str(number):
        for prefix in ("G", "M", "K"):
            if number >= Util.PREFIX[prefix]:
                return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix)
        return "%s" % number

    @staticmethod
    def numBits(val):
        count = 0
        while val:
            count += val & 1
            val = val >> 1
        return count
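
    # For example: Util.num2str(1536) returns "1.500K", Util.num2str(512)
    # returns "512", and Util.numBits(0b1011) returns 3.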

    @staticmethod
    def countBits(bitmap1, bitmap2):
        """return bit count in the bitmap produced by ORing the two bitmaps"""
        len1 = len(bitmap1)
        len2 = len(bitmap2)
        lenLong = len1
        lenShort = len2
        bitmapLong = bitmap1
        if len2 > len1:
            lenLong = len2
            lenShort = len1
            bitmapLong = bitmap2

        count = 0
        for i in range(lenShort):
            val = bitmap1[i] | bitmap2[i]
            count += Util.numBits(val)

        # walk the tail of the longer bitmap, starting at lenShort rather
        # than reusing the previous loop variable (which would be undefined
        # for empty bitmaps)
        for i in range(lenShort, lenLong):
            val = bitmapLong[i]
            count += Util.numBits(val)
        return count
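
    # Worked example: for bitmap1 = b"\x0f" and bitmap2 = b"\x30\x01", the
    # overlapping byte ORs to 0x3f (6 set bits) and the remaining byte of the
    # longer bitmap contributes 1 more, so countBits() returns 7.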

    @staticmethod
    def getThisScript():
        thisScript = util.get_real_path(__file__)
        if thisScript.endswith(".pyc"):
            thisScript = thisScript[:-1]
        return thisScript


################################################################################
#
#  XAPI
#
class XAPI:
    USER = "root"
    PLUGIN_ON_SLAVE = "on-slave"

    CONFIG_SM = 0
    CONFIG_OTHER = 1
    CONFIG_ON_BOOT = 2
    CONFIG_ALLOW_CACHING = 3

    CONFIG_NAME = {
        CONFIG_SM: "sm-config",
        CONFIG_OTHER: "other-config",
        CONFIG_ON_BOOT: "on-boot",
        CONFIG_ALLOW_CACHING: "allow_caching"
    }

    class LookupError(util.SMException):
        pass

    @staticmethod
    def getSession():
        session = XenAPI.xapi_local()
        session.xenapi.login_with_password(XAPI.USER, '', '', 'SM')
        return session

    def __init__(self, session, srUuid):
        self.sessionPrivate = False
        self.session = session
        if self.session is None:
            self.session = self.getSession()
            self.sessionPrivate = True
        self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid)
        self.srRecord = self.session.xenapi.SR.get_record(self._srRef)
        self.hostUuid = util.get_this_host()
        self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid)
        self.task = None
        self.task_progress = {"coalescable": 0, "done": 0}

    def __del__(self):
        if self.sessionPrivate:
            self.session.xenapi.session.logout()

    @property
    def srRef(self):
        return self._srRef

    def isPluggedHere(self):
        pbds = self.getAttachedPBDs()
        for pbdRec in pbds:
            if pbdRec["host"] == self._hostRef:
                return True
        return False

    def poolOK(self):
        host_recs = self.session.xenapi.host.get_all_records()
        for host_ref, host_rec in host_recs.items():
            if not host_rec["enabled"]:
                Util.log("Host %s not enabled" % host_rec["uuid"])
                return False
        return True

    def isMaster(self):
        if self.srRecord["shared"]:
            pool = list(self.session.xenapi.pool.get_all_records().values())[0]
            return pool["master"] == self._hostRef
        else:
            pbds = self.getAttachedPBDs()
            if len(pbds) < 1:
                raise util.SMException("Local SR not attached")
            elif len(pbds) > 1:
                raise util.SMException("Local SR multiply attached")
            return pbds[0]["host"] == self._hostRef

    def getAttachedPBDs(self):
        """Return PBD records for all PBDs of this SR that are currently
        attached"""
        attachedPBDs = []
        pbds = self.session.xenapi.PBD.get_all_records()
        for pbdRec in pbds.values():
            if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]:
                attachedPBDs.append(pbdRec)
        return attachedPBDs

    def getOnlineHosts(self):
        return util.get_online_hosts(self.session)

    def ensureInactive(self, hostRef, args):
        text = self.session.xenapi.host.call_plugin(
            hostRef, self.PLUGIN_ON_SLAVE, "multi", args)
        Util.log("call-plugin returned: '%s'" % text)

    def getRecordHost(self, hostRef):
        return self.session.xenapi.host.get_record(hostRef)

    def _getRefVDI(self, uuid):
        return self.session.xenapi.VDI.get_by_uuid(uuid)

    def getRefVDI(self, vdi):
        return self._getRefVDI(vdi.uuid)

    def getRecordVDI(self, uuid):
        try:
            ref = self._getRefVDI(uuid)
            return self.session.xenapi.VDI.get_record(ref)
        except XenAPI.Failure:
            return None

    def singleSnapshotVDI(self, vdi):
        return self.session.xenapi.VDI.snapshot(vdi.getRef(),
                                                {"type": "internal"})

    def forgetVDI(self, srUuid, vdiUuid):
        """Forget the VDI, but handle the case where the VDI has already been
        forgotten (i.e. ignore errors)"""
        try:
            vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid)
            self.session.xenapi.VDI.forget(vdiRef)
        except XenAPI.Failure:
            pass

    def getConfigVDI(self, vdi, key):
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef())
        elif kind == self.CONFIG_OTHER:
            cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef())
        elif kind == self.CONFIG_ON_BOOT:
            cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef())
        elif kind == self.CONFIG_ALLOW_CACHING:
            cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef())
        else:
            assert(False)
        Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg)))
        return cfg

    def removeFromConfigVDI(self, vdi, key):
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key)
        else:
            assert(False)

    def addToConfigVDI(self, vdi, key, val):
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val)
        else:
            assert(False)

    def isSnapshot(self, vdi):
        return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef())

    def markCacheSRsDirty(self):
        sr_refs = self.session.xenapi.SR.get_all_records_where(
            'field "local_cache_enabled" = "true"')
        for sr_ref in sr_refs:
            Util.log("Marking SR %s dirty" % sr_ref)
            util.set_dirty(self.session, sr_ref)

    def srUpdate(self):
        Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"])
        abortFlag = IPCFlag(self.srRecord["uuid"])
        task = self.session.xenapi.Async.SR.update(self._srRef)
        cancelTask = True
        try:
            for i in range(60):
                status = self.session.xenapi.task.get_status(task)
                if not status == "pending":
                    Util.log("SR.update_asynch status changed to [%s]" % status)
                    cancelTask = False
                    return
                if abortFlag.test(FLAG_TYPE_ABORT):
                    Util.log("Abort signalled during srUpdate, cancelling task...")
                    try:
                        self.session.xenapi.task.cancel(task)
                        cancelTask = False
                        Util.log("Task cancelled")
                    except:
                        pass
                    return
                time.sleep(1)
        finally:
            if cancelTask:
                self.session.xenapi.task.cancel(task)
            self.session.xenapi.task.destroy(task)
        Util.log("Asynch srUpdate still running, but timeout exceeded.")

    def update_task(self):
        self.session.xenapi.task.set_other_config(
            self.task,
            {
                "applies_to": self._srRef
            })
        total = self.task_progress['coalescable'] + self.task_progress['done']
        if (total > 0):
            self.session.xenapi.task.set_progress(
                self.task, float(self.task_progress['done']) / total)
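
        # Worked example (hypothetical counts): with task_progress equal to
        # {"coalescable": 3, "done": 1}, total is 4 and the progress reported
        # to XAPI is 1.0 / 4 = 0.25.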

    def create_task(self, label, description):
        self.task = self.session.xenapi.task.create(label, description)
        self.update_task()

    def update_task_progress(self, key, value):
        self.task_progress[key] = value
        if self.task:
            self.update_task()

    def set_task_status(self, status):
        if self.task:
            self.session.xenapi.task.set_status(self.task, status)


################################################################################
#
#  VDI
#
class VDI(object):
    """Object representing a VDI of a COW-based SR"""

    POLL_INTERVAL = 1
    POLL_TIMEOUT = 30
    DEVICE_MAJOR = 202

    # config keys & values
    DB_VDI_PARENT = "vhd-parent"
    DB_VDI_TYPE = "vdi_type"
    DB_VDI_BLOCKS = "vhd-blocks"
    DB_VDI_PAUSED = "paused"
    DB_VDI_RELINKING = "relinking"
    DB_VDI_ACTIVATING = "activating"
    DB_GC = "gc"
    DB_COALESCE = "coalesce"
    DB_LEAFCLSC = "leaf-coalesce"  # config key
    DB_GC_NO_SPACE = "gc_no_space"
    LEAFCLSC_DISABLED = "false"  # set by user; means do not leaf-coalesce
    LEAFCLSC_FORCE = "force"  # set by user; means skip snap-coalesce
    LEAFCLSC_OFFLINE = "offline"  # set here for informational purposes: means
                                  # no space to snap-coalesce or unable to keep
                                  # up with VDI. This is not used by the SM, it
                                  # might be used by external components.
    DB_ONBOOT = "on-boot"
    ONBOOT_RESET = "reset"
    DB_ALLOW_CACHING = "allow_caching"

    CONFIG_TYPE = {
        DB_VDI_PARENT: XAPI.CONFIG_SM,
        DB_VDI_TYPE: XAPI.CONFIG_SM,
        DB_VDI_BLOCKS: XAPI.CONFIG_SM,
        DB_VDI_PAUSED: XAPI.CONFIG_SM,
        DB_VDI_RELINKING: XAPI.CONFIG_SM,
        DB_VDI_ACTIVATING: XAPI.CONFIG_SM,
        DB_GC: XAPI.CONFIG_OTHER,
        DB_COALESCE: XAPI.CONFIG_OTHER,
        DB_LEAFCLSC: XAPI.CONFIG_OTHER,
        DB_ONBOOT: XAPI.CONFIG_ON_BOOT,
        DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING,
        DB_GC_NO_SPACE: XAPI.CONFIG_SM
    }

    LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024  # bytes
    LIVE_LEAF_COALESCE_TIMEOUT = 10  # seconds
    TIMEOUT_SAFETY_MARGIN = 0.5  # extra margin when calculating
                                 # feasibility of leaf coalesce

    JRN_RELINK = "relink"  # journal entry type for relinking children
    JRN_COALESCE = "coalesce"  # to communicate which VDI is being coalesced
    JRN_LEAF = "leaf"  # used in coalesce-leaf

    STR_TREE_INDENT = 4

    def __init__(self, sr, uuid, vdi_type):
        self.sr = sr
        self.scanError = True
        self.uuid = uuid
        self.vdi_type = vdi_type
        self.fileName = ""
        self.parentUuid = ""
        self.sizeVirt = -1
        self._sizePhys = -1
        self._sizeAllocated = -1
        self._hidden = False
        self.parent = None
        self.children = []
        self._vdiRef = None
        self.cowutil = getCowUtil(vdi_type)
        self._clearRef()

    @staticmethod
    def extractUuid(path):
        raise NotImplementedError("Implement in sub class")

    def load(self, info=None) -> None:
        """Load VDI info"""
        pass

    def getDriverName(self) -> str:
        return self.vdi_type

    def getRef(self):
        if self._vdiRef is None:
            self._vdiRef = self.sr.xapi.getRefVDI(self)
        return self._vdiRef

    def getConfig(self, key, default=None):
        config = self.sr.xapi.getConfigVDI(self, key)
        if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING:
            val = config
        else:
            val = config.get(key)
        if val:
            return val
        return default

    def setConfig(self, key, val):
        self.sr.xapi.removeFromConfigVDI(self, key)
        self.sr.xapi.addToConfigVDI(self, key, val)
        Util.log("Set %s = %s for %s" % (key, val, self))

    def delConfig(self, key):
        self.sr.xapi.removeFromConfigVDI(self, key)
        Util.log("Removed %s from %s" % (key, self))

    def ensureUnpaused(self):
        if self.getConfig(self.DB_VDI_PAUSED) == "true":
            Util.log("Unpausing VDI %s" % self)
            self.unpause()

    def pause(self, failfast=False) -> None:
        if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid,
                                     self.uuid, failfast):
            raise util.SMException("Failed to pause VDI %s" % self)

    def _report_tapdisk_unpause_error(self):
        try:
            xapi = self.sr.xapi.session.xenapi
            sr_ref = xapi.SR.get_by_uuid(self.sr.uuid)
            msg_name = "failed to unpause tapdisk"
            msg_body = "Failed to unpause tapdisk for VDI %s, " \
                    "VMs using this tapdisk have lost access " \
                    "to the corresponding disk(s)" % self.uuid
            xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body)
        except Exception as e:
            util.SMlog("failed to generate message: %s" % e)

    def unpause(self):
        if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid,
                                       self.uuid):
            self._report_tapdisk_unpause_error()
            raise util.SMException("Failed to unpause VDI %s" % self)

    def refresh(self, ignoreNonexistent=True):
        """Pause-unpause in one step"""
        self.sr.lock()
        try:
            try:
                if not blktap2.VDI.tap_refresh(self.sr.xapi.session,
                                               self.sr.uuid, self.uuid):
                    self._report_tapdisk_unpause_error()
                    raise util.SMException("Failed to refresh %s" % self)
            except XenAPI.Failure as e:
                if util.isInvalidVDI(e) and ignoreNonexistent:
                    Util.log("VDI %s not found, ignoring" % self)
                    return
                raise
        finally:
            self.sr.unlock()

    def isSnapshot(self):
        return self.sr.xapi.isSnapshot(self)

    def isAttachedRW(self):
        return util.is_attached_rw(
            self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef()))

    def getVDIBlocks(self):
        val = self.updateBlockInfo()
        bitmap = zlib.decompress(base64.b64decode(val))
        return bitmap
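
    # A small illustrative round-trip for the encoding used here (assuming
    # cowutil.getBlockBitmap() returns a zlib-compressed bitmap, which
    # updateBlockInfo() then base64-encodes into the VDI config):
    #
    #     raw = zlib.compress(b"\x0f\x30")
    #     val = base64.b64encode(raw).decode()
    #     assert zlib.decompress(base64.b64decode(val)) == b"\x0f\x30"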

    def isCoalesceable(self):
        """A VDI is coalesceable if it has no siblings and is not a leaf"""
        return not self.scanError and \
                self.parent and \
                len(self.parent.children) == 1 and \
                self.isHidden() and \
                len(self.children) > 0

    def isLeafCoalesceable(self):
        """A VDI is leaf-coalesceable if it has no siblings and is a leaf"""
        return not self.scanError and \
                self.parent and \
                len(self.parent.children) == 1 and \
                not self.isHidden() and \
                len(self.children) == 0

    def canLiveCoalesce(self, speed):
        """Can we stop-and-leaf-coalesce this VDI? The VDI must be
        isLeafCoalesceable() already"""
        feasibleSize = False
        allowedDownTime = \
            self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT
        allocated_size = self.getAllocatedSize()
        if speed:
            feasibleSize = \
                allocated_size // speed < allowedDownTime
        else:
            feasibleSize = \
                allocated_size < self.LIVE_LEAF_COALESCE_MAX_SIZE

        return (feasibleSize or
                self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE)
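
    # Illustrative feasibility check (hypothetical numbers): with the class
    # defaults above, allowedDownTime = 0.5 * 10 = 5 seconds. A VDI with
    # 1 GiB allocated and a measured coalesce speed of 100 MiB/s needs about
    # 10 s > 5 s, so it only qualifies if the user set LEAFCLSC_FORCE; with
    # no speed data, only VDIs under the 20 MiB LIVE_LEAF_COALESCE_MAX_SIZE
    # threshold qualify.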

    def getAllPrunable(self):
        if len(self.children) == 0:  # base case
            # it is possible to have a hidden leaf that was recently coalesced
            # onto its parent, its children already relinked but not yet
            # reloaded - in which case it may not be garbage collected yet:
            # some tapdisks could still be using the file.
            if self.sr.journaler.get(self.JRN_RELINK, self.uuid):
                return []
            if not self.scanError and self.isHidden():
                return [self]
            return []

        thisPrunable = True
        vdiList = []
        for child in self.children:
            childList = child.getAllPrunable()
            vdiList.extend(childList)
            if child not in childList:
                thisPrunable = False

        # We can destroy the current VDI if all of its children are hidden,
        # BUT the current VDI must be hidden too for that!
        # Example in this case (after a failed live leaf coalesce):
        #
        # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees):
        # SMGC: [32436]         b5458d61(1.000G/4.127M)
        # SMGC: [32436]         *OLD_b545(1.000G/4.129M)
        #
        # OLD_b545 is hidden and must be removed, but b5458d61 is not.
        # Normally we are not in this function when the delete action is
        # executed but in `_liveLeafCoalesce`.

        if not self.scanError and not self.isHidden() and thisPrunable:
            vdiList.append(self)
        return vdiList

    def getSizePhys(self) -> int:
        return self._sizePhys

    def getAllocatedSize(self) -> int:
        return self._sizeAllocated

    def getTreeRoot(self):
        "Get the root of the tree that self belongs to"
        root = self
        while root.parent:
            root = root.parent
        return root

    def getTreeHeight(self):
        "Get the height of the subtree rooted at self"
        if len(self.children) == 0:
            return 1

        maxChildHeight = 0
        for child in self.children:
            childHeight = child.getTreeHeight()
            if childHeight > maxChildHeight:
                maxChildHeight = childHeight

        return maxChildHeight + 1

    def getAllLeaves(self) -> List["VDI"]:
        "Get all leaf nodes in the subtree rooted at self"
        if len(self.children) == 0:
            return [self]

        leaves = []
        for child in self.children:
            leaves.extend(child.getAllLeaves())
        return leaves

    def updateBlockInfo(self) -> Optional[str]:
        val = base64.b64encode(self._queryCowBlocks()).decode()
        try:
            self.setConfig(VDI.DB_VDI_BLOCKS, val)
        except Exception:
            if self.vdi_type != VdiType.QCOW2:
                raise
            # Sometimes with QCOW2 the allocation table is too big to be
            # stored in XAPI. In that case we do not store it and write
            # `skipped` instead so that hasWork is happy (and the GC doesn't
            # loop indefinitely).
            self.setConfig(VDI.DB_VDI_BLOCKS, "skipped")

        return val

    def rename(self, uuid) -> None:
        "Rename the VDI file"
        assert(not self.sr.vdis.get(uuid))
        self._clearRef()
        oldUuid = self.uuid
        self.uuid = uuid
        self.children = []
        # updating the children themselves is the responsibility of the caller
        del self.sr.vdis[oldUuid]
        self.sr.vdis[self.uuid] = self

    def delete(self) -> None:
        "Physically delete the VDI"
        lock.Lock.cleanup(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
        lock.Lock.cleanupAll(self.uuid)
        self._clear()

    def getParent(self) -> str:
        return self.cowutil.getParent(self.path, lambda x: x.strip())

    def repair(self, parent) -> None:
        self.cowutil.repair(parent)

    @override
    def __str__(self) -> str:
        strHidden = ""
        if self.isHidden():
            strHidden = "*"
        strSizeVirt = "?"
        if self.sizeVirt > 0:
            strSizeVirt = Util.num2str(self.sizeVirt)
        strSizePhys = "?"
        if self._sizePhys > 0:
            strSizePhys = "/%s" % Util.num2str(self._sizePhys)
        strSizeAllocated = "?"
        if self._sizeAllocated >= 0:
            strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated)
        strType = "[{}]".format(self.vdi_type)

        return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt,
                                   strSizePhys, strSizeAllocated, strType)

    def validate(self, fast=False) -> None:
        if self.cowutil.check(self.path, fast=fast) != CowUtil.CheckResult.Success:
            raise util.SMException("COW image %s corrupted" % self)

    def _clear(self):
        self.uuid = ""
        self.path = ""
        self.parentUuid = ""
        self.parent = None
        self._clearRef()

    def _clearRef(self):
        self._vdiRef = None

    @staticmethod
    def _cancel_exception(sig, frame):
        raise CancelException()

    def _call_plugin_coalesce(self, hostRef):
        signal.signal(signal.SIGTERM, self._cancel_exception)
        args = {"path": self.path, "vdi_type": self.vdi_type}
        util.SMlog("DAMS: Calling remote coalesce with: {}".format(args))
        try:
            ret = self.sr.xapi.session.xenapi.host.call_plugin(
                hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_tapdisk", args)
            util.SMlog("DAMS: Remote coalesce returned {}".format(ret))
        except CancelException:
            util.SMlog(f"DAMS: Cancelling online coalesce following signal {args}")
            self.sr.xapi.session.xenapi.host.call_plugin(
                hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_cancel", args)
            raise
        except Exception:
            raise

    def _doCoalesceOnHost(self, hostRef):
        self.validate()
        self.parent.validate(True)
        self.parent._increaseSizeVirt(self.sizeVirt)
        self.sr._updateSlavesOnResize(self.parent)
        # TODO: We might need to make the LV RW on the slave directly for
        # coalesce? Children and parent need to be RW for QCOW2 coalesce,
        # otherwise tapdisk (libqcow) will crash trying to access them.

        def abortTest():
            file = self.sr._gc_running_file(self)
            try:
                with open(file, "r") as f:
                    if not f.read():
                        util.SMlog("DAMS: abortTest: Cancelling coalesce")
                        return True
            except OSError as e:
                if e.errno == errno.ENOENT:
                    util.SMlog("File {} does not exist".format(file))
                else:
                    util.SMlog("IOError: {}".format(e))
                return True
            return False

        Util.runAbortable(lambda: self._call_plugin_coalesce(hostRef),
                          None, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0,
                          prefSig=signal.SIGTERM)

        self.parent.validate(True)
        #self._verifyContents(0)
        self.parent.updateBlockInfo()

    def _isOpenOnHosts(self) -> Optional[str]:
        for pbdRecord in self.sr.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            args = {"path": self.path}
            is_openers = util.strtobool(self.sr.xapi.session.xenapi.host.call_plugin(
                hostRef, XAPI.PLUGIN_ON_SLAVE, "is_openers", args))
            if is_openers:
                return hostRef
        return None

    def _doCoalesce(self) -> None:
        """Coalesce self onto parent. Only perform the actual coalescing of
        an image, but not the subsequent relinking. We'll do that as the next
        step, after reloading the entire SR in case things have changed while
        we were coalescing"""
        self.validate()
        self.parent.validate(True)
        self.parent._increaseSizeVirt(self.sizeVirt)
        self.sr._updateSlavesOnResize(self.parent)
        self._coalesceCowImage(0)
        self.parent.validate(True)
        #self._verifyContents(0)
        self.parent.updateBlockInfo()

    def _verifyContents(self, timeOut):
        Util.log("  Coalesce verification on %s" % self)
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        Util.runAbortable(lambda: self._runTapdiskDiff(), True,
                          self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        Util.log("  Coalesce verification succeeded")

    def _runTapdiskDiff(self):
        cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \
                (self.getDriverName(), self.path,
                 self.parent.getDriverName(), self.parent.path)
        Util.doexec(cmd, 0)
        return True

    @staticmethod
    def _reportCoalesceError(vdi, ce):
        """Reports a coalesce error to XenCenter.

        vdi: the VDI object on which the coalesce error occurred
        ce: the CommandException that was raised"""

        msg_name = os.strerror(ce.code)
        if ce.code == errno.ENOSPC:
            # TODO We could add more information here, e.g. exactly how much
            # space is required for the particular coalesce, as well as actions
            # to be taken by the user and consequences of not taking these
            # actions.
            msg_body = 'Ran out of space while coalescing.'
        elif ce.code == errno.EIO:
            msg_body = 'I/O error while coalescing.'
        else:
            msg_body = ''
        util.SMlog('Coalesce failed on SR %s: %s (%s)'
                   % (vdi.sr.uuid, msg_name, msg_body))

        # Create a XenCenter message, but don't spam.
        xapi = vdi.sr.xapi.session.xenapi
        sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid)
        oth_cfg = xapi.SR.get_other_config(sr_ref)
        if COALESCE_ERR_RATE_TAG in oth_cfg:
            coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG])
        else:
            coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE

        xcmsg = False
        if coalesce_err_rate == 0:
            xcmsg = True
        elif coalesce_err_rate > 0:
            now = datetime.datetime.now()
            sm_cfg = xapi.SR.get_sm_config(sr_ref)
            if COALESCE_LAST_ERR_TAG in sm_cfg:
                # seconds per message (minimum distance in time between two
                # messages in seconds)
                spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60)
                last = datetime.datetime.fromtimestamp(
                    float(sm_cfg[COALESCE_LAST_ERR_TAG]))
                if now - last >= spm:
                    xapi.SR.remove_from_sm_config(sr_ref,
                                                  COALESCE_LAST_ERR_TAG)
                    xcmsg = True
            else:
                xcmsg = True
            if xcmsg:
                xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG,
                                         str(now.strftime('%s')))
        if xcmsg:
            xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body)

    def coalesce(self) -> int:
        return self.cowutil.coalesce(self.path)

    @staticmethod
    def _doCoalesceCowImage(vdi: "VDI"):
        try:
            startTime = time.time()
            allocated_size = vdi.getAllocatedSize()
            coalesced_size = vdi.coalesce()
            endTime = time.time()
            vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size)
        except util.CommandException as ce:
            # We use try/except for the following piece of code because it runs
            # in a separate process context and errors will not be caught and
            # reported by anyone.
            try:
                # Report coalesce errors back to user via XC
                VDI._reportCoalesceError(vdi, ce)
            except Exception as e:
                util.SMlog('failed to create XenCenter message: %s' % e)
            raise ce
        except:
            raise

    def _vdi_is_raw(self, vdi_path):
        """
        Given path to vdi determine if it is raw
        """
        uuid = self.extractUuid(vdi_path)
        return self.sr.vdis[uuid].vdi_type == VdiType.RAW

    def _coalesceCowImage(self, timeOut):
        Util.log("  Running COW coalesce on %s" % self)

        def abortTest():
            if self.cowutil.isCoalesceableOnRemote():
                file = self.sr._gc_running_file(self)
                try:
                    with open(file, "r") as f:
                        if not f.read():
                            return True
                except OSError as e:
                    if e.errno == errno.ENOENT:
                        util.SMlog("File {} does not exist".format(file))
                    else:
                        util.SMlog("IOError: {}".format(e))
                    return True
            return IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)

        try:
            util.fistpoint.activate_custom_fn(
                "cleanup_coalesceVHD_inject_failure",
                util.inject_failure)
            Util.runAbortable(lambda: VDI._doCoalesceCowImage(self), None,
                              self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        except:
            # An exception at this phase could indicate a failure of the COW
            # coalesce itself, or that runAbortable killed the coalesce due to
            # timeOut. Try a repair and re-raise the exception.
            parent = ""
            try:
                parent = self.getParent()
                if not self._vdi_is_raw(parent):
                    # Repair error is logged and ignored. Error reraised later
                    util.SMlog('Coalesce failed on %s, attempting repair on '
                               'parent %s' % (self.uuid, parent))
                    self.repair(parent)
            except Exception as e:
                util.SMlog('(error ignored) Failed to repair parent %s '
                           'after failed coalesce on %s, err: %s' %
                           (parent, self.path, e))
            raise

        util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid)

    def _relinkSkip(self) -> None:
        """Relink children of this VDI to point to the parent of this VDI"""
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("  Relinking %s from %s to %s" %
                     (child, self, self.parent))
            util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid)
            child._setParent(self.parent)
        self.children = []

    def _reloadChildren(self, vdiSkip):
        """Pause & unpause all VDIs in the subtree to cause blktap to reload
        the COW image metadata for this file in any online VDI"""
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if child == vdiSkip:
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("  Reloading VDI %s" % child)
            child._reload()

    def _reload(self):
        """Pause & unpause to cause blktap to reload the image metadata"""
        for child in self.children:
            child._reload()

        # only leaves can be attached
        if len(self.children) == 0:
            try:
                self.delConfig(VDI.DB_VDI_RELINKING)
            except XenAPI.Failure as e:
                if not util.isInvalidVDI(e):
                    raise
            self.refresh()

    def _tagChildrenForRelink(self):
        if len(self.children) == 0:
            retries = 0
            try:
                while retries < 15:
                    retries += 1
                    if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None:
                        Util.log("VDI %s is activating, wait to relink" %
                                 self.uuid)
                    else:
                        self.setConfig(VDI.DB_VDI_RELINKING, "True")

                        if self.getConfig(VDI.DB_VDI_ACTIVATING):
                            self.delConfig(VDI.DB_VDI_RELINKING)
                            Util.log("VDI %s started activating while tagging" %
                                     self.uuid)
                        else:
                            return
                    time.sleep(2)

                raise util.SMException("Failed to tag vdi %s for relink" % self)
            except XenAPI.Failure as e:
                if not util.isInvalidVDI(e):
                    raise

        for child in self.children:
            child._tagChildrenForRelink()

    def _loadInfoParent(self):
        ret = self.cowutil.getParent(self.path, LvmCowUtil.extractUuid)
        if ret:
            self.parentUuid = ret

    def _setParent(self, parent) -> None:
        self.cowutil.setParent(self.path, parent.path, False)
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
            Util.log("Updated the vhd-parent field for child %s with %s" %
                     (self.uuid, self.parentUuid))
        except:
            Util.log("Failed to update %s with vhd-parent field %s" %
                     (self.uuid, self.parentUuid))

    def isHidden(self) -> bool:
        if self._hidden is None:
            self._loadInfoHidden()
        return self._hidden

    def _loadInfoHidden(self) -> None:
        hidden = self.cowutil.getHidden(self.path)
        self._hidden = (hidden != 0)

    def _setHidden(self, hidden=True) -> None:
        self._hidden = None
        self.cowutil.setHidden(self.path, hidden)
        self._hidden = hidden

    def _increaseSizeVirt(self, size, atomic=True) -> None:
        """Ensure the virtual size of 'self' is at least 'size'. Note that
        resizing a COW image must always be done offline and atomically: the
        file must not be open by anyone and no concurrent operations may take
        place. Thus we use the Agent API call for performing paused atomic
        operations. If the caller is already in the atomic context, it must
        call with atomic=False"""
        if self.sizeVirt >= size:
            return
        Util.log("  Expanding COW image virt size for VDI %s: %s -> %s" %
                 (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

        msize = self.cowutil.getMaxResizeSize(self.path)
        if (size <= msize):
            self.cowutil.setSizeVirtFast(self.path, size)
        else:
            if atomic:
                vdiList = self._getAllSubtree()
                self.sr.lock()
                try:
                    self.sr.pauseVDIs(vdiList)
                    try:
                        self._setSizeVirt(size)
                    finally:
                        self.sr.unpauseVDIs(vdiList)
                finally:
                    self.sr.unlock()
            else:
                self._setSizeVirt(size)

        self.sizeVirt = self.cowutil.getSizeVirt(self.path)

    def _setSizeVirt(self, size) -> None:
        """WARNING: do not call this method directly unless all VDIs in the
        subtree are guaranteed to be unplugged (and remain so for the duration
        of the operation): this operation is only safe for offline COW images"""
        jFile = os.path.join(self.sr.path, self.uuid)
        self.cowutil.setSizeVirt(self.path, size, jFile)

    def _queryCowBlocks(self) -> bytes:
        return self.cowutil.getBlockBitmap(self.path)

    def _getCoalescedSizeData(self):
        """Get the data size of the resulting image if we coalesce self onto
        parent. We calculate the actual size by using the image block allocation
        information (as opposed to just adding up the two image sizes to get an
        upper bound)"""
        # make sure we don't use stale BAT info from vdi_rec since the child
        # was writable all this time
        self.delConfig(VDI.DB_VDI_BLOCKS)
        blocksChild = self.getVDIBlocks()
        blocksParent = self.parent.getVDIBlocks()
        numBlocks = Util.countBits(blocksChild, blocksParent)
        Util.log("Num combined blocks = %d" % numBlocks)
        sizeData = numBlocks * self.cowutil.getBlockSize(self.path)
        assert(sizeData <= self.sizeVirt)
        return sizeData
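
    # Illustrative arithmetic (hypothetical numbers): if the combined
    # child/parent bitmaps show 100 allocated blocks and the image uses
    # 2 MiB blocks (the VHD default), the coalesced data size is
    # 100 * 2 MiB = 200 MiB - usually far below the upper bound obtained by
    # simply adding the two image sizes.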

    def _calcExtraSpaceForCoalescing(self) -> int:
        sizeData = self._getCoalescedSizeData()
        sizeCoalesced = sizeData + self.cowutil.calcOverheadBitmap(sizeData) + \
                self.cowutil.calcOverheadEmpty(self.sizeVirt)
        Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
        return sizeCoalesced - self.parent.getSizePhys()

    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        """How much extra space in the SR will be required to
        [live-]leaf-coalesce this VDI"""
        # the space requirements are the same as for inline coalesce
        return self._calcExtraSpaceForCoalescing()

    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        """How much extra space in the SR will be required to
        snapshot-coalesce this VDI"""
        return self._calcExtraSpaceForCoalescing() + \
                self.cowutil.calcOverheadEmpty(self.sizeVirt)  # extra snap leaf

    def _getAllSubtree(self):
        """Get self and all VDIs in the subtree of self as a flat list"""
        vdiList = [self]
        for child in self.children:
            vdiList.extend(child._getAllSubtree())
        return vdiList


class FileVDI(VDI):
    """Object representing a VDI in a file-based SR (EXT or NFS)"""

    @override
    @staticmethod
    def extractUuid(path):
        fileName = os.path.basename(path)
        return os.path.splitext(fileName)[0]
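
    # For example (illustrative path):
    #     FileVDI.extractUuid("/sr/0e0fb268-0b22-4ddc-a3d9-e00dde3a9b8b.vhd")
    # returns "0e0fb268-0b22-4ddc-a3d9-e00dde3a9b8b"; the extension varies
    # with the image type.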

    def __init__(self, sr, uuid, vdi_type):
        VDI.__init__(self, sr, uuid, vdi_type)
        self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])

    @override
    def load(self, info=None) -> None:
        if not info:
            if not util.pathexists(self.path):
                raise util.SMException("%s not found" % self.path)
            try:
                info = self.cowutil.getInfo(self.path, self.extractUuid)
            except util.SMException:
                Util.log(" [VDI %s: failed to read COW image metadata]" % self.uuid)
                return
        self.parent = None
        self.children = []
        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        self._sizePhys = info.sizePhys
        self._sizeAllocated = info.sizeAllocated
        self._hidden = info.hidden
        self.scanError = False
        self.path = os.path.join(self.sr.path, "%s%s" %
                                 (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type]))

    @override
    def rename(self, uuid) -> None:
        oldPath = self.path
        VDI.rename(self, uuid)
        self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])
        self.path = os.path.join(self.sr.path, self.fileName)
        assert(not util.pathexists(self.path))
        Util.log("Renaming %s -> %s" % (oldPath, self.path))
        os.rename(oldPath, self.path)

    @override
    def delete(self) -> None:
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" %
                                   self.uuid)
        try:
            self.sr.lock()
            try:
                os.unlink(self.path)
                self.sr.forgetVDI(self.uuid)
            finally:
                self.sr.unlock()
        except OSError:
            raise util.SMException("os.unlink(%s) failed" % self.path)
        VDI.delete(self)

    @override
    def getAllocatedSize(self) -> int:
        if self._sizeAllocated == -1:
            self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)
        return self._sizeAllocated


class LVMVDI(VDI):
    """Object representing a VDI in an LVM SR"""

    JRN_ZERO = "zero"  # journal entry type for zeroing out end of parent

    @override
    def load(self, info=None) -> None:
        # `info` is always set. The `None` default value is only here to
        # match the parent method's signature.
        assert info, "No info given to LVMVDI.load"
        self.parent = None
        self.children = []
        self._sizePhys = -1
        self._sizeAllocated = -1
        self.scanError = info.scanError
        self.sizeLV = info.sizeLV
        self.sizeVirt = info.sizeVirt
        self.fileName = info.lvName
        self.lvActive = info.lvActive
        self.lvOpen = info.lvOpen
        self.lvReadonly = info.lvReadonly
        self._hidden = info.hidden
        self.parentUuid = info.parentUuid
        self.path = os.path.join(self.sr.path, self.fileName)
        self.lvmcowutil = LvmCowUtil(self.cowutil)

    @override
    @staticmethod
    def extractUuid(path):
        return LvmCowUtil.extractUuid(path)

    def inflate(self, size):
        """inflate the LV containing the COW image to 'size'"""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self.sr.lock()
        try:
            self.lvmcowutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size)
            util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid)
        finally:
            self.sr.unlock()
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizePhys = -1
        self._sizeAllocated = -1

    def deflate(self):
        """deflate the LV containing the image to minimum"""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self.sr.lock()
        try:
            self.lvmcowutil.deflate(self.sr.lvmCache, self.fileName, self.getSizePhys())
        finally:
            self.sr.unlock()
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizePhys = -1
        self._sizeAllocated = -1

    def inflateFully(self):
        self.inflate(self.lvmcowutil.calcVolumeSize(self.sizeVirt))

    def inflateParentForCoalesce(self):
        """Inflate the parent only as much as needed for the purposes of
        coalescing"""
        if not VdiType.isCowImage(self.parent.vdi_type):
            return
        inc = self._calcExtraSpaceForCoalescing()
        if inc > 0:
            util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid)
            self.parent.inflate(self.parent.sizeLV + inc)

    @override
    def updateBlockInfo(self) -> Optional[str]:
        if VdiType.isCowImage(self.vdi_type):
            return VDI.updateBlockInfo(self)
        return None

    @override
    def rename(self, uuid) -> None:
        oldUuid = self.uuid
        oldLVName = self.fileName
        VDI.rename(self, uuid)
        self.fileName = LV_PREFIX[self.vdi_type] + self.uuid
        self.path = os.path.join(self.sr.path, self.fileName)
        assert(not self.sr.lvmCache.checkLV(self.fileName))

        self.sr.lvmCache.rename(oldLVName, self.fileName)
        if self.sr.lvActivator.get(oldUuid, False):
            self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False)

        ns = NS_PREFIX_LVM + self.sr.uuid
        (cnt, bcnt) = RefCounter.check(oldUuid, ns)
        RefCounter.set(self.uuid, cnt, bcnt, ns)
        RefCounter.reset(oldUuid, ns)

    @override
    def delete(self) -> None:
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" %
                                   self.uuid)
        self.sr.lock()
        try:
            self.sr.lvmCache.remove(self.fileName)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        RefCounter.reset(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
        VDI.delete(self)

    @override
    def getSizePhys(self) -> int:
        if self._sizePhys == -1:
            self._loadInfoSizePhys()
        return self._sizePhys

    def _loadInfoSizePhys(self):
        """Get the physical utilization of the COW image file. We do it
        individually (and not using the COW batch scanner) as an optimization:
        this info is relatively expensive and we need it only for VDIs
        involved in coalescing."""
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self._sizePhys = self.cowutil.getSizePhys(self.path)
        if self._sizePhys <= 0:
            raise util.SMException("phys size of %s = %d" %
                                   (self, self._sizePhys))

    @override
    def getAllocatedSize(self) -> int:
        if self._sizeAllocated == -1:
            self._loadInfoSizeAllocated()
        return self._sizeAllocated

    def _loadInfoSizeAllocated(self):
        """
        Get the allocated size of the COW volume.
        """
        if not VdiType.isCowImage(self.vdi_type):
            return
        self._activate()
        self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)

    @override
    def _loadInfoHidden(self) -> None:
        if not VdiType.isCowImage(self.vdi_type):
            self._hidden = self.sr.lvmCache.getHidden(self.fileName)
        else:
            VDI._loadInfoHidden(self)

    @override
    def _setHidden(self, hidden=True) -> None:
        if not VdiType.isCowImage(self.vdi_type):
            self._hidden = None
            self.sr.lvmCache.setHidden(self.fileName, hidden)
            self._hidden = hidden
        else:
            VDI._setHidden(self, hidden)

    @override
    def __str__(self) -> str:
        strType = self.vdi_type
        if self.vdi_type == VdiType.RAW:
            strType = "RAW"
        strHidden = ""
        if self.isHidden():
            strHidden = "*"
        strSizePhys = ""
        if self._sizePhys > 0:
            strSizePhys = Util.num2str(self._sizePhys)
        strSizeAllocated = ""
        if self._sizeAllocated >= 0:
            strSizeAllocated = Util.num2str(self._sizeAllocated)
        strActive = "n"
        if self.lvActive:
            strActive = "a"
        if self.lvOpen:
            strActive += "o"
        return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType,
                                             Util.num2str(self.sizeVirt),
                                             strSizePhys, strSizeAllocated,
                                             Util.num2str(self.sizeLV), strActive)

    @override
    def validate(self, fast=False) -> None:
        if VdiType.isCowImage(self.vdi_type):
            VDI.validate(self, fast)

    def _setChainRw(self) -> List[str]:
        """
        Make the read-only LV and its children writable.
        This is needed because the coalesce can be done by tapdisk directly,
        which will need to write parent information for the children, and the
        VDI we want to coalesce into its parent needs to be writable for the
        libqcow coalesce step.
        Return the list of LVs that were previously read-only so they can be
        made read-only again after the coalesce.
        """
        was_ro = []
        if self.lvReadonly:
            self.sr.lvmCache.setReadonly(self.fileName, False)
            was_ro.append(self.fileName)

        for child in self.children:
            if child.lvReadonly:
                self.sr.lvmCache.setReadonly(child.fileName, False)
                was_ro.append(child.fileName)

        return was_ro

    def _setChainRo(self, was_ro: List[str]) -> None:
        """Set the LVs given in the parameter list back to read-only"""
        for lvName in was_ro:
            self.sr.lvmCache.setReadonly(lvName, True)
1520 @override 

1521 def _doCoalesce(self) -> None: 

1522 """LVMVDI parents must first be activated, inflated, and made writable""" 

1523 was_ro = [] 

1524 try: 

1525 self._activateChain() 

1526 self.sr.lvmCache.setReadonly(self.parent.fileName, False) 

1527 self.parent.validate() 

1528 self.inflateParentForCoalesce() 

1529 was_ro = self._setChainRw() 

1530 VDI._doCoalesce(self) 

1531 finally: 

1532 self.parent._loadInfoSizePhys() 

1533 self.parent.deflate() 

1534 self.sr.lvmCache.setReadonly(self.parent.fileName, True) 

1535 self._setChainRo(was_ro) 

1536 

1537 @override 

1538 def _setParent(self, parent) -> None: 

1539 self._activate() 

1540 if self.lvReadonly: 

1541 self.sr.lvmCache.setReadonly(self.fileName, False) 

1542 

1543 try: 

1544 self.cowutil.setParent(self.path, parent.path, parent.vdi_type == VdiType.RAW) 

1545 finally: 

1546 if self.lvReadonly: 

1547 self.sr.lvmCache.setReadonly(self.fileName, True) 

1548 self._deactivate() 

1549 self.parent = parent 

1550 self.parentUuid = parent.uuid 

1551 parent.children.append(self) 

1552 try: 

1553 self.setConfig(self.DB_VDI_PARENT, self.parentUuid) 

1554 Util.log("Updated the VDI-parent field for child %s with %s" % \ 

1555 (self.uuid, self.parentUuid)) 

1556 except: 

1557 Util.log("Failed to update the VDI-parent with %s for child %s" % \ 

1558 (self.parentUuid, self.uuid)) 

1559 

1560 def _activate(self): 

1561 self.sr.lvActivator.activate(self.uuid, self.fileName, False) 

1562 

1563 def _activateChain(self): 

1564 vdi = self 

1565 while vdi: 

1566 vdi._activate() 

1567 vdi = vdi.parent 

1568 

1569 def _deactivate(self): 

1570 self.sr.lvActivator.deactivate(self.uuid, False) 

1571 

1572 @override 

1573 def _increaseSizeVirt(self, size, atomic=True) -> None: 

1574 "ensure the virtual size of 'self' is at least 'size'" 

1575 self._activate() 

1576 if VdiType.isCowImage(self.vdi_type): 

1577 VDI._increaseSizeVirt(self, size, atomic) 

1578 return 

1579 

1580 # raw VDI case 

1581 offset = self.sizeLV 

1582 if self.sizeVirt < size: 

1583 oldSize = self.sizeLV 

1584 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size) 

1585 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV)) 

1586 self.sr.lvmCache.setSize(self.fileName, self.sizeLV) 

1587 offset = oldSize 

1588 unfinishedZero = False 

1589 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid) 

1590 if jval: 

1591 unfinishedZero = True 

1592 offset = int(jval) 

1593 length = self.sizeLV - offset 

1594 if not length: 

1595 return 

1596 

1597 if unfinishedZero: 

1598 Util.log(" ==> Redoing unfinished zeroing out") 

1599 else: 

1600 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \ 

1601 str(offset)) 

1602 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1603 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1604 func = lambda: util.zeroOut(self.path, offset, length) 

1605 Util.runAbortable(func, True, self.sr.uuid, abortTest, 

1606 VDI.POLL_INTERVAL, 0) 

1607 self.sr.journaler.remove(self.JRN_ZERO, self.uuid) 

1608 
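# A rough sketch of the crash-recovery protocol implemented above (JRN_ZERO):

# before zeroing the newly added LV extent, the start offset is persisted in

# the journal; if the GC dies mid-zeroing, the surviving entry makes the next

# run redo the zeroing from the recorded offset instead of only the new LV

# end, and the entry is removed only once zeroOut() has completed.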

1609 @override 

1610 def _setSizeVirt(self, size) -> None: 

1611 """WARNING: do not call this method directly unless all VDIs in the 

1612 subtree are guaranteed to be unplugged (and remain so for the duration 

1613 of the operation): this operation is only safe for offline COW images.""" 

1614 self._activate() 

1615 jFile = self.lvmcowutil.createResizeJournal(self.sr.lvmCache, self.uuid) 

1616 try: 

1617 self.lvmcowutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size, jFile) 

1618 finally: 

1619 self.lvmcowutil.destroyResizeJournal(self.sr.lvmCache, self.uuid) 

1620 

1621 @override 

1622 def _queryCowBlocks(self) -> bytes: 

1623 self._activate() 

1624 return VDI._queryCowBlocks(self) 

1625 

1626 @override 

1627 def _calcExtraSpaceForCoalescing(self) -> int: 

1628 if not VdiType.isCowImage(self.parent.vdi_type): 

1629 return 0 # raw parents are never deflated in the first place 

1630 sizeCoalesced = self.lvmcowutil.calcVolumeSize(self._getCoalescedSizeData()) 

1631 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1632 return sizeCoalesced - self.parent.sizeLV 

1633 

1634 @override 

1635 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1636 """How much extra space in the SR will be required to 

1637 [live-]leaf-coalesce this VDI""" 

1638 # we can deflate the leaf to minimize the space requirements 

1639 deflateDiff = self.sizeLV - lvutil.calcSizeLV(self.getSizePhys()) 

1640 return self._calcExtraSpaceForCoalescing() - deflateDiff 

1641 

1642 @override 

1643 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1644 return self._calcExtraSpaceForCoalescing() + \ 

1645 lvutil.calcSizeLV(self.getSizePhys()) 

1646 

1647 

1648class LinstorVDI(VDI): 

1649 """Object representing a VDI in a LINSTOR SR""" 

1650 

1651 VOLUME_LOCK_TIMEOUT = 30 

1652 

1653 @override 

1654 def load(self, info=None) -> None:

1655 self.scanError = True

1656 self.parent = None

1657 self.children = []

1658

1659 self.fileName = self.sr._linstor.get_volume_name(self.uuid)

1660 self.path = self.sr._linstor.build_device_path(self.fileName)

1661 # Note: "info" may be None at this point, so use self.vdi_type (set at construction) instead of info.vdiType; parentUuid is assigned below once "info" is known to be valid.

1662 self.linstorcowutil = LinstorCowUtil(self.sr.xapi.session, self.sr._linstor, self.vdi_type)

1663 

1664 if not info: 

1665 try: 

1666 info = self.linstorcowutil.get_info(self.uuid) 

1667 except util.SMException: 

1668 Util.log( 

1669 ' [VDI {}: failed to read COW image metadata]'.format(self.uuid) 

1670 ) 

1671 return 

1672 

1673 self.parentUuid = info.parentUuid 

1674 self.sizeVirt = info.sizeVirt 

1675 self._sizePhys = -1 

1676 self._sizeAllocated = -1 

1677 self.drbd_size = -1 

1678 self._hidden = info.hidden 

1679 self.scanError = False 

1680 

1681 @override 

1682 def getSizePhys(self, fetch=False) -> int: 

1683 if self._sizePhys < 0 or fetch: 

1684 self._sizePhys = self.linstorcowutil.get_size_phys(self.uuid) 

1685 return self._sizePhys 

1686 

1687 def getDrbdSize(self, fetch=False): 

1688 if self.drbd_size < 0 or fetch: 

1689 self.drbd_size = self.linstorcowutil.get_drbd_size(self.uuid) 

1690 return self.drbd_size 

1691 

1692 @override 

1693 def getAllocatedSize(self) -> int: 

1694 if self._sizeAllocated == -1: 

1695 if VdiType.isCowImage(self.vdi_type): 

1696 self._sizeAllocated = self.linstorcowutil.get_allocated_size(self.uuid) 

1697 return self._sizeAllocated 

1698 

1699 def inflate(self, size): 

1700 if not VdiType.isCowImage(self.vdi_type): 

1701 return 

1702 self.sr.lock() 

1703 try: 

1704 # Ensure we use the real DRBD size and not the cached one:

1705 # this attribute can change if the volume is resized by the user.

1706 self.drbd_size = self.getDrbdSize(fetch=True) 

1707 self.linstorcowutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size) 

1708 finally: 

1709 self.sr.unlock() 

1710 self.drbd_size = -1 

1711 self._sizePhys = -1 

1712 self._sizeAllocated = -1 

1713 

1714 def deflate(self): 

1715 if not VdiType.isCowImage(self.vdi_type): 

1716 return 

1717 self.sr.lock() 

1718 try: 

1719 # Ensure we use the real sizes and not the cached info. 

1720 self.drbd_size = self.getDrbdSize(fetch=True) 

1721 self._sizePhys = self.getSizePhys(fetch=True) 

1722 self.linstorcowutil.force_deflate(self.path, self._sizePhys, self.drbd_size, zeroize=False) 

1723 finally: 

1724 self.sr.unlock() 

1725 self.drbd_size = -1 

1726 self._sizePhys = -1 

1727 self._sizeAllocated = -1 

1728 
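# Note on the cached sizes: drbd_size, _sizePhys and _sizeAllocated are reset

# to -1 after every inflate/deflate so that the next getter re-reads them

# from LINSTOR; the fetch=True reads above guard against the volume having

# been resized by the user behind our back.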

1729 def inflateFully(self): 

1730 if VdiType.isCowImage(self.vdi_type): 

1731 self.inflate(self.linstorcowutil.compute_volume_size(self.sizeVirt)) 

1732 

1733 @override 

1734 def rename(self, uuid) -> None: 

1735 Util.log('Renaming {} -> {} (path={})'.format( 

1736 self.uuid, uuid, self.path 

1737 )) 

1738 self.sr._linstor.update_volume_uuid(self.uuid, uuid) 

1739 VDI.rename(self, uuid) 

1740 

1741 @override 

1742 def delete(self) -> None: 

1743 if len(self.children) > 0: 

1744 raise util.SMException( 

1745 'VDI {} has children, can\'t delete'.format(self.uuid) 

1746 ) 

1747 self.sr.lock() 

1748 try: 

1749 self.sr._linstor.destroy_volume(self.uuid) 

1750 self.sr.forgetVDI(self.uuid) 

1751 finally: 

1752 self.sr.unlock() 

1753 VDI.delete(self) 

1754 

1755 @override 

1756 def validate(self, fast=False) -> None: 

1757 if VdiType.isCowImage(self.vdi_type) and self.linstorcowutil.check(self.uuid, fast=fast) != CowUtil.CheckResult.Success: 

1758 raise util.SMException('COW image {} corrupted'.format(self)) 

1759 

1760 @override 

1761 def pause(self, failfast=False) -> None: 

1762 self.sr._linstor.ensure_volume_is_not_locked( 

1763 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1764 ) 

1765 return super(LinstorVDI, self).pause(failfast) 

1766 

1767 @override 

1768 def coalesce(self) -> int: 

1769 # Note: we raise `SMException` here so that a failure only skips the current coalesce;

1770 # with any other exception type the subsequent coalesce calls would not be executed.

1771 return self.linstorcowutil.force_coalesce(self.path) 

1772 

1773 @override 

1774 def getParent(self) -> str: 

1775 return self.linstorcowutil.get_parent( 

1776 self.sr._linstor.get_volume_uuid_from_device_path(self.path) 

1777 ) 

1778 

1779 @override 

1780 def repair(self, parent_uuid) -> None: 

1781 self.linstorcowutil.force_repair( 

1782 self.sr._linstor.get_device_path(parent_uuid) 

1783 ) 

1784 

1785 @override 

1786 def _relinkSkip(self) -> None: 

1787 abortFlag = IPCFlag(self.sr.uuid) 

1788 for child in self.children: 

1789 if abortFlag.test(FLAG_TYPE_ABORT): 

1790 raise AbortException('Aborting due to signal') 

1791 Util.log( 

1792 ' Relinking {} from {} to {}'.format( 

1793 child, self, self.parent 

1794 ) 

1795 ) 

1796 

1797 session = child.sr.xapi.session 

1798 sr_uuid = child.sr.uuid 

1799 vdi_uuid = child.uuid 

1800 try: 

1801 self.sr._linstor.ensure_volume_is_not_locked( 

1802 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1803 ) 

1804 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) 

1805 child._setParent(self.parent) 

1806 finally: 

1807 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) 

1808 self.children = [] 

1809 

1810 @override 

1811 def _setParent(self, parent) -> None: 

1812 self.sr._linstor.get_device_path(self.uuid) 

1813 self.linstorcowutil.force_parent(self.path, parent.path) 

1814 self.parent = parent 

1815 self.parentUuid = parent.uuid 

1816 parent.children.append(self) 

1817 try: 

1818 self.setConfig(self.DB_VDI_PARENT, self.parentUuid) 

1819 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1820 (self.uuid, self.parentUuid)) 

1821 except: 

1822 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1823 (self.uuid, self.parentUuid)) 

1824 

1825 @override 

1826 def _doCoalesce(self) -> None: 

1827 try: 

1828 self._activateChain() 

1829 self.parent.validate() 

1830 self._inflateParentForCoalesce() 

1831 VDI._doCoalesce(self) 

1832 finally: 

1833 self.parent.deflate() 

1834 

1835 def _activateChain(self): 

1836 vdi = self 

1837 while vdi: 

1838 try: 

1839 p = self.sr._linstor.get_device_path(vdi.uuid) 

1840 except Exception as e: 

1841 # Use SMException to skip this coalesce only.

1842 # Otherwise the whole GC would be stopped...

1843 raise util.SMException(str(e)) 

1844 vdi = vdi.parent 

1845 

1846 @override 

1847 def _setHidden(self, hidden=True) -> None: 

1848 HIDDEN_TAG = 'hidden' 

1849 

1850 if not VdiType.isCowImage(self.vdi_type): 

1851 self._hidden = None 

1852 self.sr._linstor.update_volume_metadata(self.uuid, { 

1853 HIDDEN_TAG: hidden 

1854 }) 

1855 self._hidden = hidden 

1856 else: 

1857 VDI._setHidden(self, hidden) 

1858 

1859 @override 

1860 def _increaseSizeVirt(self, size, atomic=True): 

1861 if self.vdi_type == VdiType.RAW: 

1862 offset = self.drbd_size 

1863 if self.sizeVirt < size: 

1864 oldSize = self.drbd_size 

1865 self.drbd_size = LinstorVolumeManager.round_up_volume_size(size) 

1866 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size)) 

1867 self.sr._linstor.resize_volume(self.uuid, self.drbd_size) 

1868 offset = oldSize 

1869 unfinishedZero = False 

1870 jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid) 

1871 if jval: 

1872 unfinishedZero = True 

1873 offset = int(jval) 

1874 length = self.drbd_size - offset 

1875 if not length: 

1876 return 

1877 

1878 if unfinishedZero: 

1879 Util.log(" ==> Redoing unfinished zeroing out") 

1880 else: 

1881 self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset)) 

1882 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1883 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1884 func = lambda: util.zeroOut(self.path, offset, length) 

1885 Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0) 

1886 self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid) 

1887 return 

1888 

1889 if self.sizeVirt >= size: 

1890 return 

1891 Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \ 

1892 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1893 

1894 msize = self.linstorcowutil.get_max_resize_size(self.uuid) * 1024 * 1024 

1895 if (size <= msize): 

1896 self.linstorcowutil.set_size_virt_fast(self.path, size) 

1897 else: 

1898 if atomic: 

1899 vdiList = self._getAllSubtree() 

1900 self.sr.lock() 

1901 try: 

1902 self.sr.pauseVDIs(vdiList) 

1903 try: 

1904 self._setSizeVirt(size) 

1905 finally: 

1906 self.sr.unpauseVDIs(vdiList) 

1907 finally: 

1908 self.sr.unlock() 

1909 else: 

1910 self._setSizeVirt(size) 

1911 

1912 self.sizeVirt = self.linstorcowutil.get_size_virt(self.uuid) 

1913 

1914 @override 

1915 def _setSizeVirt(self, size) -> None: 

1916 jfile = self.uuid + '-jvhd' 

1917 self.sr._linstor.create_volume( 

1918 jfile, self.cowutil.getResizeJournalSize(), persistent=False, volume_name=jfile 

1919 ) 

1920 try: 

1921 self.inflate(self.linstorcowutil.compute_volume_size(size)) 

1922 self.linstorcowutil.set_size_virt(self.path, size, jfile) 

1923 finally: 

1924 try: 

1925 self.sr._linstor.destroy_volume(jfile) 

1926 except Exception: 

1927 # Failures here can be ignored: this volume is not persistent anyway.

1928 pass 

1929 
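# The temporary '-jvhd' resize journal above is deliberately created with

# persistent=False, so a crash mid-resize should not leave a stale volume

# behind; that is also why the destroy_volume() failure path can be ignored.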

1930 @override 

1931 def _queryCowBlocks(self) -> bytes: 

1932 return self.linstorcowutil.get_block_bitmap(self.uuid) 

1933 

1934 def _inflateParentForCoalesce(self): 

1935 if not VdiType.isCowImage(self.parent.vdi_type): 

1936 return 

1937 inc = self._calcExtraSpaceForCoalescing() 

1938 if inc > 0: 

1939 self.parent.inflate(self.parent.getDrbdSize() + inc) 

1940 

1941 @override 

1942 def _calcExtraSpaceForCoalescing(self) -> int: 

1943 if not VdiType.isCowImage(self.parent.vdi_type): 

1944 return 0 

1945 size_coalesced = self.linstorcowutil.compute_volume_size(self._getCoalescedSizeData()) 

1946 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced)) 

1947 return size_coalesced - self.parent.getDrbdSize() 

1948 

1949 @override 

1950 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1951 assert self.getDrbdSize() > 0 

1952 assert self.getSizePhys() > 0 

1953 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizePhys()) 

1954 assert deflate_diff >= 0 

1955 return self._calcExtraSpaceForCoalescing() - deflate_diff 

1956 

1957 @override 

1958 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1959 assert self.getSizePhys() > 0 

1960 return self._calcExtraSpaceForCoalescing() + \ 

1961 LinstorVolumeManager.round_up_volume_size(self.getSizePhys()) 

1962 

1963################################################################################ 

1964# 

1965# SR 

1966# 

1967class SR(object): 

1968 class LogFilter: 

1969 def __init__(self, sr): 

1970 self.sr = sr 

1971 self.stateLogged = False 

1972 self.prevState = {} 

1973 self.currState = {} 

1974 

1975 def logState(self): 

1976 changes = "" 

1977 self.currState.clear() 

1978 for vdi in self.sr.vdiTrees: 

1979 self.currState[vdi.uuid] = self._getTreeStr(vdi) 

1980 if not self.prevState.get(vdi.uuid) or \ 

1981 self.prevState[vdi.uuid] != self.currState[vdi.uuid]: 

1982 changes += self.currState[vdi.uuid] 

1983 

1984 for uuid in self.prevState: 

1985 if not self.currState.get(uuid): 

1986 changes += "Tree %s gone\n" % uuid 

1987 

1988 result = "SR %s (%d VDIs in %d COW trees): " % \ 

1989 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees)) 

1990 

1991 if len(changes) > 0: 

1992 if self.stateLogged: 

1993 result += "showing only COW trees that changed:" 

1994 result += "\n%s" % changes 

1995 else: 

1996 result += "no changes" 

1997 

1998 for line in result.split("\n"): 

1999 Util.log("%s" % line) 

2000 self.prevState.clear() 

2001 for key, val in self.currState.items(): 

2002 self.prevState[key] = val 

2003 self.stateLogged = True 

2004 

2005 def logNewVDI(self, uuid): 

2006 if self.stateLogged: 

2007 Util.log("Found new VDI when scanning: %s" % uuid) 

2008 

2009 def _getTreeStr(self, vdi, indent=8): 

2010 treeStr = "%s%s\n" % (" " * indent, vdi) 

2011 for child in vdi.children: 

2012 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT) 

2013 return treeStr 

2014 

2015 TYPE_FILE = "file" 

2016 TYPE_LVHD = "lvhd" 

2017 TYPE_LINSTOR = "linstor" 

2018 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR] 

2019 

2020 LOCK_RETRY_INTERVAL = 3 

2021 LOCK_RETRY_ATTEMPTS = 20 

2022 LOCK_RETRY_ATTEMPTS_LOCK = 100 

2023 

2024 SCAN_RETRY_ATTEMPTS = 3 

2025 

2026 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM) 

2027 TMP_RENAME_PREFIX = "OLD_" 

2028 

2029 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline" 

2030 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override" 

2031 

2032 @staticmethod 

2033 def getInstance(uuid, xapiSession, createLock=True, force=False): 

2034 xapi = XAPI(xapiSession, uuid) 

2035 type = normalizeType(xapi.srRecord["type"]) 

2036 if type == SR.TYPE_FILE: 

2037 return FileSR(uuid, xapi, createLock, force) 

2038 elif type == SR.TYPE_LVHD: 

2039 return LVMSR(uuid, xapi, createLock, force) 

2040 elif type == SR.TYPE_LINSTOR: 

2041 return LinstorSR(uuid, xapi, createLock, force) 

2042 raise util.SMException("SR type %s not recognized" % type) 

2043 

2044 def __init__(self, uuid, xapi, createLock, force): 

2045 self.logFilter = self.LogFilter(self) 

2046 self.uuid = uuid 

2047 self.path = "" 

2048 self.name = "" 

2049 self.vdis = {} 

2050 self.vdiTrees = [] 

2051 self.journaler = None 

2052 self.xapi = xapi 

2053 self._locked = 0 

2054 self._srLock = None 

2055 if createLock:

2056 self._srLock = lock.Lock(lock.LOCK_TYPE_SR, self.uuid) 

2057 else: 

2058 Util.log("Requested no SR locking") 

2059 self.name = self.xapi.srRecord["name_label"] 

2060 self._failedCoalesceTargets = [] 

2061 

2062 if not self.xapi.isPluggedHere(): 

2063 if force:

2064 Util.log("SR %s not attached on this host, ignoring" % uuid) 

2065 else: 

2066 if not self.wait_for_plug(): 

2067 raise util.SMException("SR %s not attached on this host" % uuid) 

2068 

2069 if force:

2070 Util.log("Not checking if we are Master (SR %s)" % uuid) 

2071 elif not self.xapi.isMaster():

2072 raise util.SMException("This host is NOT master, will not run") 

2073 

2074 self.no_space_candidates = {} 

2075 

2076 def msg_cleared(self, xapi_session, msg_ref): 

2077 try: 

2078 msg = xapi_session.xenapi.message.get_record(msg_ref) 

2079 except XenAPI.Failure: 

2080 return True 

2081 

2082 return msg is None 

2083 

2084 def check_no_space_candidates(self): 

2085 xapi_session = self.xapi.getSession() 

2086 

2087 msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE) 

2088 if self.no_space_candidates: 

2089 if msg_id is None or self.msg_cleared(xapi_session, msg_id): 

2090 util.SMlog("Could not coalesce due to a lack of space " 

2091 f"in SR {self.uuid}") 

2092 msg_body = ("Unable to perform data coalesce due to a lack " 

2093 f"of space in SR {self.uuid}") 

2094 msg_id = xapi_session.xenapi.message.create( 

2095 'SM_GC_NO_SPACE', 

2096 3, 

2097 "SR", 

2098 self.uuid, 

2099 msg_body) 

2100 xapi_session.xenapi.SR.remove_from_sm_config( 

2101 self.xapi.srRef, VDI.DB_GC_NO_SPACE) 

2102 xapi_session.xenapi.SR.add_to_sm_config( 

2103 self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id) 

2104 

2105 for candidate in self.no_space_candidates.values(): 

2106 candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id) 

2107 elif msg_id is not None: 

2108 # Everything was coalescable, remove the message 

2109 xapi_session.xenapi.message.destroy(msg_id) 

2110 

2111 def clear_no_space_msg(self, vdi): 

2112 msg_id = None 

2113 try: 

2114 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE) 

2115 except XenAPI.Failure: 

2116 pass 

2117 

2118 self.no_space_candidates.pop(vdi.uuid, None) 

2119 if msg_id is not None:

2120 vdi.delConfig(VDI.DB_GC_NO_SPACE) 

2121 

2122 

2123 def wait_for_plug(self): 

2124 for _ in range(1, 10): 

2125 time.sleep(2) 

2126 if self.xapi.isPluggedHere(): 

2127 return True 

2128 return False 

2129 

2130 def gcEnabled(self, refresh=True): 

2131 if refresh: 

2132 self.xapi.srRecord = \ 

2133 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

2134 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

2135 Util.log("GC is disabled for this SR, abort") 

2136 return False 

2137 return True 

2138 

2139 def scan(self, force=False) -> None: 

2140 """Scan the SR and load VDI info for each VDI. If called repeatedly, 

2141 update VDI objects if they already exist""" 

2142 pass 

2143 

2144 def scanLocked(self, force=False): 

2145 self.lock() 

2146 try: 

2147 self.scan(force) 

2148 finally: 

2149 self.unlock() 

2150 

2151 def getVDI(self, uuid): 

2152 return self.vdis.get(uuid) 

2153 

2154 def hasWork(self): 

2155 if len(self.findGarbage()) > 0: 

2156 return True 

2157 if self.findCoalesceable(): 

2158 return True 

2159 if self.findLeafCoalesceable(): 

2160 return True 

2161 if self.needUpdateBlockInfo(): 

2162 return True 

2163 return False 

2164 

2165 def findCoalesceable(self): 

2166 """Find a coalesceable VDI. Return a vdi that should be coalesced 

2167 (choosing one among all coalesceable candidates according to some 

2168 criteria) or None if there is no VDI that could be coalesced""" 

2169 

2170 candidates = [] 

2171 

2172 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE) 

2173 if srSwitch == "false": 

2174 Util.log("Coalesce disabled for this SR") 

2175 return candidates 

2176 

2177 # finish any VDI for which a relink journal entry exists first 

2178 journals = self.journaler.getAll(VDI.JRN_RELINK) 

2179 for uuid in journals: 

2180 vdi = self.getVDI(uuid) 

2181 if vdi and vdi not in self._failedCoalesceTargets: 

2182 return vdi 

2183 

2184 for vdi in self.vdis.values(): 

2185 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets: 

2186 candidates.append(vdi) 

2187 Util.log("%s is coalescable" % vdi.uuid) 

2188 

2189 self.xapi.update_task_progress("coalescable", len(candidates)) 

2190 

2191 # pick one in the tallest tree 

2192 treeHeight = dict() 

2193 for c in candidates: 

2194 height = c.getTreeRoot().getTreeHeight() 

2195 if treeHeight.get(height): 

2196 treeHeight[height].append(c) 

2197 else: 

2198 treeHeight[height] = [c] 

2199 

2200 freeSpace = self.getFreeSpace() 

2201 heights = list(treeHeight.keys()) 

2202 heights.sort(reverse=True) 

2203 for h in heights: 

2204 for c in treeHeight[h]: 

2205 spaceNeeded = c._calcExtraSpaceForCoalescing() 

2206 if spaceNeeded <= freeSpace: 

2207 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h)) 

2208 self.clear_no_space_msg(c) 

2209 return c 

2210 else: 

2211 self.no_space_candidates[c.uuid] = c 

2212 Util.log("No space to coalesce %s (free space: %d)" % \ 

2213 (c, freeSpace)) 

2214 return None 

2215 
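# Candidate selection policy (as implemented above): candidates are grouped

# by the height of the tree they belong to, and the tallest trees are tried

# first since coalescing there shortens the longest chains fastest. Within a

# height group, the first candidate whose extra-space estimate fits the

# current free space wins; the rest are recorded as no-space candidates. For

# example, with candidates grouped as {4: [a], 2: [b, c]}, "a" is offered

# first, and "b"/"c" are considered only if "a" does not fit.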

2216 def getSwitch(self, key): 

2217 return self.xapi.srRecord["other_config"].get(key) 

2218 

2219 def forbiddenBySwitch(self, switch, condition, fail_msg): 

2220 srSwitch = self.getSwitch(switch) 

2221 ret = False 

2222 if srSwitch: 

2223 ret = srSwitch == condition 

2224 

2225 if ret: 

2226 Util.log(fail_msg) 

2227 

2228 return ret 

2229 

2230 def leafCoalesceForbidden(self): 

2231 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

2232 "false", 

2233 "Coalesce disabled for this SR") or 

2234 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

2235 VDI.LEAFCLSC_DISABLED, 

2236 "Leaf-coalesce disabled for this SR")) 

2237 

2238 def findLeafCoalesceable(self): 

2239 """Find leaf-coalesceable VDIs in each COW tree""" 

2240 

2241 candidates = [] 

2242 if self.leafCoalesceForbidden(): 

2243 return candidates 

2244 

2245 self.gatherLeafCoalesceable(candidates) 

2246 

2247 self.xapi.update_task_progress("coalescable", len(candidates)) 

2248 

2249 freeSpace = self.getFreeSpace() 

2250 for candidate in candidates: 

2251 # check the space constraints to see if leaf-coalesce is actually 

2252 # feasible for this candidate 

2253 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing() 

2254 spaceNeededLive = spaceNeeded 

2255 if spaceNeeded > freeSpace: 

2256 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing() 

2257 if candidate.canLiveCoalesce(self.getStorageSpeed()): 

2258 spaceNeeded = spaceNeededLive 

2259 

2260 if spaceNeeded <= freeSpace: 

2261 Util.log("Leaf-coalesce candidate: %s" % candidate) 

2262 self.clear_no_space_msg(candidate) 

2263 return candidate 

2264 else: 

2265 Util.log("No space to leaf-coalesce %s (free space: %d)" % \ 

2266 (candidate, freeSpace)) 

2267 if spaceNeededLive <= freeSpace: 

2268 Util.log("...but enough space if skip snap-coalesce") 

2269 candidate.setConfig(VDI.DB_LEAFCLSC, 

2270 VDI.LEAFCLSC_OFFLINE) 

2271 self.no_space_candidates[candidate.uuid] = candidate 

2272 

2273 return None 

2274 

2275 def gatherLeafCoalesceable(self, candidates): 

2276 for vdi in self.vdis.values(): 

2277 if not vdi.isLeafCoalesceable(): 

2278 continue 

2279 if vdi in self._failedCoalesceTargets: 

2280 continue 

2281 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET: 

2282 Util.log("Skipping reset-on-boot %s" % vdi) 

2283 continue 

2284 if vdi.getConfig(vdi.DB_ALLOW_CACHING): 

2285 Util.log("Skipping allow_caching=true %s" % vdi) 

2286 continue 

2287 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED: 

2288 Util.log("Leaf-coalesce disabled for %s" % vdi) 

2289 continue 

2290 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or 

2291 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE): 

2292 continue 

2293 candidates.append(vdi) 

2294 

2295 def coalesce(self, vdi, dryRun=False): 

2296 """Coalesce vdi onto parent""" 

2297 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent)) 

2298 if dryRun:

2299 return 

2300 

2301 try: 

2302 self._coalesce(vdi) 

2303 except util.SMException as e: 

2304 if isinstance(e, AbortException):

2305 self.cleanup() 

2306 raise 

2307 else: 

2308 self._failedCoalesceTargets.append(vdi) 

2309 Util.logException("coalesce") 

2310 Util.log("Coalesce failed, skipping") 

2311 self.cleanup() 

2312 

2313 def coalesceLeaf(self, vdi, dryRun=False): 

2314 """Leaf-coalesce vdi onto parent""" 

2315 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent)) 

2316 if dryRun: 

2317 return 

2318 

2319 try: 

2320 uuid = vdi.uuid 

2321 try: 

2322 # "vdi" object will no longer be valid after this call 

2323 self._coalesceLeaf(vdi) 

2324 finally: 

2325 vdi = self.getVDI(uuid) 

2326 if vdi: 

2327 vdi.delConfig(vdi.DB_LEAFCLSC) 

2328 except AbortException: 

2329 self.cleanup() 

2330 raise 

2331 except (util.SMException, XenAPI.Failure) as e: 

2332 self._failedCoalesceTargets.append(vdi) 

2333 Util.logException("leaf-coalesce") 

2334 Util.log("Leaf-coalesce failed on %s, skipping" % vdi) 

2335 self.cleanup() 

2336 

2337 def garbageCollect(self, dryRun=False): 

2338 vdiList = self.findGarbage() 

2339 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

2340 for vdi in vdiList: 

2341 Util.log(" %s" % vdi) 

2342 if not dryRun: 

2343 self.deleteVDIs(vdiList) 

2344 self.cleanupJournals(dryRun) 

2345 

2346 def findGarbage(self): 

2347 vdiList = [] 

2348 for vdi in self.vdiTrees: 

2349 vdiList.extend(vdi.getAllPrunable()) 

2350 return vdiList 

2351 

2352 def deleteVDIs(self, vdiList) -> None: 

2353 for vdi in vdiList: 

2354 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): 

2355 raise AbortException("Aborting due to signal") 

2356 Util.log("Deleting unlinked VDI %s" % vdi) 

2357 self.deleteVDI(vdi) 

2358 

2359 def deleteVDI(self, vdi) -> None: 

2360 assert(len(vdi.children) == 0) 

2361 del self.vdis[vdi.uuid] 

2362 if vdi.parent:

2363 vdi.parent.children.remove(vdi) 

2364 if vdi in self.vdiTrees:

2365 self.vdiTrees.remove(vdi) 

2366 vdi.delete() 

2367 

2368 def forgetVDI(self, vdiUuid) -> None: 

2369 self.xapi.forgetVDI(self.uuid, vdiUuid) 

2370 

2371 def pauseVDIs(self, vdiList) -> None: 

2372 paused = [] 

2373 failed = False 

2374 for vdi in vdiList: 

2375 try: 

2376 vdi.pause() 

2377 paused.append(vdi) 

2378 except: 

2379 Util.logException("pauseVDIs") 

2380 failed = True 

2381 break 

2382 

2383 if failed: 

2384 self.unpauseVDIs(paused) 

2385 raise util.SMException("Failed to pause VDIs") 

2386 

2387 def unpauseVDIs(self, vdiList): 

2388 failed = False 

2389 for vdi in vdiList: 

2390 try: 

2391 vdi.unpause() 

2392 except: 

2393 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

2394 failed = True 

2395 if failed: 

2396 raise util.SMException("Failed to unpause VDIs") 

2397 
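# The pause above is all-or-nothing: on the first failure, every VDI paused

# so far is unpaused again before raising, so callers never observe a

# half-paused subtree. unpauseVDIs, by contrast, keeps going on errors and

# raises only at the end, to release as many VDIs as possible.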

2398 def getFreeSpace(self) -> int: 

2399 return 0 

2400 

2401 def cleanup(self): 

2402 Util.log("In cleanup") 

2403 return 

2404 

2405 @override 

2406 def __str__(self) -> str: 

2407 if self.name: 

2408 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

2409 else: 

2410 ret = "%s" % self.uuid 

2411 return ret 

2412 

2413 def lock(self): 

2414 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort 

2415 signal to avoid deadlocking (trying to acquire the SR lock while the 

2416 lock is held by a process that is trying to abort us)""" 

2417 if not self._srLock: 

2418 return 

2419 

2420 if self._locked == 0: 

2421 abortFlag = IPCFlag(self.uuid) 

2422 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK): 

2423 if self._srLock.acquireNoblock(): 

2424 self._locked += 1 

2425 return 

2426 if abortFlag.test(FLAG_TYPE_ABORT): 

2427 raise AbortException("Abort requested") 

2428 time.sleep(SR.LOCK_RETRY_INTERVAL) 

2429 raise util.SMException("Unable to acquire the SR lock") 

2430 

2431 self._locked += 1 

2432 

2433 def unlock(self): 

2434 if not self._srLock:

2435 return 

2436 assert(self._locked > 0) 

2437 self._locked -= 1 

2438 if self._locked == 0: 

2439 self._srLock.release() 

2440 
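# A minimal usage sketch of the re-entrant SR lock above (hypothetical call

# site, not from this file):

#

#     sr.lock()

#     try:

#         ...  # nested helpers may call sr.lock()/sr.unlock() again

#     finally:

#         sr.unlock()

#

# Only the first acquire takes the OS-level lock (retrying up to

# LOCK_RETRY_ATTEMPTS_LOCK times while watching the abort flag); nested

# acquires just bump self._locked, and the lock is released when the count

# returns to zero.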

2441 def needUpdateBlockInfo(self) -> bool: 

2442 for vdi in self.vdis.values(): 

2443 if vdi.scanError or len(vdi.children) == 0: 

2444 continue 

2445 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

2446 return True 

2447 return False 

2448 

2449 def updateBlockInfo(self) -> None: 

2450 for vdi in self.vdis.values(): 

2451 if vdi.scanError or len(vdi.children) == 0: 

2452 continue 

2453 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

2454 vdi.updateBlockInfo() 

2455 

2456 def cleanupCoalesceJournals(self): 

2457 """Remove stale coalesce VDI indicators""" 

2458 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2459 for uuid, jval in entries.items(): 

2460 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2461 

2462 def cleanupJournals(self, dryRun=False): 

2463 """delete journal entries for non-existing VDIs""" 

2464 for t in [LVMVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]: 

2465 entries = self.journaler.getAll(t) 

2466 for uuid, jval in entries.items(): 

2467 if self.getVDI(uuid): 

2468 continue 

2469 if t == SR.JRN_CLONE: 

2470 baseUuid, clonUuid = jval.split("_") 

2471 if self.getVDI(baseUuid): 

2472 continue 

2473 Util.log(" Deleting stale '%s' journal entry for %s " 

2474 "(%s)" % (t, uuid, jval)) 

2475 if not dryRun: 

2476 self.journaler.remove(t, uuid) 

2477 

2478 def cleanupCache(self, maxAge=-1) -> int: 

2479 return 0 

2480 

2481 def _hasLeavesAttachedOn(self, vdi: VDI): 

2482 leaves = vdi.getAllLeaves() 

2483 leaves_vdi = [leaf.uuid for leaf in leaves] 

2484 return util.get_hosts_attached_on(self.xapi.session, leaves_vdi) 

2485 

2486 def _gc_running_file(self, vdi: VDI): 

2487 run_file = "gc_running_{}".format(vdi.uuid) 

2488 return os.path.join(NON_PERSISTENT_DIR, str(self.uuid), run_file) 

2489 

2490 def _create_running_file(self, vdi: VDI): 

2491 with open(self._gc_running_file(vdi), "w") as f: 

2492 f.write("1") 

2493 

2494 def _delete_running_file(self, vdi: VDI): 

2495 os.unlink(self._gc_running_file(vdi)) 

2496 

2497 def _coalesce(self, vdi: VDI): 

2498 if self.journaler.get(vdi.JRN_RELINK, vdi.uuid):

2499 # this means we had done the actual coalescing already and just 

2500 # need to finish relinking and/or refreshing the children 

2501 Util.log("==> Coalesce apparently already done: skipping") 

2502 else: 

2503 # JRN_COALESCE is used to check which VDI is being coalesced in 

2504 # order to decide whether to abort the coalesce. We remove the 

2505 # journal as soon as the COW coalesce step is done, because we 

2506 # don't expect the rest of the process to take long 

2507 

2508 if os.path.exists(self._gc_running_file(vdi)):

2509 util.SMlog("gc_running already exist for {}. Ignoring...".format(self.uuid)) 

2510 

2511 self._create_running_file(vdi) 

2512 

2513 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1") 

2514 host_refs = self._hasLeavesAttachedOn(vdi) 

2515 #TODO: this check of multiple host_refs should be done earlier in `is_coalesceable` to avoid stopping this late every time 

2516 if len(host_refs) > 1:

2517 util.SMlog("Not coalesceable, chain activated more than once") 

2518 raise Exception("Not coalesceable, chain activated more than once") #TODO: Use correct error 

2519 

2520 try: 

2521 if host_refs and vdi.cowutil.isCoalesceableOnRemote():

2522 #Leaf opened on another host, we need to call online coalesce 

2523 util.SMlog("Remote coalesce for {}".format(vdi.path)) 

2524 vdi._doCoalesceOnHost(list(host_refs)[0]) 

2525 else: 

2526 util.SMlog("Offline coalesce for {}".format(vdi.path)) 

2527 vdi._doCoalesce() 

2528 except Exception as e: 

2529 util.SMlog("EXCEPTION while coalescing: {}".format(e)) 

2530 self._delete_running_file(vdi) 

2531 raise 

2532 

2533 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid) 

2534 self._delete_running_file(vdi) 

2535 

2536 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid) 

2537 

2538 # we now need to relink the children: lock the SR to prevent ops 

2539 # like SM.clone from manipulating the VDIs we'll be relinking and 

2540 # rescan the SR first in case the children changed since the last 

2541 # scan 

2542 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1") 

2543 

2544 self.lock() 

2545 try: 

2546 vdi.parent._tagChildrenForRelink() 

2547 self.scan() 

2548 vdi._relinkSkip() #TODO: We could check whether the parent is already the right one before relinking; doing the relink a second time doesn't seem to cause issues

2549 finally: 

2550 self.unlock() 

2551 # Reload the children to leave things consistent 

2552 vdi.parent._reloadChildren(vdi) 

2553 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid) 

2554 

2555 self.deleteVDI(vdi) 

2556 
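# Crash-consistency summary for _coalesce (as implemented above):

# 1. JRN_COALESCE marks "data copy in progress"; it is removed right after

#    the COW copy, so abort requests are only honoured during the long phase.

# 2. JRN_RELINK marks "children still need repointing"; if we die here, the

#    next run returns this VDI first from findCoalesceable() and _coalesce

#    skips straight to the relink step ("Coalesce apparently already done").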

2557 class CoalesceTracker: 

2558 GRACE_ITERATIONS = 2 

2559 MAX_ITERATIONS_NO_PROGRESS = 3 

2560 MAX_ITERATIONS = 10 

2561 MAX_INCREASE_FROM_MINIMUM = 1.2 

2562 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2563 " --> Final size {finSize}" 

2564 

2565 def __init__(self, sr): 

2566 self.itsNoProgress = 0 

2567 self.its = 0 

2568 self.minSize = float("inf") 

2569 self.history = [] 

2570 self.reason = "" 

2571 self.startSize = None 

2572 self.finishSize = None 

2573 self.sr = sr 

2574 self.grace_remaining = self.GRACE_ITERATIONS 

2575 

2576 def abortCoalesce(self, prevSize, curSize): 

2577 self.its += 1 

2578 self.history.append(self.HISTORY_STRING.format(its=self.its, 

2579 initSize=prevSize, 

2580 finSize=curSize)) 

2581 

2582 self.finishSize = curSize 

2583 

2584 if self.startSize is None: 

2585 self.startSize = prevSize 

2586 

2587 if curSize < self.minSize: 

2588 self.minSize = curSize 

2589 

2590 if prevSize < self.minSize: 

2591 self.minSize = prevSize 

2592 

2593 if self.its == 1: 

2594 # Skip evaluating conditions on first iteration 

2595 return False 

2596 

2597 if prevSize < curSize: 

2598 self.itsNoProgress += 1 

2599 Util.log("No progress, attempt:" 

2600 " {attempt}".format(attempt=self.itsNoProgress)) 

2601 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2602 else: 

2603 # We made progress 

2604 return False 

2605 

2606 if self.its > self.MAX_ITERATIONS: 

2607 max = self.MAX_ITERATIONS 

2608 self.reason = \ 

2609 "Max iterations ({max}) exceeded".format(max=max) 

2610 return True 

2611 

2612 if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS: 

2613 max = self.MAX_ITERATIONS_NO_PROGRESS 

2614 self.reason = \ 

2615 "No progress made for {max} iterations".format(max=max) 

2616 return True 

2617 

2618 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2619 if curSize > maxSizeFromMin: 

2620 self.grace_remaining -= 1 

2621 if self.grace_remaining == 0: 

2622 self.reason = "Unexpected bump in size," \ 

2623 " compared to minimum achieved" 

2624 

2625 return True 

2626 

2627 return False 

2628 
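# Worked example of the abort conditions above (values from the class

# constants). All three checks run only on iterations that made no progress

# (prevSize < curSize): the coalesce is aborted once more than MAX_ITERATIONS

# (10) total iterations have run, after more than MAX_ITERATIONS_NO_PROGRESS

# (3) no-progress iterations, or, with GRACE_ITERATIONS == 2, on the second

# no-progress iteration ending above MAX_INCREASE_FROM_MINIMUM (1.2x) of the

# minimum size achieved, e.g. above 12GiB when the minimum was 10GiB.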

2629 def printSizes(self): 

2630 Util.log("Starting size was {size}" 

2631 .format(size=self.startSize)) 

2632 Util.log("Final size was {size}" 

2633 .format(size=self.finishSize)) 

2634 Util.log("Minimum size achieved was {size}" 

2635 .format(size=self.minSize)) 

2636 

2637 def printReasoning(self): 

2638 Util.log("Aborted coalesce") 

2639 for hist in self.history: 

2640 Util.log(hist) 

2641 Util.log(self.reason) 

2642 self.printSizes() 

2643 

2644 def printSummary(self): 

2645 if self.its == 0: 

2646 return 

2647 

2648 if self.reason:

2649 Util.log("Aborted coalesce") 

2650 Util.log(self.reason) 

2651 else: 

2652 Util.log("Coalesce summary") 

2653 

2654 Util.log(f"Performed {self.its} iterations") 

2655 self.printSizes() 

2656 

2657 

2658 def _coalesceLeaf(self, vdi): 

2659 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot 

2660 complete due to external changes, namely vdi_delete and vdi_snapshot 

2661 that alter leaf-coalescibility of vdi""" 

2662 tracker = self.CoalesceTracker(self) 

2663 while not vdi.canLiveCoalesce(self.getStorageSpeed()): 

2664 prevSizePhys = vdi.getSizePhys() 

2665 if not self._snapshotCoalesce(vdi):

2666 return False 

2667 if tracker.abortCoalesce(prevSizePhys, vdi.getSizePhys()): 

2668 tracker.printReasoning() 

2669 raise util.SMException("VDI {uuid} could not be coalesced" 

2670 .format(uuid=vdi.uuid)) 

2671 tracker.printSummary() 

2672 return self._liveLeafCoalesce(vdi) 

2673 
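# The loop above converges by repeatedly snapshotting the leaf (which turns

# the old leaf into a coalesceable internal node), coalescing that node into

# its parent, and re-measuring the leaf. Once the remaining data is small

# enough for canLiveCoalesce(), judged against the measured storage speed,

# the final pause-and-copy is handed to _liveLeafCoalesce(); the tracker

# aborts the loop if the leaf stops shrinking.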

2674 def calcStorageSpeed(self, startTime, endTime, coalescedSize): 

2675 speed = None 

2676 total_time = endTime - startTime 

2677 if total_time > 0: 

2678 speed = float(coalescedSize) / float(total_time) 

2679 return speed 

2680 

2681 def writeSpeedToFile(self, speed): 

2682 content = [] 

2683 speedFile = None 

2684 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2685 self.lock() 

2686 try: 

2687 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2688 lines = "" 

2689 if not os.path.isfile(path): 

2690 lines = str(speed) + "\n" 

2691 else: 

2692 speedFile = open(path, "r+") 

2693 content = speedFile.readlines() 

2694 content.append(str(speed) + "\n") 

2695 if len(content) > N_RUNNING_AVERAGE: 

2696 del content[0] 

2697 lines = "".join(content) 

2698 

2699 util.atomicFileWrite(path, VAR_RUN, lines) 

2700 finally: 

2701 if speedFile is not None: 

2702 speedFile.close() 

2703 Util.log("Closing file: {myfile}".format(myfile=path)) 

2704 self.unlock() 

2705 

2706 def recordStorageSpeed(self, startTime, endTime, coalescedSize): 

2707 speed = self.calcStorageSpeed(startTime, endTime, coalescedSize) 

2708 if speed is None: 

2709 return 

2710 

2711 self.writeSpeedToFile(speed) 

2712 

2713 def getStorageSpeed(self): 

2714 speedFile = None 

2715 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2716 self.lock() 

2717 try: 

2718 speed = None 

2719 if os.path.isfile(path): 

2720 speedFile = open(path) 

2721 content = speedFile.readlines() 

2722 try: 

2723 content = [float(i) for i in content] 

2724 except ValueError: 

2725 Util.log("Something bad in the speed log:{log}". 

2726 format(log=content)) # log the lines we already read; re-reading the exhausted file here would return nothing

2727 return speed 

2728 

2729 if len(content): 

2730 speed = sum(content) / float(len(content)) 

2731 if speed <= 0:

2732 # Defensive, should be impossible. 

2733 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2734 format(speed=speed, uuid=self.uuid)) 

2735 speed = None 

2736 else: 

2737 Util.log("Speed file empty for SR: {uuid}". 

2738 format(uuid=self.uuid)) 

2739 else: 

2740 Util.log("Speed log missing for SR: {uuid}". 

2741 format(uuid=self.uuid)) 

2742 return speed 

2743 finally: 

2744 if not (speedFile is None): 

2745 speedFile.close() 

2746 self.unlock() 

2747 
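# The speed log keeps at most N_RUNNING_AVERAGE entries (the oldest is

# dropped on write) and getStorageSpeed() returns their plain mean in

# bytes/second. For example, if the log currently holds 100e6, 120e6 and

# 110e6, the reported speed is (100e6 + 120e6 + 110e6) / 3 == 110e6 B/s.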

2748 def _snapshotCoalesce(self, vdi): 

2749 # Note that because we are not holding any locks here, concurrent SM 

2750 # operations may change this tree under our feet. In particular, vdi 

2751 # can be deleted, or it can be snapshotted. 

2752 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2753 Util.log("Single-snapshotting %s" % vdi) 

2754 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2755 try: 

2756 ret = self.xapi.singleSnapshotVDI(vdi) 

2757 Util.log("Single-snapshot returned: %s" % ret) 

2758 except XenAPI.Failure as e: 

2759 if util.isInvalidVDI(e): 

2760 Util.log("The VDI appears to have been concurrently deleted") 

2761 return False 

2762 raise 

2763 self.scanLocked() 

2764 tempSnap = vdi.parent 

2765 if not tempSnap.isCoalesceable(): 

2766 Util.log("The VDI appears to have been concurrently snapshotted") 

2767 return False 

2768 Util.log("Coalescing parent %s" % tempSnap) 

2769 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2770 sizePhys = vdi.getSizePhys() 

2771 self._coalesce(tempSnap) 

2772 if not vdi.isLeafCoalesceable(): 

2773 Util.log("The VDI tree appears to have been altered since") 

2774 return False 

2775 return True 

2776 

2777 def _liveLeafCoalesce(self, vdi: VDI) -> bool: 

2778 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) 

2779 self.lock() 

2780 try: 

2781 self.scan() 

2782 if not self.getVDI(vdi.uuid): 

2783 Util.log("The VDI appears to have been deleted meanwhile") 

2784 return False 

2785 if not vdi.isLeafCoalesceable(): 

2786 Util.log("The VDI is no longer leaf-coalesceable") 

2787 return False 

2788 

2789 uuid = vdi.uuid 

2790 vdi.pause(failfast=True) 

2791 try: 

2792 try: 

2793 # "vdi" object will no longer be valid after this call 

2794 self._create_running_file(vdi) 

2795 self._doCoalesceLeaf(vdi) 

2796 except: 

2797 Util.logException("_doCoalesceLeaf") 

2798 self._handleInterruptedCoalesceLeaf() 

2799 raise 

2800 finally: 

2801 vdi = self.getVDI(uuid) 

2802 if vdi: 

2803 vdi.ensureUnpaused() 

2804 self._delete_running_file(vdi) 

2805 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid) 

2806 if vdiOld: 

2807 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2808 self.deleteVDI(vdiOld) 

2809 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2810 finally: 

2811 self.cleanup() 

2812 self.unlock() 

2813 self.logFilter.logState() 

2814 return True 

2815 

2816 def _doCoalesceLeaf(self, vdi: VDI): 

2817 """Actual coalescing of a leaf VDI onto parent. Must be called in an 

2818 offline/atomic context""" 

2819 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid) 

2820 self._prepareCoalesceLeaf(vdi) 

2821 vdi.parent._setHidden(False) 

2822 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False) 

2823 vdi.validate(True) 

2824 vdi.parent.validate(True) 

2825 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid) 

2826 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT 

2827 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE:

2828 Util.log("Leaf-coalesce forced, will not use timeout") 

2829 timeout = 0 

2830 vdi._coalesceCowImage(timeout) 

2831 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid) 

2832 vdi.parent.validate(True) 

2833 #vdi._verifyContents(timeout / 2) 

2834 

2835 # rename 

2836 vdiUuid = vdi.uuid 

2837 oldName = vdi.fileName 

2838 origParentUuid = vdi.parent.uuid 

2839 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid) 

2840 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid) 

2841 vdi.parent.rename(vdiUuid) 

2842 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid) 

2843 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid) 

2844 

2845 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is 

2846 # garbage 

2847 

2848 # update the VDI record 

2849 if vdi.parent.vdi_type == VdiType.RAW:

2850 vdi.parent.setConfig(VDI.DB_VDI_TYPE, VdiType.RAW) 

2851 vdi.parent.delConfig(VDI.DB_VDI_BLOCKS) 

2852 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid) 

2853 

2854 self._updateNode(vdi) 

2855 

2856 # delete the obsolete leaf & inflate the parent (in that order, to 

2857 # minimize free space requirements) 

2858 parent = vdi.parent 

2859 vdi._setHidden(True) 

2860 vdi.parent.children = [] 

2861 vdi.parent = None 

2862 

2863 if parent.parent is None: 

2864 parent.delConfig(VDI.DB_VDI_PARENT) 

2865 

2866 extraSpace = self._calcExtraSpaceNeeded(vdi, parent) 

2867 freeSpace = self.getFreeSpace() 

2868 if freeSpace < extraSpace:

2869 # don't delete unless we need the space: deletion is time-consuming 

2870 # because it requires contacting the slaves, and we're paused here 

2871 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2872 self.deleteVDI(vdi) 

2873 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2874 

2875 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid) 

2876 self.journaler.remove(VDI.JRN_LEAF, vdiUuid) 

2877 

2878 self.forgetVDI(origParentUuid) 

2879 self._finishCoalesceLeaf(parent) 

2880 self._updateSlavesOnResize(parent) 

2881 
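# Rename dance recap (as implemented above): after the data copy, the child

# is renamed to OLD_<uuid> and the parent takes over the child's uuid, so

# the running VM transparently ends up on the coalesced parent. JRN_LEAF,

# written before any of this, lets _handleInterruptedCoalesceLeaf() decide

# on the next run whether a crash left us before the renames (undo) or

# after them (finish). The obsolete OLD_ leaf is deleted immediately only

# when the SR is short on space; otherwise it is swept later as garbage.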

2882 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

2883 assert(VdiType.isCowImage(parent.vdi_type)) 

2884 extra = child.getSizePhys() - parent.getSizePhys() 

2885 if extra < 0:

2886 extra = 0 

2887 return extra 

2888 

2889 def _prepareCoalesceLeaf(self, vdi) -> None: 

2890 pass 

2891 

2892 def _updateNode(self, vdi) -> None: 

2893 pass 

2894 

2895 def _finishCoalesceLeaf(self, parent) -> None: 

2896 pass 

2897 

2898 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

2899 pass 

2900 

2901 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None: 

2902 pass 

2903 

2904 def _updateSlavesOnResize(self, vdi) -> None: 

2905 pass 

2906 

2907 def _removeStaleVDIs(self, uuidsPresent) -> None: 

2908 for uuid in list(self.vdis.keys()): 

2909 if not uuid in uuidsPresent: 

2910 Util.log("VDI %s disappeared since last scan" % \ 

2911 self.vdis[uuid]) 

2912 del self.vdis[uuid] 

2913 

2914 def _handleInterruptedCoalesceLeaf(self) -> None: 

2915 """An interrupted leaf-coalesce operation may leave the COW tree in an 

2916 inconsistent state. If the old-leaf VDI is still present, we revert the 

2917 operation (in case the original error is persistent); otherwise we must 

2918 finish the operation""" 

2919 pass 

2920 

2921 def _buildTree(self, force): 

2922 self.vdiTrees = [] 

2923 for vdi in self.vdis.values(): 

2924 if vdi.parentUuid: 

2925 parent = self.getVDI(vdi.parentUuid) 

2926 if not parent: 

2927 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2928 self.vdiTrees.append(vdi) 

2929 continue 

2930 if force: 

2931 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2932 (vdi.parentUuid, vdi.uuid)) 

2933 self.vdiTrees.append(vdi) 

2934 continue 

2935 else: 

2936 raise util.SMException("Parent VDI %s of %s not " \ 

2937 "found" % (vdi.parentUuid, vdi.uuid)) 

2938 vdi.parent = parent 

2939 parent.children.append(vdi) 

2940 else: 

2941 self.vdiTrees.append(vdi) 

2942 

2943 

2944class FileSR(SR): 

2945 TYPE = SR.TYPE_FILE 

2946 CACHE_FILE_EXT = ".vhdcache" 

2947 # cache cleanup actions 

2948 CACHE_ACTION_KEEP = 0 

2949 CACHE_ACTION_REMOVE = 1 

2950 CACHE_ACTION_REMOVE_IF_INACTIVE = 2 

2951 

2952 def __init__(self, uuid, xapi, createLock, force): 

2953 SR.__init__(self, uuid, xapi, createLock, force) 

2954 self.path = "/var/run/sr-mount/%s" % self.uuid 

2955 self.journaler = fjournaler.Journaler(self.path) 

2956 

2957 @override 

2958 def scan(self, force=False) -> None: 

2959 if not util.pathexists(self.path): 

2960 raise util.SMException("directory %s not found!" % self.uuid) 

2961 

2962 uuidsPresent: List[str] = [] 

2963 

2964 for vdi_type in VDI_COW_TYPES: 

2965 scan_result = self._scan(vdi_type, force) 

2966 for uuid, image_info in scan_result.items(): 

2967 vdi = self.getVDI(uuid) 

2968 if not vdi: 

2969 self.logFilter.logNewVDI(uuid) 

2970 vdi = FileVDI(self, uuid, vdi_type) 

2971 self.vdis[uuid] = vdi 

2972 vdi.load(image_info) 

2973 uuidsPresent.extend(scan_result.keys()) 

2974 

2975 rawList = [x for x in os.listdir(self.path) if x.endswith(VdiTypeExtension.RAW)] 

2976 for rawName in rawList: 

2977 uuid = FileVDI.extractUuid(rawName) 

2978 uuidsPresent.append(uuid) 

2979 vdi = self.getVDI(uuid) 

2980 if not vdi: 

2981 self.logFilter.logNewVDI(uuid) 

2982 vdi = FileVDI(self, uuid, VdiType.RAW) 

2983 self.vdis[uuid] = vdi 

2984 self._removeStaleVDIs(uuidsPresent) 

2985 self._buildTree(force) 

2986 self.logFilter.logState() 

2987 self._handleInterruptedCoalesceLeaf() 

2988 

2989 @override 

2990 def getFreeSpace(self) -> int: 

2991 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) 

2992 

2993 @override 

2994 def deleteVDIs(self, vdiList) -> None: 

2995 rootDeleted = False 

2996 for vdi in vdiList: 

2997 if not vdi.parent: 

2998 rootDeleted = True 

2999 break 

3000 SR.deleteVDIs(self, vdiList) 

3001 if self.xapi.srRecord["type"] == "nfs" and rootDeleted: 

3002 self.xapi.markCacheSRsDirty() 

3003 

3004 @override 

3005 def cleanupCache(self, maxAge=-1) -> int: 

3006 """Clean up IntelliCache cache files. Caches for leaf nodes are 

3007 removed when the leaf node no longer exists or its allow-caching 

3008 attribute is not set. Caches for parent nodes are removed when the 

3009 parent node no longer exists or it hasn't been used in more than 

3010 <maxAge> hours. 

3011 Return number of caches removed. 

3012 """ 

3013 numRemoved = 0 

3014 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)] 

3015 Util.log("Found %d cache files" % len(cacheFiles)) 

3016 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge) 

3017 for cacheFile in cacheFiles: 

3018 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)] 

3019 action = self.CACHE_ACTION_KEEP 

3020 rec = self.xapi.getRecordVDI(uuid) 

3021 if not rec: 

3022 Util.log("Cache %s: VDI doesn't exist" % uuid) 

3023 action = self.CACHE_ACTION_REMOVE 

3024 elif rec["managed"] and not rec["allow_caching"]: 

3025 Util.log("Cache %s: caching disabled" % uuid) 

3026 action = self.CACHE_ACTION_REMOVE 

3027 elif not rec["managed"] and maxAge >= 0: 

3028 lastAccess = datetime.datetime.fromtimestamp( \ 

3029 os.path.getatime(os.path.join(self.path, cacheFile))) 

3030 if lastAccess < cutoff: 

3031 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge)) 

3032 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE 

3033 

3034 if action == self.CACHE_ACTION_KEEP: 

3035 Util.log("Keeping cache %s" % uuid) 

3036 continue 

3037 

3038 lockId = uuid 

3039 parentUuid = None 

3040 if rec and rec["managed"]: 

3041 parentUuid = rec["sm_config"].get("vhd-parent") 

3042 if parentUuid: 

3043 lockId = parentUuid 

3044 

3045 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId) 

3046 cacheLock.acquire() 

3047 try: 

3048 if self._cleanupCache(uuid, action): 

3049 numRemoved += 1 

3050 finally: 

3051 cacheLock.release() 

3052 return numRemoved 

3053 

3054 def _cleanupCache(self, uuid, action): 

3055 assert(action != self.CACHE_ACTION_KEEP) 

3056 rec = self.xapi.getRecordVDI(uuid) 

3057 if rec and rec["allow_caching"]: 

3058 Util.log("Cache %s appears to have become valid" % uuid) 

3059 return False 

3060 

3061 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT) 

3062 tapdisk = blktap2.Tapdisk.find_by_path(fullPath) 

3063 if tapdisk: 

3064 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE: 

3065 Util.log("Cache %s still in use" % uuid) 

3066 return False 

3067 Util.log("Shutting down tapdisk for %s" % fullPath) 

3068 tapdisk.shutdown() 

3069 

3070 Util.log("Deleting file %s" % fullPath) 

3071 os.unlink(fullPath) 

3072 return True 

3073 

3074 def _isCacheFileName(self, name): 

3075 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \ 

3076 name.endswith(self.CACHE_FILE_EXT) 

3077 

3078 def _scan(self, vdi_type, force): 

3079 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3080 error = False 

3081 pattern = os.path.join(self.path, "*%s" % VDI_TYPE_TO_EXTENSION[vdi_type]) 

3082 scan_result = getCowUtil(vdi_type).getAllInfoFromVG(pattern, FileVDI.extractUuid) 

3083 for uuid, vdiInfo in scan_result.items(): 

3084 if vdiInfo.error: 

3085 error = True 

3086 break 

3087 if not error: 

3088 return scan_result 

3089 Util.log("Scan error on attempt %d" % i) 

3090 if force: 

3091 return scan_result 

3092 raise util.SMException("Scan error") 

3093 

3094 @override 

3095 def deleteVDI(self, vdi) -> None: 

3096 self._checkSlaves(vdi) 

3097 SR.deleteVDI(self, vdi) 

3098 

3099 def _checkSlaves(self, vdi): 

3100 onlineHosts = self.xapi.getOnlineHosts() 

3101 abortFlag = IPCFlag(self.uuid) 

3102 for pbdRecord in self.xapi.getAttachedPBDs(): 

3103 hostRef = pbdRecord["host"] 

3104 if hostRef == self.xapi._hostRef: 

3105 continue 

3106 if abortFlag.test(FLAG_TYPE_ABORT): 

3107 raise AbortException("Aborting due to signal") 

3108 try: 

3109 self._checkSlave(hostRef, vdi) 

3110 except util.CommandException: 

3111 if hostRef in onlineHosts: 

3112 raise 

3113 

3114 def _checkSlave(self, hostRef, vdi): 

3115 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path}) 

3116 Util.log("Checking with slave: %s" % repr(call)) 

3117 _host = self.xapi.session.xenapi.host 

3118 text = _host.call_plugin(*call)

3119 

3120 @override 

3121 def _handleInterruptedCoalesceLeaf(self) -> None: 

3122 entries = self.journaler.getAll(VDI.JRN_LEAF) 

3123 for uuid, parentUuid in entries.items(): 

3124 fileList = os.listdir(self.path) 

3125 childName = uuid + VdiTypeExtension.VHD 

3126 tmpChildName = self.TMP_RENAME_PREFIX + uuid + VdiTypeExtension.VHD 

3127 parentName1 = parentUuid + VdiTypeExtension.VHD 

3128 parentName2 = parentUuid + VdiTypeExtension.RAW 

3129 parentPresent = (parentName1 in fileList or parentName2 in fileList) 

3130 if parentPresent or tmpChildName in fileList: 

3131 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3132 else: 

3133 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3134 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3135 vdi = self.getVDI(uuid) 

3136 if vdi: 

3137 vdi.ensureUnpaused() 

3138 

3139 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3140 Util.log("*** UNDO LEAF-COALESCE") 

3141 parent = self.getVDI(parentUuid) 

3142 if not parent: 

3143 parent = self.getVDI(childUuid) 

3144 if not parent: 

3145 raise util.SMException("Neither %s nor %s found" % \ 

3146 (parentUuid, childUuid)) 

3147 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

3148 parent.rename(parentUuid) 

3149 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

3150 

3151 child = self.getVDI(childUuid) 

3152 if not child: 

3153 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3154 if not child: 

3155 raise util.SMException("Neither %s nor %s found" % \ 

3156 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3157 Util.log("Renaming child back to %s" % childUuid) 

3158 child.rename(childUuid) 

3159 Util.log("Updating the VDI record") 

3160 child.setConfig(VDI.DB_VDI_PARENT, parentUuid) 

3161 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type) 

3162 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3163 

3164 if child.isHidden(): 

3165 child._setHidden(False) 

3166 if not parent.isHidden(): 

3167 parent._setHidden(True) 

3168 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3169 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3170 Util.log("*** leaf-coalesce undo successful") 

3171 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3172 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3173 

3174 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3175 Util.log("*** FINISH LEAF-COALESCE") 

3176 vdi = self.getVDI(childUuid) 

3177 if not vdi: 

3178 Util.log(f"_finishInterruptedCoalesceLeaf, vdi {childUuid} not found, aborting") 

3179 raise util.SMException("VDI %s not found" % childUuid) 

3180 try: 

3181 self.forgetVDI(parentUuid) 

3182 except XenAPI.Failure: 

3183 Util.logException('_finishInterruptedCoalesceLeaf')

3185 self._updateSlavesOnResize(vdi) 

3186 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3187 Util.log("*** finished leaf-coalesce successfully") 

3188 

3189 

3190class LVMSR(SR): 

3191 TYPE = SR.TYPE_LVHD 

3192 SUBTYPES = ["lvhdoiscsi", "lvhdohba"] 

3193 

3194 def __init__(self, uuid, xapi, createLock, force): 

3195 SR.__init__(self, uuid, xapi, createLock, force) 

3196 self.vgName = "%s%s" % (VG_PREFIX, self.uuid) 

3197 self.path = os.path.join(VG_LOCATION, self.vgName) 

3198 

3199 sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid) 

3200 other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref) 

3201 lvm_conf = other_conf.get('lvm-conf') if other_conf else None 

3202 self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf) 

3203 

3204 self.lvActivator = LVActivator(self.uuid, self.lvmCache) 

3205 self.journaler = journaler.Journaler(self.lvmCache) 

3206 

3207 @override 

3208 def deleteVDI(self, vdi) -> None: 

3209 if self.lvActivator.get(vdi.uuid, False): 

3210 self.lvActivator.deactivate(vdi.uuid, False) 

3211 self._checkSlaves(vdi) 

3212 SR.deleteVDI(self, vdi) 

3213 

3214 @override 

3215 def forgetVDI(self, vdiUuid) -> None: 

3216 SR.forgetVDI(self, vdiUuid) 

3217 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) 

3218 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid) 

3219 

3220 @override 

3221 def getFreeSpace(self) -> int: 

3222 stats = lvutil._getVGstats(self.vgName) 

3223 return stats['physical_size'] - stats['physical_utilisation'] 

3224 

3225 @override 

3226 def cleanup(self): 

3227 if not self.lvActivator.deactivateAll(): 

3228 Util.log("ERROR deactivating LVs while cleaning up") 

3229 

3230 @override 

3231 def needUpdateBlockInfo(self) -> bool: 

3232 for vdi in self.vdis.values(): 

3233 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0: 

3234 continue 

3235 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

3236 return True 

3237 return False 

3238 

3239 @override 

3240 def updateBlockInfo(self) -> None: 

3241 numUpdated = 0 

3242 for vdi in self.vdis.values(): 

3243 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0: 

3244 continue 

3245 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

3246 vdi.updateBlockInfo() 

3247 numUpdated += 1 

3248 if numUpdated: 

3249 # Deactivate the LVs sooner rather than later: if we wait, another

3250 # thread might leaf-coalesce a node and delete it before this thread

3251 # gets to the deactivations, making the child inherit the refcount

3252 # value and preventing the correct decrement.

3253 self.cleanup() 

3254 

3255 @override 

3256 def scan(self, force=False) -> None: 

3257 vdis = self._scan(force) 

3258 for uuid, vdiInfo in vdis.items(): 

3259 vdi = self.getVDI(uuid) 

3260 if not vdi: 

3261 self.logFilter.logNewVDI(uuid) 

3262 vdi = LVMVDI(self, uuid, vdiInfo.vdiType) 

3263 self.vdis[uuid] = vdi 

3264 vdi.load(vdiInfo) 

3265 self._removeStaleVDIs(vdis.keys()) 

3266 self._buildTree(force) 

3267 self.logFilter.logState() 

3268 self._handleInterruptedCoalesceLeaf() 

3269 

3270 def _scan(self, force): 

3271 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3272 error = False 

3273 self.lvmCache.refresh() 

3274 vdis = LvmCowUtil.getVDIInfo(self.lvmCache) 

3275 for uuid, vdiInfo in vdis.items(): 

3276 if vdiInfo.scanError: 

3277 error = True 

3278 break 

3279 if not error: 

3280 return vdis 

3281 Util.log("Scan error, retrying (%d)" % i) 

3282 if force: 

3283 return vdis 

3284 raise util.SMException("Scan error") 

3285 

3286 @override 

3287 def _removeStaleVDIs(self, uuidsPresent) -> None: 

3288 for uuid in list(self.vdis.keys()): 

3289 if not uuid in uuidsPresent: 

3290 Util.log("VDI %s disappeared since last scan" % \ 

3291 self.vdis[uuid]) 

3292 del self.vdis[uuid] 

3293 if self.lvActivator.get(uuid, False): 

3294 self.lvActivator.remove(uuid, False) 

3295 

3296 @override 

3297 def _liveLeafCoalesce(self, vdi) -> bool: 

3298 """If the parent is raw and the child was resized (virt. size), then 

3299 we'll need to resize the parent, which can take a while due to zeroing 

3300 out of the extended portion of the LV. Do it before pausing the child 

3301 to avoid a protracted downtime""" 

3302 if not VdiType.isCowImage(vdi.parent.vdi_type) and vdi.sizeVirt > vdi.parent.sizeVirt: 

3303 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

3304 vdi.parent._increaseSizeVirt(vdi.sizeVirt) 

3305 

3306 return SR._liveLeafCoalesce(self, vdi) 

3307 

3308 @override 

3309 def _prepareCoalesceLeaf(self, vdi) -> None: 

3310 vdi._activateChain() 

3311 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

3312 vdi.deflate() 

3313 vdi.inflateParentForCoalesce() 

3314 

3315 @override 

3316 def _updateNode(self, vdi) -> None: 

3317 # fix the refcounts: the remaining node should inherit the binary 

3318 # refcount from the leaf (because if it was online, it should remain 

3319 # refcounted as such), but the normal refcount from the parent (because 

3320 # this node is really the parent node) - minus 1 if it is online (since 

3321 # non-leaf nodes increment their normal counts when they are online and 

3322 # we are now a leaf, storing that 1 in the binary refcount). 

3323 ns = NS_PREFIX_LVM + self.uuid 

3324 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns) 

3325 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns) 

3326 pCnt = pCnt - cBcnt 

3327 assert(pCnt >= 0) 

3328 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns) 
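# Worked example of the arithmetic above: if the parent node had
# (count=2, binary=0) and the child leaf had (count=1, binary=1), the
# surviving node ends up with count = 2 - 1 = 1 and binary = 1: the
# parent's non-leaf references minus the one implied by the online child,
# plus the child's online-leaf (binary) refcount.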

3329 

3330 @override 

3331 def _finishCoalesceLeaf(self, parent) -> None: 

3332 if not parent.isSnapshot() or parent.isAttachedRW(): 

3333 parent.inflateFully() 

3334 else: 

3335 parent.deflate() 

3336 

3337 @override 

3338 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3339 return parent.lvmcowutil.calcVolumeSize(parent.sizeVirt) - parent.sizeLV 

3340 

3341 @override 

3342 def _handleInterruptedCoalesceLeaf(self) -> None: 

3343 entries = self.journaler.getAll(VDI.JRN_LEAF) 

3344 for uuid, parentUuid in entries.items(): 

3345 undo = False 

3346 for prefix in LV_PREFIX.values(): 

3347 parentLV = prefix + parentUuid 

3348 undo = self.lvmCache.checkLV(parentLV) 

3349 if undo: 

3350 break 

3351 

3352 if not undo: 

3353 for prefix in LV_PREFIX.values(): 

3354 tmpChildLV = prefix + uuid 

3355 undo = self.lvmCache.checkLV(tmpChildLV) 

3356 if undo: 

3357 break 

3358 

3359 if undo: 

3360 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3361 else: 

3362 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3363 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3364 vdi = self.getVDI(uuid) 

3365 if vdi: 

3366 vdi.ensureUnpaused() 
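# In short: if the old parent LV or the temporarily renamed child LV still
# exists, the leaf-coalesce did not complete and is undone; otherwise only
# the cleanup steps remained, so it is finished.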

3367 

3368 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3369 Util.log("*** UNDO LEAF-COALESCE") 

3370 parent = self.getVDI(parentUuid) 

3371 if not parent: 

3372 parent = self.getVDI(childUuid) 

3373 if not parent: 

3374 raise util.SMException("Neither %s nor %s found" % \ 

3375 (parentUuid, childUuid)) 

3376 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

3377 parent.rename(parentUuid) 

3378 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

3379 

3380 child = self.getVDI(childUuid) 

3381 if not child: 

3382 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3383 if not child: 

3384 raise util.SMException("Neither %s nor %s found" % \ 

3385 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3386 Util.log("Renaming child back to %s" % childUuid) 

3387 child.rename(childUuid) 

3388 Util.log("Updating the VDI record") 

3389 child.setConfig(VDI.DB_VDI_PARENT, parentUuid) 

3390 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type) 

3391 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3392 

3393 # refcount (best effort - assume that it had succeeded if the 

3394 # second rename succeeded; if not, this adjustment will be wrong, 

3395 # leading to a non-deactivation of the LV) 

3396 ns = NS_PREFIX_LVM + self.uuid 

3397 cCnt, cBcnt = RefCounter.check(child.uuid, ns) 

3398 pCnt, pBcnt = RefCounter.check(parent.uuid, ns) 

3399 pCnt = pCnt + cBcnt 

3400 RefCounter.set(parent.uuid, pCnt, 0, ns) 

3401 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid) 

3402 

3403 parent.deflate() 

3404 child.inflateFully() 

3405 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid) 

3406 if child.isHidden(): 

3407 child._setHidden(False) 

3408 if not parent.isHidden(): 

3409 parent._setHidden(True) 

3410 if not parent.lvReadonly: 

3411 self.lvmCache.setReadonly(parent.fileName, True) 

3412 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3413 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3414 Util.log("*** leaf-coalesce undo successful") 

3415 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3416 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3417 

3418 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3419 Util.log("*** FINISH LEAF-COALESCE") 

3420 vdi = self.getVDI(childUuid) 

3421 if not vdi: 

3422 raise util.SMException("VDI %s not found" % childUuid) 

3423 vdi.inflateFully() 

3424 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid) 

3425 try: 

3426 self.forgetVDI(parentUuid) 

3427 except XenAPI.Failure: 

3428 pass 

3429 self._updateSlavesOnResize(vdi) 

3430 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3431 Util.log("*** finished leaf-coalesce successfully") 

3432 

3433 def _checkSlaves(self, vdi): 

3434 """Confirm with all slaves in the pool that 'vdi' is not in use. We 

3435 try to check all slaves, including those that the Agent believes are 

3436 offline, but ignore failures for offline hosts. This is to avoid cases 

3437 where the Agent thinks a host is offline but the host is up.""" 

3438 args = {"vgName": self.vgName, 

3439 "action1": "deactivateNoRefcount", 

3440 "lvName1": vdi.fileName, 

3441 "action2": "cleanupLockAndRefcount", 

3442 "uuid2": vdi.uuid, 

3443 "ns2": NS_PREFIX_LVM + self.uuid} 

3444 onlineHosts = self.xapi.getOnlineHosts() 

3445 abortFlag = IPCFlag(self.uuid) 

3446 for pbdRecord in self.xapi.getAttachedPBDs(): 

3447 hostRef = pbdRecord["host"] 

3448 if hostRef == self.xapi._hostRef: 

3449 continue 

3450 if abortFlag.test(FLAG_TYPE_ABORT): 

3451 raise AbortException("Aborting due to signal") 

3452 Util.log("Checking with slave %s (path %s)" % ( 

3453 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path)) 

3454 try: 

3455 self.xapi.ensureInactive(hostRef, args) 

3456 except XenAPI.Failure: 

3457 if hostRef in onlineHosts: 

3458 raise 

3459 

3460 @override 

3461 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

3462 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) 

3463 if not slaves: 

3464 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ 

3465 child) 

3466 return 

3467 

3468 tmpName = LV_PREFIX[child.vdi_type] + self.TMP_RENAME_PREFIX + child.uuid

3469 args = {"vgName": self.vgName, 

3470 "action1": "deactivateNoRefcount", 

3471 "lvName1": tmpName, 

3472 "action2": "deactivateNoRefcount", 

3473 "lvName2": child.fileName, 

3474 "action3": "refresh", 

3475 "lvName3": child.fileName, 

3476 "action4": "refresh", 

3477 "lvName4": parent.fileName} 

3478 for slave in slaves: 

3479 Util.log("Updating %s, %s, %s on slave %s" % \ 

3480 (tmpName, child.fileName, parent.fileName, 

3481 self.xapi.getRecordHost(slave)['hostname'])) 

3482 text = self.xapi.session.xenapi.host.call_plugin( \ 

3483 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3484 Util.log("call-plugin returned: '%s'" % text) 

3485 

3486 @override 

3487 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None: 

3488 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) 

3489 if not slaves: 

3490 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) 

3491 return 

3492 

3493 args = {"vgName": self.vgName, 

3494 "action1": "deactivateNoRefcount", 

3495 "lvName1": oldNameLV, 

3496 "action2": "refresh", 

3497 "lvName2": vdi.fileName, 

3498 "action3": "cleanupLockAndRefcount", 

3499 "uuid3": origParentUuid, 

3500 "ns3": NS_PREFIX_LVM + self.uuid} 

3501 for slave in slaves: 

3502 Util.log("Updating %s to %s on slave %s" % \ 

3503 (oldNameLV, vdi.fileName, 

3504 self.xapi.getRecordHost(slave)['hostname'])) 

3505 text = self.xapi.session.xenapi.host.call_plugin( \ 

3506 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3507 Util.log("call-plugin returned: '%s'" % text) 

3508 

3509 @override 

3510 def _updateSlavesOnResize(self, vdi) -> None: 

3511 uuids = [x.uuid for x in vdi.getAllLeaves()] 

3512 slaves = util.get_slaves_attached_on(self.xapi.session, uuids) 

3513 if not slaves: 

3514 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi) 

3515 return 

3516 LvmCowUtil.refreshVolumeOnSlaves(self.xapi.session, self.uuid, self.vgName, 

3517 vdi.fileName, vdi.uuid, slaves) 

3518 

3519 

3520class LinstorSR(SR): 

3521 TYPE = SR.TYPE_LINSTOR 

3522 

3523 def __init__(self, uuid, xapi, createLock, force): 

3524 if not LINSTOR_AVAILABLE: 

3525 raise util.SMException( 

3526 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing' 

3527 ) 

3528 

3529 SR.__init__(self, uuid, xapi, createLock, force) 

3530 self.path = LinstorVolumeManager.DEV_ROOT_PATH 

3531 

3532 class LinstorProxy: 

3533 def __init__(self, sr: LinstorSR) -> None: 

3534 self.sr = sr 

3535 

3536 def __getattr__(self, attr: str) -> Any: 

3537 assert self.sr, "Cannot use `LinstorProxy` without valid `LinstorVolumeManager` instance" 

3538 return getattr(self.sr._linstor, attr) 

3539 

3540 self._linstor_proxy = LinstorProxy(self) 

3541 self._reloadLinstor(journaler_only=True) 
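# Note: `_linstor_proxy` forwards attribute access to `self._linstor` via
# __getattr__, so it only becomes usable after a full _reloadLinstor() call
# (journaler_only=False) has created the LinstorVolumeManager instance.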

3542 

3543 @override 

3544 def deleteVDI(self, vdi) -> None: 

3545 self._checkSlaves(vdi) 

3546 SR.deleteVDI(self, vdi) 

3547 

3548 @override 

3549 def getFreeSpace(self) -> int: 

3550 return self._linstor.max_volume_size_allowed 

3551 

3552 @override 

3553 def scan(self, force=False) -> None: 

3554 all_vdi_info = self._scan(force) 

3555 for uuid, vdiInfo in all_vdi_info.items(): 

3556 # When vdiInfo is None, the VDI is RAW. 

3557 vdi = self.getVDI(uuid) 

3558 if not vdi: 

3559 self.logFilter.logNewVDI(uuid) 

3560 vdi = LinstorVDI(self, uuid, vdiInfo.vdiType if vdiInfo else VdiType.RAW) 

3561 self.vdis[uuid] = vdi 

3562 if vdiInfo: 

3563 vdi.load(vdiInfo) 

3564 self._removeStaleVDIs(all_vdi_info.keys()) 

3565 self._buildTree(force) 

3566 self.logFilter.logState() 

3567 self._handleInterruptedCoalesceLeaf() 

3568 

3569 @override 

3570 def pauseVDIs(self, vdiList) -> None: 

3571 self._linstor.ensure_volume_list_is_not_locked( 

3572 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3573 ) 

3574 return super(LinstorSR, self).pauseVDIs(vdiList) 

3575 

3576 def _reloadLinstor(self, journaler_only=False): 

3577 session = self.xapi.session 

3578 host_ref = util.get_this_host_ref(session) 

3579 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) 

3580 

3581 pbd = util.find_my_pbd(session, host_ref, sr_ref) 

3582 if pbd is None: 

3583 raise util.SMException('Failed to find PBD') 

3584 

3585 dconf = session.xenapi.PBD.get_device_config(pbd) 

3586 group_name = dconf['group-name'] 

3587 

3588 controller_uri = get_controller_uri() 

3589 self.journaler = LinstorJournaler( 

3590 controller_uri, group_name, logger=util.SMlog 

3591 ) 

3592 

3593 if journaler_only: 

3594 return 

3595 

3596 self._linstor = LinstorVolumeManager( 

3597 controller_uri, 

3598 group_name, 

3599 repair=True, 

3600 logger=util.SMlog 

3601 ) 

3602 

3603 def _scan(self, force): 

3604 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3605 self._reloadLinstor() 

3606 error = False 

3607 try: 

3608 all_vdi_info = self._load_vdi_info() 

3609 for uuid, vdiInfo in all_vdi_info.items(): 

3610 if vdiInfo and vdiInfo.error: 

3611 error = True 

3612 break 

3613 if not error: 

3614 return all_vdi_info 

3615 Util.log('Scan error, retrying ({})'.format(i)) 

3616 except Exception as e: 

3617 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3618 Util.log(traceback.format_exc()) 

3619 

3620 if force: 

3621 return all_vdi_info 

3622 raise util.SMException('Scan error') 

3623 

3624 def _load_vdi_info(self): 

3625 all_vdi_info = {} 

3626 

3627 # TODO: Ensure metadata contains the right info. 

3628 

3629 all_volume_info = self._linstor.get_volumes_with_info() 

3630 volumes_metadata = self._linstor.get_volumes_with_metadata() 

3631 for vdi_uuid, volume_info in all_volume_info.items(): 

3632 vdi_type = VdiType.RAW 

3633 try: 

3634 volume_metadata = volumes_metadata[vdi_uuid] 

3635 if not volume_info.name and not list(volume_metadata.items()): 

3636 continue # Ignore it, probably deleted. 

3637 

3638 if vdi_uuid.startswith('DELETED_'): 

3639 # Assume it is really a RAW volume left over from a failed snapshot,

3640 # without a COW header/footer. We must remove this VDI now, without

3641 # adding it to the VDI list; otherwise `Relinking` calls and other

3642 # actions could be launched on it.

3643 Util.log('Deleting bad VDI {}'.format(vdi_uuid)) 

3644 

3645 self.lock() 

3646 try: 

3647 self._linstor.destroy_volume(vdi_uuid) 

3648 try: 

3649 self.forgetVDI(vdi_uuid) 

3650 except: 

3651 pass 

3652 except Exception as e: 

3653 Util.log('Cannot delete bad VDI: {}'.format(e)) 

3654 finally: 

3655 self.unlock() 

3656 continue 

3657 

3658 vdi_type = volume_metadata.get(VDI_TYPE_TAG) 

3659 volume_name = self._linstor.get_volume_name(vdi_uuid) 

3660 if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX): 

3661 # Always RAW! 

3662 info = None 

3663 elif VdiType.isCowImage(vdi_type): 

3664 info = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type).get_info(vdi_uuid) 

3665 else: 

3666 # Ensure it's not a COW image... 

3667 linstorcowutil = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type) 

3668 try: 

3669 info = linstorcowutil.get_info(vdi_uuid) 

3670 except: 

3671 try: 

3672 linstorcowutil.force_repair( 

3673 self._linstor.get_device_path(vdi_uuid) 

3674 ) 

3675 info = linstorcowutil.get_info(vdi_uuid) 

3676 except: 

3677 info = None 

3678 

3679 except Exception as e: 

3680 Util.log( 

3681 ' [VDI {}: failed to load VDI info]: {}' 

3682 .format(vdi_uuid, e) 

3683 ) 

3684 info = CowImageInfo(vdi_uuid) 

3685 info.error = 1 

3686 

3687 if info: 

3688 info.vdiType = vdi_type 

3689 

3690 all_vdi_info[vdi_uuid] = info 

3691 

3692 return all_vdi_info 

3693 

3694 @override 

3695 def _prepareCoalesceLeaf(self, vdi) -> None: 

3696 vdi._activateChain() 

3697 vdi.deflate() 

3698 vdi._inflateParentForCoalesce() 

3699 

3700 @override 

3701 def _finishCoalesceLeaf(self, parent) -> None: 

3702 if not parent.isSnapshot() or parent.isAttachedRW(): 

3703 parent.inflateFully() 

3704 else: 

3705 parent.deflate() 

3706 

3707 @override 

3708 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3709 return LinstorCowUtil( 

3710 self.xapi.session, self._linstor, parent.vdi_type 

3711 ).compute_volume_size(parent.sizeVirt) - parent.getDrbdSize() 

3712 

3713 def _hasValidDevicePath(self, uuid): 

3714 try: 

3715 self._linstor.get_device_path(uuid) 

3716 except Exception: 

3717 # TODO: Maybe log exception. 

3718 return False 

3719 return True 

3720 

3721 @override 

3722 def _liveLeafCoalesce(self, vdi) -> bool: 

3723 self.lock() 

3724 try: 

3725 self._linstor.ensure_volume_is_not_locked( 

3726 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3727 ) 

3728 return super(LinstorSR, self)._liveLeafCoalesce(vdi) 

3729 finally: 

3730 self.unlock() 

3731 

3732 @override 

3733 def _handleInterruptedCoalesceLeaf(self) -> None: 

3734 entries = self.journaler.get_all(VDI.JRN_LEAF) 

3735 for uuid, parentUuid in entries.items(): 

3736 if self._hasValidDevicePath(parentUuid) or \ 

3737 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): 

3738 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3739 else: 

3740 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3741 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3742 vdi = self.getVDI(uuid) 

3743 if vdi: 

3744 vdi.ensureUnpaused() 

3745 

3746 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3747 Util.log('*** UNDO LEAF-COALESCE') 

3748 parent = self.getVDI(parentUuid) 

3749 if not parent: 

3750 parent = self.getVDI(childUuid) 

3751 if not parent: 

3752 raise util.SMException( 

3753 'Neither {} nor {} found'.format(parentUuid, childUuid) 

3754 ) 

3755 Util.log( 

3756 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) 

3757 ) 

3758 parent.rename(parentUuid) 

3759 

3760 child = self.getVDI(childUuid) 

3761 if not child: 

3762 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3763 if not child: 

3764 raise util.SMException( 

3765 'Neither {} nor {} found'.format( 

3766 childUuid, self.TMP_RENAME_PREFIX + childUuid 

3767 ) 

3768 ) 

3769 Util.log('Renaming child back to {}'.format(childUuid)) 

3770 child.rename(childUuid) 

3771 Util.log('Updating the VDI record') 

3772 child.setConfig(VDI.DB_VDI_PARENT, parentUuid) 

3773 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type) 

3774 

3775 # TODO: Maybe deflate here. 

3776 

3777 if child.isHidden(): 

3778 child._setHidden(False) 

3779 if not parent.isHidden(): 

3780 parent._setHidden(True) 

3781 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3782 Util.log('*** leaf-coalesce undo successful') 

3783 

3784 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3785 Util.log('*** FINISH LEAF-COALESCE') 

3786 vdi = self.getVDI(childUuid) 

3787 if not vdi: 

3788 raise util.SMException('VDI {} not found'.format(childUuid)) 

3789 # TODO: Maybe inflate. 

3790 try: 

3791 self.forgetVDI(parentUuid) 

3792 except XenAPI.Failure: 

3793 pass 

3794 self._updateSlavesOnResize(vdi) 

3795 Util.log('*** finished leaf-coalesce successfully') 

3796 

3797 def _checkSlaves(self, vdi): 

3798 try: 

3799 all_openers = self._linstor.get_volume_openers(vdi.uuid) 

3800 for openers in all_openers.values(): 

3801 for opener in openers.values(): 

3802 if opener['process-name'] != 'tapdisk': 

3803 raise util.SMException( 

3804 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) 

3805 ) 

3806 except LinstorVolumeManagerError as e: 

3807 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

3808 raise 

3809 

3810 

3811################################################################################ 

3812# 

3813# Helpers 

3814# 

3815def daemonize(): 

3816 pid = os.fork() 

3817 if pid: 

3818 os.waitpid(pid, 0) 

3819 Util.log("New PID [%d]" % pid) 

3820 return False 

3821 os.chdir("/") 

3822 os.setsid() 

3823 pid = os.fork() 

3824 if pid: 

3825 Util.log("Will finish as PID [%d]" % pid) 

3826 os._exit(0) 

3827 for fd in [0, 1, 2]: 

3828 try: 

3829 os.close(fd) 

3830 except OSError: 

3831 pass 

3832 # we need to fill those special fd numbers or pread won't work 

3833 sys.stdin = open("/dev/null", 'r') 

3834 sys.stderr = open("/dev/null", 'w') 

3835 sys.stdout = open("/dev/null", 'w') 

3836 # As we're a new process we need to clear the lock objects 

3837 lock.Lock.clearAll() 

3838 return True 
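# The double fork above is the classic UNIX daemonization sequence: the first
# child calls setsid() to detach from the controlling terminal, and the second
# fork ensures the surviving process cannot reacquire one. The original
# process gets False back; only the daemonized grandchild gets True.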

3839 

3840 

3841def normalizeType(type): 

3842 if type in LVMSR.SUBTYPES: 

3843 type = SR.TYPE_LVHD 

3844 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: 

3845 # temporary while LVHD is symlinked as LVM 

3846 type = SR.TYPE_LVHD 

3847 if type in [ 

3848 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", 

3849 "moosefs", "xfs", "zfs", "largeblock" 

3850 ]: 

3851 type = SR.TYPE_FILE 

3852 if type in ["linstor"]: 

3853 type = SR.TYPE_LINSTOR 

3854 if type not in SR.TYPES: 

3855 raise util.SMException("Unsupported SR type: %s" % type) 

3856 return type 
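# For example, normalizeType("lvmoiscsi") and normalizeType("lvhdoiscsi") both
# yield SR.TYPE_LVHD, normalizeType("nfs") yields SR.TYPE_FILE, and anything
# outside SR.TYPES raises util.SMException.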

3857 

3858GCPAUSE_DEFAULT_SLEEP = 5 * 60 

3859 

3860 

3861def _gc_init_file(sr_uuid): 

3862 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init') 

3863 

3864 

3865def _create_init_file(sr_uuid): 

3866 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))) 

3867 with open(os.path.join(_gc_init_file(sr_uuid)), 'w+') as f: 

3868 f.write('1') 

3869 

3870 

3871def _gcLoopPause(sr, dryRun=False, immediate=False): 

3872 if immediate: 

3873 return 

3874 

3875 # Check whether the GCPAUSE_FISTPOINT is present. If so, the fistpoint

3876 # will just return. Otherwise, fall back on an abortable sleep.

3877 

3878 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT): 

3879 

3880 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT,

3881 lambda *args: None) 

3882 elif os.path.exists(_gc_init_file(sr.uuid)): 

3883 def abortTest(): 

3884 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT) 

3885 

3886 # If time.sleep hangs we are in deep trouble, however for 

3887 # completeness we set the timeout of the abort thread to 

3888 # 110% of GCPAUSE_DEFAULT_SLEEP. 

3889 Util.log("GC active, about to go quiet") 

3890 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP),

3891 None, sr.uuid, abortTest, VDI.POLL_INTERVAL, 

3892 GCPAUSE_DEFAULT_SLEEP * 1.1) 

3893 Util.log("GC active, quiet period ended") 

3894 

3895 

3896def _gcLoop(sr, dryRun=False, immediate=False): 

3897 if not lockGCActive.acquireNoblock():

3898 Util.log("Another GC instance already active, exiting") 

3899 return 

3900 

3901 # Check we're still attached after acquiring locks 

3902 if not sr.xapi.isPluggedHere(): 

3903 Util.log("SR no longer attached, exiting") 

3904 return 

3905 

3906 # Clean up IntelliCache files

3907 sr.cleanupCache() 

3908 

3909 # Track how many we do 

3910 coalesced = 0 

3911 task_status = "success" 

3912 try: 

3913 # Check if any work needs to be done 

3914 if not sr.xapi.isPluggedHere():

3915 Util.log("SR no longer attached, exiting") 

3916 return 

3917 sr.scanLocked() 

3918 if not sr.hasWork(): 

3919 Util.log("No work, exiting") 

3920 return 

3921 sr.xapi.create_task( 

3922 "Garbage Collection", 

3923 "Garbage collection for SR %s" % sr.uuid) 

3924 _gcLoopPause(sr, dryRun, immediate=immediate) 

3925 while True: 

3926 if SIGTERM: 

3927 Util.log("Term requested") 

3928 return 

3929 

3930 if not sr.xapi.isPluggedHere():

3931 Util.log("SR no longer attached, exiting") 

3932 break 

3933 sr.scanLocked() 

3934 if not sr.hasWork(): 

3935 Util.log("No work, exiting") 

3936 break 

3937 

3938 if not lockGCRunning.acquireNoblock():

3939 Util.log("Unable to acquire GC running lock.") 

3940 return 

3941 try: 

3942 if not sr.gcEnabled():

3943 break 

3944 

3945 sr.xapi.update_task_progress("done", coalesced) 

3946 

3947 sr.cleanupCoalesceJournals() 

3948 # Create the init file here in case startup is waiting on it 

3949 _create_init_file(sr.uuid) 

3950 sr.scanLocked() 

3951 sr.updateBlockInfo() 

3952 

3953 howmany = len(sr.findGarbage()) 

3954 if howmany > 0: 

3955 Util.log("Found %d orphaned vdis" % howmany) 

3956 sr.lock() 

3957 try: 

3958 sr.garbageCollect(dryRun) 

3959 finally: 

3960 sr.unlock() 

3961 sr.xapi.srUpdate() 

3962 

3963 candidate = sr.findCoalesceable() 

3964 if candidate: 

3965 util.fistpoint.activate( 

3966 "LVHDRT_finding_a_suitable_pair", sr.uuid) 

3967 sr.coalesce(candidate, dryRun) 

3968 sr.xapi.srUpdate() 

3969 coalesced += 1 

3970 continue 

3971 

3972 candidate = sr.findLeafCoalesceable() 

3973 if candidate:

3974 sr.coalesceLeaf(candidate, dryRun) 

3975 sr.xapi.srUpdate() 

3976 coalesced += 1 

3977 continue 

3978 

3979 finally: 

3980 lockGCRunning.release()

3981 except: 

3982 task_status = "failure" 

3983 raise 

3984 finally: 

3985 sr.xapi.set_task_status(task_status) 

3986 Util.log("GC process exiting, no work left") 

3987 _create_init_file(sr.uuid) 

3988 lockGCActive.release() 

3989 

3990 

3991def _gc(session, srUuid, dryRun=False, immediate=False): 

3992 init(srUuid) 

3993 sr = SR.getInstance(srUuid, session) 

3994 if not sr.gcEnabled(False):

3995 return 

3996 

3997 try: 

3998 _gcLoop(sr, dryRun, immediate=immediate) 

3999 finally: 

4000 sr.check_no_space_candidates() 

4001 sr.cleanup() 

4002 sr.logFilter.logState() 

4003 del sr.xapi 

4004 

4005 

4006def _abort(srUuid, soft=False): 

4007 """Aborts an GC/coalesce. 

4008 

4009 srUuid: the UUID of the SR whose GC/coalesce must be aborted 

4010 soft: If set to True and there is a pending abort signal, the function 

4011 doesn't do anything. If set to False, a new abort signal is issued. 

4012 

4013 returns: True, holding lockGCActive, once the abort has completed. If

4014 soft is set to True and an abort signal is already pending, we return

4015 False without holding lockGCActive. An exception is raised in case of error."""

4016 Util.log("=== SR %s: abort ===" % (srUuid)) 

4017 init(srUuid) 

4018 if not lockGCActive.acquireNoblock(): 

4019 gotLock = False 

4020 Util.log("Aborting currently-running instance (SR %s)" % srUuid) 

4021 abortFlag = IPCFlag(srUuid) 

4022 if not abortFlag.set(FLAG_TYPE_ABORT, soft): 

4023 return False 

4024 for i in range(SR.LOCK_RETRY_ATTEMPTS): 

4025 gotLock = lockGCActive.acquireNoblock() 

4026 if gotLock: 

4027 break 

4028 time.sleep(SR.LOCK_RETRY_INTERVAL) 

4029 abortFlag.clear(FLAG_TYPE_ABORT) 

4030 if not gotLock: 

4031 raise util.CommandException(code=errno.ETIMEDOUT, 

4032 reason="SR %s: error aborting existing process" % srUuid) 

4033 return True 
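# A minimal usage sketch (illustrative only): a soft abort backs off if a
# signal is already pending, while a hard abort always signals and waits:
#
#   if _abort(srUuid, soft=True):
#       # we now hold lockGCActive and the GC/coalesce has stopped
#       lockGCActive.release()
#   # else: another abort was already in flight, so nothing was done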

4034 

4035 

4036def init(srUuid): 

4037 global lockGCRunning 

4038 if not lockGCRunning:

4039 lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid) 

4040 global lockGCActive 

4041 if not lockGCActive:

4042 lockGCActive = LockActive(srUuid) 

4043 

4044 

4045class LockActive: 

4046 """ 

4047 Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired 

4048 if another process holds the SR lock. 

4049 """ 

4050 def __init__(self, srUuid): 

4051 self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid) 

4052 self._srLock = lock.Lock(lock.LOCK_TYPE_SR, srUuid) 

4053 

4054 def acquireNoblock(self): 

4055 self._srLock.acquire() 

4056 

4057 try: 

4058 return self._lock.acquireNoblock() 

4059 finally: 

4060 self._srLock.release() 

4061 

4062 def release(self): 

4063 self._lock.release() 
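# Rationale: acquireNoblock() briefly takes the (blocking) SR lock before
# trying the GC-active lock, so a process currently holding the SR lock
# effectively delays GC activation until it has finished.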

4064 

4065 

4066def usage(): 

4067 output = """Garbage collect and/or coalesce COW images in a COW-based SR 

4068 

4069Parameters: 

4070 -u --uuid UUID SR UUID 

4071 and one of: 

4072 -g --gc garbage collect, coalesce, and repeat while there is work 

4073 -G --gc_force garbage collect once, aborting any current operations 

4074 -c --clean_cache <max_age> clean up IntelliCache cache files older than

4075 max_age hours 

4076 -a --abort abort any currently running operation (GC or coalesce) 

4077 -q --query query the current state (GC'ing, coalescing or not running) 

4078 -x --disable disable GC/coalesce (will be in effect until you exit) 

4079 -t --debug see Debug below 

4080 

4081Options: 

4082 -b --background run in background (return immediately) (valid for -g only) 

4083 -f --force continue in the presence of COW images with errors (when doing 

4084 GC, this might cause removal of any such images) (only valid 

4085 for -G) (DANGEROUS) 

4086 

4087Debug: 

4088 The --debug parameter enables manipulation of LVHD VDIs for debugging 

4089 purposes. ** NEVER USE IT ON A LIVE VM ** 

4090 The following parameters are required: 

4091 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate", 

4092 "deflate". 

4093 -v --vdi_uuid VDI UUID 

4094 """ 

4095 #-d --dry-run don't actually perform any SR-modifying operations 

4096 print(output) 

4097 Util.log("(Invalid usage)") 

4098 sys.exit(1) 

4099 

4100 

4101############################################################################## 

4102# 

4103# API 

4104# 

4105def abort(srUuid, soft=False): 

4106 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair. 

4107 """ 

4108 if _abort(srUuid, soft): 

4109 Util.log("abort: releasing the process lock") 

4110 lockGCActive.release() 

4111 return True 

4112 else: 

4113 return False 

4114 

4115 

4116def gc(session, srUuid, inBackground, dryRun=False): 

4117 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return 

4118 immediately if inBackground=True. 

4119 

4120 The following algorithm is used: 

4121 1. If we are already GC'ing in this SR, return 

4122 2. If we are already coalescing a VDI pair: 

4123 a. Scan the SR and determine if the VDI pair is GC'able 

4124 b. If the pair is not GC'able, return 

4125 c. If the pair is GC'able, abort coalesce 

4126 3. Scan the SR 

4127 4. If there is nothing to collect, nor to coalesce, return 

4128 5. If there is something to collect, GC all, then goto 3 

4129 6. If there is something to coalesce, coalesce one pair, then goto 3 

4130 """ 

4131 Util.log("=== SR %s: gc ===" % srUuid) 

4132 

4133 signal.signal(signal.SIGTERM, receiveSignal) 

4134 

4135 if inBackground: 

4136 if daemonize():

4137 # we are now running in the background. Catch & log any errors 

4138 # because there is no other way to propagate them back at this 

4139 # point 

4140 

4141 try: 

4142 _gc(None, srUuid, dryRun) 

4143 except AbortException: 

4144 Util.log("Aborted") 

4145 except Exception: 

4146 Util.logException("gc") 

4147 Util.log("* * * * * SR %s: ERROR\n" % srUuid) 

4148 os._exit(0) 

4149 else: 

4150 _gc(session, srUuid, dryRun, immediate=True) 

4151 

4152 

4153def start_gc(session, sr_uuid): 

4154 """ 

4155 This function is used to try to start a backgrounded GC session by forking 

4156 the current process. If using the systemd version, call start_gc_service() instead. 

4157 """ 

4158 # Don't bother if an instance is already running (this is just an

4159 # optimization to avoid the overhead of forking a new process when we

4160 # don't have to; the new process would check the lock anyway)

4161 lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid) 

4162 if not lockRunning.acquireNoblock(): 

4163 if should_preempt(session, sr_uuid): 

4164 util.SMlog("Aborting currently-running coalesce of garbage VDI") 

4165 try: 

4166 if not abort(sr_uuid, soft=True): 

4167 util.SMlog("The GC has already been scheduled to re-start") 

4168 except util.CommandException as e: 

4169 if e.code != errno.ETIMEDOUT: 

4170 raise 

4171 util.SMlog('failed to abort the GC') 

4172 else: 

4173 util.SMlog("A GC instance already running, not kicking") 

4174 return 

4175 else: 

4176 lockRunning.release() 

4177 

4178 util.SMlog(f"Starting GC file is {__file__}") 

4179 subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'], 

4180 stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4181 

4182def start_gc_service(sr_uuid, wait=False): 

4183 """ 

4184 This starts the templated systemd service which runs GC on the given SR UUID. 

4185 If the service was already started, this is a no-op. 

4186 

4187 Because the service is a one-shot with RemainAfterExit=no, when called with 

4188 wait=True this will run the service synchronously and will not return until the 

4189 run has finished. This is used to force a run of the GC instead of just kicking it 

4190 in the background. 

4191 """ 

4192 sr_uuid_esc = sr_uuid.replace("-", "\\x2d") 

4193 util.SMlog(f"Kicking SMGC@{sr_uuid}...") 

4194 cmd = ["/usr/bin/systemctl", "--quiet"]

4195 if not wait:

4196 cmd.append("--no-block") 

4197 cmd += ["start", f"SMGC@{sr_uuid_esc}"] 

4198 subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 
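# For example, a hypothetical sr_uuid of "aaaa-bbbb" becomes the unit instance
# "SMGC@aaaa\x2dbbbb": systemd treats "-" specially in instance names, so each
# dash is escaped to "\x2d", the same encoding `systemd-escape` produces.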

4199 

4200 

4201def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False): 

4202 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure 

4203 the SR lock is held. 

4204 The following algorithm is used: 

4205 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce 

4206 2. Scan the SR 

4207 3. GC 

4208 4. return 

4209 """ 

4210 Util.log("=== SR %s: gc_force ===" % srUuid) 

4211 init(srUuid) 

4212 sr = SR.getInstance(srUuid, session, lockSR, True) 

4213 if not lockGCActive.acquireNoblock(): 

4214 abort(srUuid) 

4215 else: 

4216 Util.log("Nothing was running, clear to proceed") 

4217 

4218 if force: 

4219 Util.log("FORCED: will continue even if there are COW image errors") 

4220 sr.scanLocked(force) 

4221 sr.cleanupCoalesceJournals() 

4222 

4223 try: 

4224 sr.cleanupCache() 

4225 sr.garbageCollect(dryRun) 

4226 finally: 

4227 sr.cleanup() 

4228 sr.logFilter.logState() 

4229 lockGCActive.release() 

4230 

4231 

4232def get_state(srUuid): 

4233 """Return whether GC/coalesce is currently running or not. This asks systemd for 

4234 the state of the templated SMGC service and will return True if it is "activating" 

4235 or "running" (for completeness, as in practice it will never achieve the latter state) 

4236 """ 

4237 sr_uuid_esc = srUuid.replace("-", "\\x2d") 

4238 cmd = ["/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"]

4239 result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4240 state = result.stdout.decode('utf-8').rstrip() 

4241 if state == "activating" or state == "running": 

4242 return True 

4243 return False 

4244 

4245 

4246def should_preempt(session, srUuid): 

4247 sr = SR.getInstance(srUuid, session) 

4248 entries = sr.journaler.getAll(VDI.JRN_COALESCE) 

4249 if len(entries) == 0: 

4250 return False 

4251 elif len(entries) > 1: 

4252 raise util.SMException("More than one coalesce entry: " + str(entries)) 

4253 sr.scanLocked() 

4254 coalescedUuid = entries.popitem()[0] 

4255 garbage = sr.findGarbage() 

4256 for vdi in garbage: 

4257 if vdi.uuid == coalescedUuid: 

4258 return True 

4259 return False 
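# should_preempt() returns True only when the single VDI currently being
# coalesced has itself become garbage; in that case the caller (start_gc)
# aborts the running coalesce so the GC can collect the VDI instead.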

4260 

4261 

4262def get_coalesceable_leaves(session, srUuid, vdiUuids): 

4263 coalesceable = [] 

4264 sr = SR.getInstance(srUuid, session) 

4265 sr.scanLocked() 

4266 for uuid in vdiUuids: 

4267 vdi = sr.getVDI(uuid) 

4268 if not vdi: 

4269 raise util.SMException("VDI %s not found" % uuid) 

4270 if vdi.isLeafCoalesceable(): 

4271 coalesceable.append(uuid) 

4272 return coalesceable 

4273 

4274 

4275def cache_cleanup(session, srUuid, maxAge): 

4276 sr = SR.getInstance(srUuid, session) 

4277 return sr.cleanupCache(maxAge) 

4278 

4279 

4280def debug(sr_uuid, cmd, vdi_uuid): 

4281 Util.log("Debug command: %s" % cmd) 

4282 sr = SR.getInstance(sr_uuid, None) 

4283 if not isinstance(sr, LVMSR): 

4284 print("Error: not an LVHD SR") 

4285 return 

4286 sr.scanLocked() 

4287 vdi = sr.getVDI(vdi_uuid) 

4288 if not vdi: 

4289 print("Error: VDI %s not found" % vdi_uuid)

4290 return 

4291 print("Running %s on SR %s" % (cmd, sr)) 

4292 print("VDI before: %s" % vdi) 

4293 if cmd == "activate": 

4294 vdi._activate() 

4295 print("VDI file: %s" % vdi.path) 

4296 if cmd == "deactivate": 

4297 ns = NS_PREFIX_LVM + sr.uuid 

4298 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False) 

4299 if cmd == "inflate": 

4300 vdi.inflateFully() 

4301 sr.cleanup() 

4302 if cmd == "deflate": 

4303 vdi.deflate() 

4304 sr.cleanup() 

4305 sr.scanLocked() 

4306 print("VDI after: %s" % vdi) 

4307 

4308 

4309def abort_optional_reenable(uuid): 

4310 print("Disabling GC/coalesce for %s" % uuid) 

4311 ret = _abort(uuid) 

4312 input("Press enter to re-enable...") 

4313 print("GC/coalesce re-enabled") 

4314 lockGCRunning.release() 

4315 if ret: 

4316 lockGCActive.release() 

4317 

4318 

4319############################################################################## 

4320# 

4321# CLI 

4322# 

4323def main(): 

4324 action = "" 

4325 maxAge = 0 

4326 uuid = "" 

4327 background = False 

4328 force = False 

4329 dryRun = False 

4330 debug_cmd = "" 

4331 vdi_uuid = "" 

4332 shortArgs = "gGc:aqxu:bfdt:v:" 

4333 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable", 

4334 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="] 

4335 

4336 try: 

4337 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs) 

4338 except getopt.GetoptError: 

4339 usage() 

4340 for o, a in opts: 

4341 if o in ("-g", "--gc"): 

4342 action = "gc" 

4343 if o in ("-G", "--gc_force"): 

4344 action = "gc_force" 

4345 if o in ("-c", "--clean_cache"): 

4346 action = "clean_cache" 

4347 maxAge = int(a) 

4348 if o in ("-a", "--abort"): 

4349 action = "abort" 

4350 if o in ("-q", "--query"): 

4351 action = "query" 

4352 if o in ("-x", "--disable"): 

4353 action = "disable" 

4354 if o in ("-u", "--uuid"): 

4355 uuid = a 

4356 if o in ("-b", "--background"): 

4357 background = True 

4358 if o in ("-f", "--force"): 

4359 force = True 

4360 if o in ("-d", "--dry-run"): 

4361 Util.log("Dry run mode") 

4362 dryRun = True 

4363 if o in ("-t", "--debug"): 

4364 action = "debug" 

4365 debug_cmd = a 

4366 if o in ("-v", "--vdi_uuid"): 

4367 vdi_uuid = a 

4368 

4369 if not action or not uuid: 

4370 usage() 

4371 if action == "debug" and not (debug_cmd and vdi_uuid) or \ 

4372 action != "debug" and (debug_cmd or vdi_uuid): 

4373 usage() 

4374 

4375 if action != "query" and action != "debug": 

4376 print("All output goes to log") 

4377 

4378 if action == "gc": 

4379 gc(None, uuid, background, dryRun) 

4380 elif action == "gc_force": 

4381 gc_force(None, uuid, force, dryRun, True) 

4382 elif action == "clean_cache": 

4383 cache_cleanup(None, uuid, maxAge) 

4384 elif action == "abort": 

4385 abort(uuid) 

4386 elif action == "query": 

4387 print("Currently running: %s" % get_state(uuid)) 

4388 elif action == "disable": 

4389 abort_optional_reenable(uuid) 

4390 elif action == "debug": 

4391 debug(uuid, debug_cmd, vdi_uuid) 

4392 

4393 

4394if __name__ == '__main__':

4395 main()