1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect COW-based SRs in the background 

19# 

20 

21from sm_typing import Any, Optional, List, override 

22 

23import os 

24import os.path 

25import sys 

26import time 

27import signal 

28import subprocess 

29import getopt 

30import datetime 

31import traceback 

32import base64 

33import zlib 

34import errno 

35import stat 

36 

37import XenAPI # pylint: disable=import-error 

38import util 

39import lvutil 

40import lvmcache 

41import journaler 

42import fjournaler 

43import lock 

44import blktap2 

45import xs_errors 

46from refcounter import RefCounter 

47from ipc import IPCFlag 

48from lvmanager import LVActivator 

49from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

50from functools import reduce 

51from time import monotonic as _time 

52 

53from constants import NS_PREFIX_LVM, VG_LOCATION, VG_PREFIX 

54from cowutil import CowImageInfo, CowUtil, getCowUtil 

55from lvmcowutil import LV_PREFIX, LvmCowUtil 

56from vditype import VdiType, VdiTypeExtension, VDI_COW_TYPES, VDI_TYPE_TO_EXTENSION 

57 

58try: 

59 from linstorcowutil import LinstorCowUtil 

60 from linstorjournaler import LinstorJournaler 

61 from linstorvolumemanager import get_controller_uri 

62 from linstorvolumemanager import LinstorVolumeManager 

63 from linstorvolumemanager import LinstorVolumeManagerError 

64 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX 

65 

66 LINSTOR_AVAILABLE = True 

67except ImportError: 

68 LINSTOR_AVAILABLE = False 

69 

70# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not 

71# possible due to lvhd_stop_using_() not working correctly. However, we leave 

72# this option available through the explicit LEAFCLSC_FORCE flag in the VDI 

73# record for use by the offline tool (which makes the operation safe by pausing 

74# the VM first) 

75AUTO_ONLINE_LEAF_COALESCE_ENABLED = True 

76 

77FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce 

78 

79# process "lock", used simply as an indicator that a process already exists 

80# that is doing GC/coalesce on this SR (such a process holds the lock, and we 

81# check for the fact by trying the lock). 

82lockGCRunning = None 

83 

84# process "lock" to indicate that the GC process has been activated but may not 

85 # yet be running; it stops a second process from being started. 

86LOCK_TYPE_GC_ACTIVE = "gc_active" 

87lockGCActive = None 

88 

89# Default coalesce error rate limit, in messages per minute. A zero value 

90# disables throttling, and a negative value disables error reporting. 

91DEFAULT_COALESCE_ERR_RATE = 1.0 / 60 
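# Illustrative arithmetic: with the default of 1.0 / 60 messages per minute,
# the throttle in _reportCoalesceError() spaces XenCenter messages by
# (1 / (1.0 / 60)) * 60 = 3600 seconds, i.e. at most one message per hour.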

92 

93COALESCE_LAST_ERR_TAG = 'last-coalesce-error' 

94COALESCE_ERR_RATE_TAG = 'coalesce-error-rate' 

95VAR_RUN = "/var/run/" 

96SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log" 
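# For example, SPEED_LOG_ROOT.format(uuid=sr_uuid) gives
# "/var/run/<sr_uuid>.speed_log" (sr_uuid being the UUID of the SR whose
# coalesce speed is tracked).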

97 

98N_RUNNING_AVERAGE = 10 

99 

100NON_PERSISTENT_DIR = '/run/nonpersistent/sm' 

101 

102# Signal Handler 

103SIGTERM = False 

104 

105 

106class AbortException(util.SMException): 

107 pass 

108 

109 

110def receiveSignal(signalNumber, frame): 

111 global SIGTERM 

112 

113 util.SMlog("GC: received SIGTERM") 

114 SIGTERM = True 

115 return 

116 

117 

118################################################################################ 

119# 

120# Util 

121# 

122class Util: 

123 RET_RC = 1 

124 RET_STDOUT = 2 

125 RET_STDERR = 4 

126 

127 UUID_LEN = 36 

128 

129 PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024} 

130 

131 @staticmethod 

132 def log(text) -> None: 

133 util.SMlog(text, ident="SMGC") 

134 

135 @staticmethod 

136 def logException(tag): 

137 info = sys.exc_info() 

138 if info[0] == SystemExit: 

139 # this should not be happening when catching "Exception", but it is 

140 sys.exit(0) 

141 tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2])) 

142 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

143 Util.log(" ***********************") 

144 Util.log(" * E X C E P T I O N *") 

145 Util.log(" ***********************") 

146 Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1])) 

147 Util.log(tb) 

148 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

149 

150 @staticmethod 

151 def doexec(args, expectedRC, inputtext=None, ret=None, log=True): 

152 "Execute a subprocess, then return its return code, stdout, stderr" 

153 proc = subprocess.Popen(args, 

154 stdin=subprocess.PIPE, \ 

155 stdout=subprocess.PIPE, \ 

156 stderr=subprocess.PIPE, \ 

157 shell=True, \ 

158 close_fds=True) 

159 (stdout, stderr) = proc.communicate(inputtext) 

160 stdout = str(stdout) 

161 stderr = str(stderr) 

162 rc = proc.returncode 

163 if log: 

164 Util.log("`%s`: %s" % (args, rc)) 

165 if type(expectedRC) != type([]): 

166 expectedRC = [expectedRC] 

167 if not rc in expectedRC: 

168 reason = stderr.strip() 

169 if stdout.strip(): 

170 reason = "%s (stdout: %s)" % (reason, stdout.strip()) 

171 Util.log("Failed: %s" % reason) 

172 raise util.CommandException(rc, args, reason) 

173 

174 if ret == Util.RET_RC: 

175 return rc 

176 if ret == Util.RET_STDERR: 

177 return stderr 

178 return stdout 

179 

180 @staticmethod 

181 def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut): 

182 """execute func in a separate thread and kill it if abortTest signals 

183 so""" 

184 abortSignaled = abortTest() # check now before we clear resultFlag 

185 resultFlag = IPCFlag(ns) 

186 resultFlag.clearAll() 

187 pid = os.fork() 

188 if pid: 

189 startTime = _time() 

190 try: 

191 while True: 

192 if resultFlag.test("success"): 

193 Util.log(" Child process completed successfully") 

194 resultFlag.clear("success") 

195 return 

196 if resultFlag.test("failure"): 

197 resultFlag.clear("failure") 

198 raise util.SMException("Child process exited with error") 

199 if abortTest() or abortSignaled or SIGTERM: 

200 os.killpg(pid, signal.SIGKILL) 

201 raise AbortException("Aborting due to signal") 

202 if timeOut and _time() - startTime > timeOut: 

203 os.killpg(pid, signal.SIGKILL) 

204 resultFlag.clearAll() 

205 raise util.SMException("Timed out") 

206 time.sleep(pollInterval) 

207 finally: 

208 wait_pid = 0 

209 rc = -1 

210 count = 0 

211 while wait_pid == 0 and count < 10: 

212 wait_pid, rc = os.waitpid(pid, os.WNOHANG) 

213 if wait_pid == 0: 

214 time.sleep(2) 

215 count += 1 

216 

217 if wait_pid == 0: 

218 Util.log("runAbortable: wait for process completion timed out") 

219 else: 

220 os.setpgrp() 

221 try: 

222 if func() == ret: 

223 resultFlag.set("success") 

224 else: 

225 resultFlag.set("failure") 

226 except Exception as e: 

227 Util.log("Child process failed with : (%s)" % e) 

228 resultFlag.set("failure") 

229 Util.logException("This exception has occurred") 

230 os._exit(0) 

231 
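# Illustrative use of Util.runAbortable() above (modelled on the callers
# further down in this file; path, offset and length are placeholders):
#
#   abortTest = lambda: IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT)
#   Util.runAbortable(lambda: util.zeroOut(path, offset, length), True,
#                     sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)
#
# The callable runs in a forked child; the parent polls the IPC flags and
# kills the child's process group on abort (a timeOut of 0 disables the
# timeout check).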

232 @staticmethod 

233 def num2str(number): 

234 for prefix in ("G", "M", "K"): 

235 if number >= Util.PREFIX[prefix]: 

236 return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix) 

237 return "%s" % number 

238 
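# num2str() examples (illustrative): num2str(3 * 1024 * 1024) == "3.000M";
# values below 1024 come back unchanged, e.g. num2str(512) == "512".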

239 @staticmethod 

240 def numBits(val): 

241 count = 0 

242 while val: 

243 count += val & 1 

244 val = val >> 1 

245 return count 

246 

247 @staticmethod 

248 def countBits(bitmap1, bitmap2): 

249 """return bit count in the bitmap produced by ORing the two bitmaps""" 

250 len1 = len(bitmap1) 

251 len2 = len(bitmap2) 

252 lenLong = len1 

253 lenShort = len2 

254 bitmapLong = bitmap1 

255 if len2 > len1: 

256 lenLong = len2 

257 lenShort = len1 

258 bitmapLong = bitmap2 

259 

260 count = 0 

261 for i in range(lenShort): 

262 val = bitmap1[i] | bitmap2[i] 

263 count += Util.numBits(val) 

264 

265 for i in range(i + 1, lenLong): 

266 val = bitmapLong[i] 

267 count += Util.numBits(val) 

268 return count 

269 
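# countBits() example (illustrative): countBits(b'\x01', b'\x03\x80') == 3,
# since the overlapping bytes OR to 0x03 (2 bits) and the longer bitmap
# contributes the remaining 0x80 byte (1 bit).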

270 @staticmethod 

271 def getThisScript(): 

272 thisScript = util.get_real_path(__file__) 

273 if thisScript.endswith(".pyc"): 

274 thisScript = thisScript[:-1] 

275 return thisScript 

276 

277 

278################################################################################ 

279# 

280# XAPI 

281# 

282class XAPI: 

283 USER = "root" 

284 PLUGIN_ON_SLAVE = "on-slave" 

285 

286 CONFIG_SM = 0 

287 CONFIG_OTHER = 1 

288 CONFIG_ON_BOOT = 2 

289 CONFIG_ALLOW_CACHING = 3 

290 

291 CONFIG_NAME = { 

292 CONFIG_SM: "sm-config", 

293 CONFIG_OTHER: "other-config", 

294 CONFIG_ON_BOOT: "on-boot", 

295 CONFIG_ALLOW_CACHING: "allow_caching" 

296 } 

297 

298 class LookupError(util.SMException): 

299 pass 

300 

301 @staticmethod 

302 def getSession(): 

303 session = XenAPI.xapi_local() 

304 session.xenapi.login_with_password(XAPI.USER, '', '', 'SM') 

305 return session 

306 

307 def __init__(self, session, srUuid): 

308 self.sessionPrivate = False 

309 self.session = session 

310 if self.session is None: 

311 self.session = self.getSession() 

312 self.sessionPrivate = True 

313 self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid) 

314 self.srRecord = self.session.xenapi.SR.get_record(self._srRef) 

315 self.hostUuid = util.get_this_host() 

316 self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid) 

317 self.task = None 

318 self.task_progress = {"coalescable": 0, "done": 0} 

319 

320 def __del__(self): 

321 if self.sessionPrivate: 

322 self.session.xenapi.session.logout() 

323 

324 @property 

325 def srRef(self): 

326 return self._srRef 

327 

328 def isPluggedHere(self): 

329 pbds = self.getAttachedPBDs() 

330 for pbdRec in pbds: 

331 if pbdRec["host"] == self._hostRef: 

332 return True 

333 return False 

334 

335 def poolOK(self): 

336 host_recs = self.session.xenapi.host.get_all_records() 

337 for host_ref, host_rec in host_recs.items(): 

338 if not host_rec["enabled"]: 

339 Util.log("Host %s not enabled" % host_rec["uuid"]) 

340 return False 

341 return True 

342 

343 def isMaster(self): 

344 if self.srRecord["shared"]: 

345 pool = list(self.session.xenapi.pool.get_all_records().values())[0] 

346 return pool["master"] == self._hostRef 

347 else: 

348 pbds = self.getAttachedPBDs() 

349 if len(pbds) < 1: 

350 raise util.SMException("Local SR not attached") 

351 elif len(pbds) > 1: 

352 raise util.SMException("Local SR multiply attached") 

353 return pbds[0]["host"] == self._hostRef 

354 

355 def getAttachedPBDs(self): 

356 """Return PBD records for all PBDs of this SR that are currently 

357 attached""" 

358 attachedPBDs = [] 

359 pbds = self.session.xenapi.PBD.get_all_records() 

360 for pbdRec in pbds.values(): 

361 if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]: 

362 attachedPBDs.append(pbdRec) 

363 return attachedPBDs 

364 

365 def getOnlineHosts(self): 

366 return util.get_online_hosts(self.session) 

367 

368 def ensureInactive(self, hostRef, args): 

369 text = self.session.xenapi.host.call_plugin( \ 

370 hostRef, self.PLUGIN_ON_SLAVE, "multi", args) 

371 Util.log("call-plugin returned: '%s'" % text) 

372 

373 def getRecordHost(self, hostRef): 

374 return self.session.xenapi.host.get_record(hostRef) 

375 

376 def _getRefVDI(self, uuid): 

377 return self.session.xenapi.VDI.get_by_uuid(uuid) 

378 

379 def getRefVDI(self, vdi): 

380 return self._getRefVDI(vdi.uuid) 

381 

382 def getRecordVDI(self, uuid): 

383 try: 

384 ref = self._getRefVDI(uuid) 

385 return self.session.xenapi.VDI.get_record(ref) 

386 except XenAPI.Failure: 

387 return None 

388 

389 def singleSnapshotVDI(self, vdi): 

390 return self.session.xenapi.VDI.snapshot(vdi.getRef(), 

391 {"type": "internal"}) 

392 

393 def forgetVDI(self, srUuid, vdiUuid): 

394 """Forget the VDI, but handle the case where the VDI has already been 

395 forgotten (i.e. ignore errors)""" 

396 try: 

397 vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid) 

398 self.session.xenapi.VDI.forget(vdiRef) 

399 except XenAPI.Failure: 

400 pass 

401 

402 def getConfigVDI(self, vdi, key): 

403 kind = vdi.CONFIG_TYPE[key] 

404 if kind == self.CONFIG_SM: 

405 cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef()) 

406 elif kind == self.CONFIG_OTHER: 

407 cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef()) 

408 elif kind == self.CONFIG_ON_BOOT: 

409 cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef()) 

410 elif kind == self.CONFIG_ALLOW_CACHING: 

411 cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef()) 

412 else: 

413 assert(False) 

414 Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg))) 

415 return cfg 

416 

417 def removeFromConfigVDI(self, vdi, key): 

418 kind = vdi.CONFIG_TYPE[key] 

419 if kind == self.CONFIG_SM: 

420 self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key) 

421 elif kind == self.CONFIG_OTHER: 

422 self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key) 

423 else: 

424 assert(False) 

425 

426 def addToConfigVDI(self, vdi, key, val): 

427 kind = vdi.CONFIG_TYPE[key] 

428 if kind == self.CONFIG_SM: 

429 self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val) 

430 elif kind == self.CONFIG_OTHER: 

431 self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val) 

432 else: 

433 assert(False) 

434 

435 def isSnapshot(self, vdi): 

436 return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef()) 

437 

438 def markCacheSRsDirty(self): 

439 sr_refs = self.session.xenapi.SR.get_all_records_where( \ 

440 'field "local_cache_enabled" = "true"') 

441 for sr_ref in sr_refs: 

442 Util.log("Marking SR %s dirty" % sr_ref) 

443 util.set_dirty(self.session, sr_ref) 

444 

445 def srUpdate(self): 

446 Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"]) 

447 abortFlag = IPCFlag(self.srRecord["uuid"]) 

448 task = self.session.xenapi.Async.SR.update(self._srRef) 

449 cancelTask = True 

450 try: 

451 for i in range(60): 

452 status = self.session.xenapi.task.get_status(task) 

453 if not status == "pending": 

454 Util.log("SR.update_asynch status changed to [%s]" % status) 

455 cancelTask = False 

456 return 

457 if abortFlag.test(FLAG_TYPE_ABORT): 

458 Util.log("Abort signalled during srUpdate, cancelling task...") 

459 try: 

460 self.session.xenapi.task.cancel(task) 

461 cancelTask = False 

462 Util.log("Task cancelled") 

463 except: 

464 pass 

465 return 

466 time.sleep(1) 

467 finally: 

468 if cancelTask: 

469 self.session.xenapi.task.cancel(task) 

470 self.session.xenapi.task.destroy(task) 

471 Util.log("Asynch srUpdate still running, but timeout exceeded.") 

472 

473 def update_task(self): 

474 self.session.xenapi.task.set_other_config( 

475 self.task, 

476 { 

477 "applies_to": self._srRef 

478 }) 

479 total = self.task_progress['coalescable'] + self.task_progress['done'] 

480 if (total > 0): 

481 self.session.xenapi.task.set_progress( 

482 self.task, float(self.task_progress['done']) / total) 

483 

484 def create_task(self, label, description): 

485 self.task = self.session.xenapi.task.create(label, description) 

486 self.update_task() 

487 

488 def update_task_progress(self, key, value): 

489 self.task_progress[key] = value 

490 if self.task: 

491 self.update_task() 

492 

493 def set_task_status(self, status): 

494 if self.task: 

495 self.session.xenapi.task.set_status(self.task, status) 

496 

497 

498################################################################################ 

499# 

500# VDI 

501# 

502class VDI(object): 

503 """Object representing a VDI of a COW-based SR""" 

504 

505 POLL_INTERVAL = 1 

506 POLL_TIMEOUT = 30 

507 DEVICE_MAJOR = 202 

508 

509 # config keys & values 

510 DB_VDI_PARENT = "vhd-parent" 

511 DB_VDI_TYPE = "vdi_type" 

512 DB_VDI_BLOCKS = "vhd-blocks" 

513 DB_VDI_PAUSED = "paused" 

514 DB_VDI_RELINKING = "relinking" 

515 DB_VDI_ACTIVATING = "activating" 

516 DB_GC = "gc" 

517 DB_COALESCE = "coalesce" 

518 DB_LEAFCLSC = "leaf-coalesce" # config key 

519 DB_GC_NO_SPACE = "gc_no_space" 

520 LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce 

521 LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce 

522 LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means 

523 # no space to snap-coalesce or unable to keep 

524 # up with VDI. This is not used by the SM, it 

525 # might be used by external components. 

526 DB_ONBOOT = "on-boot" 

527 ONBOOT_RESET = "reset" 

528 DB_ALLOW_CACHING = "allow_caching" 

529 

530 CONFIG_TYPE = { 

531 DB_VDI_PARENT: XAPI.CONFIG_SM, 

532 DB_VDI_TYPE: XAPI.CONFIG_SM, 

533 DB_VDI_BLOCKS: XAPI.CONFIG_SM, 

534 DB_VDI_PAUSED: XAPI.CONFIG_SM, 

535 DB_VDI_RELINKING: XAPI.CONFIG_SM, 

536 DB_VDI_ACTIVATING: XAPI.CONFIG_SM, 

537 DB_GC: XAPI.CONFIG_OTHER, 

538 DB_COALESCE: XAPI.CONFIG_OTHER, 

539 DB_LEAFCLSC: XAPI.CONFIG_OTHER, 

540 DB_ONBOOT: XAPI.CONFIG_ON_BOOT, 

541 DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING, 

542 DB_GC_NO_SPACE: XAPI.CONFIG_SM 

543 } 

544 
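# Illustrative routing through CONFIG_TYPE: DB_LEAFCLSC maps to
# XAPI.CONFIG_OTHER, so vdi.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)
# lands in VDI.add_to_other_config(), while DB_VDI_PARENT (CONFIG_SM) goes
# through VDI.add_to_sm_config(); see XAPI.addToConfigVDI().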

545 LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes 

546 LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds 

547 TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating 

548 # feasibility of leaf coalesce 

549 

550 JRN_RELINK = "relink" # journal entry type for relinking children 

551 JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced 

552 JRN_LEAF = "leaf" # used in coalesce-leaf 

553 

554 STR_TREE_INDENT = 4 

555 

556 def __init__(self, sr, uuid, vdi_type): 

557 self.sr = sr 

558 self.scanError = True 

559 self.uuid = uuid 

560 self.vdi_type = vdi_type 

561 self.fileName = "" 

562 self.parentUuid = "" 

563 self.sizeVirt = -1 

564 self._sizePhys = -1 

565 self._sizeAllocated = -1 

566 self.hidden = False 

567 self.parent = None 

568 self.children = [] 

569 self._vdiRef = None 

570 self.cowutil = getCowUtil(vdi_type) 

571 self._clearRef() 

572 

573 @staticmethod 

574 def extractUuid(path): 

575 raise NotImplementedError("Implement in sub class") 

576 

577 def load(self, info=None) -> None: 

578 """Load VDI info""" 

579 pass 

580 

581 def getDriverName(self) -> str: 

582 return self.vdi_type 

583 

584 def getRef(self): 

585 if self._vdiRef is None: 

586 self._vdiRef = self.sr.xapi.getRefVDI(self) 

587 return self._vdiRef 

588 

589 def getConfig(self, key, default=None): 

590 config = self.sr.xapi.getConfigVDI(self, key) 

591 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING: 

592 val = config 

593 else: 

594 val = config.get(key) 

595 if val: 

596 return val 

597 return default 

598 

599 def setConfig(self, key, val): 

600 self.sr.xapi.removeFromConfigVDI(self, key) 

601 self.sr.xapi.addToConfigVDI(self, key, val) 

602 Util.log("Set %s = %s for %s" % (key, val, self)) 

603 

604 def delConfig(self, key): 

605 self.sr.xapi.removeFromConfigVDI(self, key) 

606 Util.log("Removed %s from %s" % (key, self)) 

607 

608 def ensureUnpaused(self): 

609 if self.getConfig(self.DB_VDI_PAUSED) == "true": 

610 Util.log("Unpausing VDI %s" % self) 

611 self.unpause() 

612 

613 def pause(self, failfast=False) -> None: 

614 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid, 

615 self.uuid, failfast): 

616 raise util.SMException("Failed to pause VDI %s" % self) 

617 

618 def _report_tapdisk_unpause_error(self): 

619 try: 

620 xapi = self.sr.xapi.session.xenapi 

621 sr_ref = xapi.SR.get_by_uuid(self.sr.uuid) 

622 msg_name = "failed to unpause tapdisk" 

623 msg_body = "Failed to unpause tapdisk for VDI %s, " \ 

624 "VMs using this tapdisk have lost access " \ 

625 "to the corresponding disk(s)" % self.uuid 

626 xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body) 

627 except Exception as e: 

628 util.SMlog("failed to generate message: %s" % e) 

629 

630 def unpause(self): 

631 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid, 

632 self.uuid): 

633 self._report_tapdisk_unpause_error() 

634 raise util.SMException("Failed to unpause VDI %s" % self) 

635 

636 def refresh(self, ignoreNonexistent=True): 

637 """Pause-unpause in one step""" 

638 self.sr.lock() 

639 try: 

640 try: 

641 if not blktap2.VDI.tap_refresh(self.sr.xapi.session, 

642 self.sr.uuid, self.uuid): 

643 self._report_tapdisk_unpause_error() 

644 raise util.SMException("Failed to refresh %s" % self) 

645 except XenAPI.Failure as e: 

646 if util.isInvalidVDI(e) and ignoreNonexistent: 

647 Util.log("VDI %s not found, ignoring" % self) 

648 return 

649 raise 

650 finally: 

651 self.sr.unlock() 

652 

653 def isSnapshot(self): 

654 return self.sr.xapi.isSnapshot(self) 

655 

656 def isAttachedRW(self): 

657 return util.is_attached_rw( 

658 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef())) 

659 

660 def getVDIBlocks(self): 

661 val = self.updateBlockInfo() 

662 bitmap = zlib.decompress(base64.b64decode(val)) 

663 return bitmap 

664 

665 def isCoalesceable(self): 

666 """A VDI is coalesceable if it has no siblings and is not a leaf""" 

667 return not self.scanError and \ 

668 self.parent and \ 

669 len(self.parent.children) == 1 and \ 

670 self.hidden and \ 

671 len(self.children) > 0 

672 

673 def isLeafCoalesceable(self): 

674 """A VDI is leaf-coalesceable if it has no siblings and is a leaf""" 

675 return not self.scanError and \ 

676 self.parent and \ 

677 len(self.parent.children) == 1 and \ 

678 not self.hidden and \ 

679 len(self.children) == 0 

680 

681 def canLiveCoalesce(self, speed): 

682 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

683 isLeafCoalesceable() already""" 

684 feasibleSize = False 

685 allowedDownTime = \ 

686 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

687 allocated_size = self.getAllocatedSize() 

688 if speed: 

689 feasibleSize = \ 

690 allocated_size // speed < allowedDownTime 

691 else: 

692 feasibleSize = \ 

693 allocated_size < self.LIVE_LEAF_COALESCE_MAX_SIZE 

694 

695 return (feasibleSize or 

696 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 

697 
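# Feasibility sketch for canLiveCoalesce() (illustrative numbers): the allowed
# downtime is TIMEOUT_SAFETY_MARGIN * LIVE_LEAF_COALESCE_TIMEOUT = 0.5 * 10 = 5s,
# so with a measured speed of 100 MiB/s a VDI with under ~500 MiB of allocated
# data qualifies; without a speed sample only VDIs below
# LIVE_LEAF_COALESCE_MAX_SIZE (20 MiB) qualify, unless LEAFCLSC_FORCE is set.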

698 def getAllPrunable(self): 

699 if len(self.children) == 0: # base case 

700 # it is possible to have a hidden leaf that was recently coalesced 

701 # onto its parent, its children already relinked but not yet 

702 # reloaded - in which case it may not be garbage collected yet: 

703 # some tapdisks could still be using the file. 

704 if self.sr.journaler.get(self.JRN_RELINK, self.uuid): 

705 return [] 

706 if not self.scanError and self.hidden: 

707 return [self] 

708 return [] 

709 

710 thisPrunable = True 

711 vdiList = [] 

712 for child in self.children: 

713 childList = child.getAllPrunable() 

714 vdiList.extend(childList) 

715 if child not in childList: 

716 thisPrunable = False 

717 

718 # We can destroy the current VDI if all children are hidden BUT the 

719 # current VDI must be hidden too to do that! 

720 # Example in this case (after a failed live leaf coalesce): 

721 # 

722 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees): 

723 # SMGC: [32436] b5458d61(1.000G/4.127M) 

724 # SMGC: [32436] *OLD_b545(1.000G/4.129M) 

725 # 

726 # OLD_b545 is hidden and must be removed, but b5458d61 is not. 

727 # Normally the delete is not performed from this function but from 

728 # `_liveLeafCoalesce`. 

729 

730 if not self.scanError and not self.hidden and thisPrunable: 

731 vdiList.append(self) 

732 return vdiList 

733 

734 def getSizePhys(self) -> int: 

735 return self._sizePhys 

736 

737 def getAllocatedSize(self) -> int: 

738 return self._sizeAllocated 

739 

740 def getTreeRoot(self): 

741 "Get the root of the tree that self belongs to" 

742 root = self 

743 while root.parent: 

744 root = root.parent 

745 return root 

746 

747 def getTreeHeight(self): 

748 "Get the height of the subtree rooted at self" 

749 if len(self.children) == 0: 

750 return 1 

751 

752 maxChildHeight = 0 

753 for child in self.children: 

754 childHeight = child.getTreeHeight() 

755 if childHeight > maxChildHeight: 

756 maxChildHeight = childHeight 

757 

758 return maxChildHeight + 1 

759 

760 def getAllLeaves(self) -> List["VDI"]: 

761 "Get all leaf nodes in the subtree rooted at self" 

762 if len(self.children) == 0: 

763 return [self] 

764 

765 leaves = [] 

766 for child in self.children: 

767 leaves.extend(child.getAllLeaves()) 

768 return leaves 

769 

770 def updateBlockInfo(self) -> Optional[str]: 

771 val = base64.b64encode(self._queryCowBlocks()).decode() 

772 self.setConfig(VDI.DB_VDI_BLOCKS, val) 

773 return val 

774 

775 def rename(self, uuid) -> None: 

776 "Rename the VDI file" 

777 assert(not self.sr.vdis.get(uuid)) 

778 self._clearRef() 

779 oldUuid = self.uuid 

780 self.uuid = uuid 

781 self.children = [] 

782 # updating the children themselves is the responsibility of the caller 

783 del self.sr.vdis[oldUuid] 

784 self.sr.vdis[self.uuid] = self 

785 

786 def delete(self) -> None: 

787 "Physically delete the VDI" 

788 lock.Lock.cleanup(self.uuid, NS_PREFIX_LVM + self.sr.uuid) 

789 lock.Lock.cleanupAll(self.uuid) 

790 self._clear() 

791 

792 def getParent(self) -> str: 

793 return self.cowutil.getParent(self.path, lambda x: x.strip()) 

794 

795 def repair(self, parent) -> None: 

796 self.cowutil.repair(parent) 

797 

798 @override 

799 def __str__(self) -> str: 

800 strHidden = "" 

801 if self.hidden: 

802 strHidden = "*" 

803 strSizeVirt = "?" 

804 if self.sizeVirt > 0: 

805 strSizeVirt = Util.num2str(self.sizeVirt) 

806 strSizePhys = "?" 

807 if self._sizePhys > 0: 

808 strSizePhys = "/%s" % Util.num2str(self._sizePhys) 

809 strSizeAllocated = "?" 

810 if self._sizeAllocated >= 0: 

811 strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated) 

812 strType = "[{}]".format(self.vdi_type) 

813 

814 return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, 

815 strSizePhys, strSizeAllocated, strType) 

816 

817 def validate(self, fast=False) -> None: 

818 if self.cowutil.check(self.path, fast=fast) != CowUtil.CheckResult.Success: 

819 raise util.SMException("COW image %s corrupted" % self) 

820 

821 def _clear(self): 

822 self.uuid = "" 

823 self.path = "" 

824 self.parentUuid = "" 

825 self.parent = None 

826 self._clearRef() 

827 

828 def _clearRef(self): 

829 self._vdiRef = None 

830 

831 def _call_plug_cancel(self, hostRef): 

832 args = {"path": self.path, "vdi_type": self.vdi_type} 

833 self.sr.xapi.session.xenapi.host.call_plugin( \ 

834 hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_cancel", args) 

835 

836 def _call_plugin_coalesce(self, hostRef): 

837 args = {"path": self.path, "vdi_type": self.vdi_type} 

838 util.SMlog("DAMS: Calling remote coalesce with: {}".format(args)) 

839 ret = self.sr.xapi.session.xenapi.host.call_plugin( \ 

840 hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_tapdisk", args) 

841 util.SMlog("DAMS: Remote coalesce returned {}".format(ret)) 

842 

843 def _doCoalesceOnHost(self, hostRef): 

844 self.validate() 

845 self.parent.validate(True) 

846 self.parent._increaseSizeVirt(self.sizeVirt) 

847 self.sr._updateSlavesOnResize(self.parent) 

848 #TODO: We might need to make the LV RW on the slave directly for coalesce? 

849 # Children and parent need to be RW for QCOW2 coalesce, otherwise tapdisk(libqcow) will crash trying to access them 

850 

851 def abortTest(): 

852 file = self.sr._gc_running_file(self) 

853 try: 

854 with open(file, "r") as f: 

855 if not f.read(): 

856 #TODO: Need to call commit cancel on the hostRef if we stop 

857 util.SMlog("DAMS: Cancelling") 

858 self._call_plug_cancel(hostRef) 

859 return True 

860 except OSError as e: 

861 if e.errno == errno.ENOENT: 

862 util.SMlog("File {} does not exist".format(file)) 

863 else: 

864 util.SMlog("IOError: {}".format(e)) 

865 return True 

866 return False 

867 

868 #TODO: Add exception handling here like when calling _doCoalesceCowImage in a runAbortable situation 

869 Util.runAbortable(lambda: self._call_plugin_coalesce(hostRef), 

870 None, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0) 

871 

872 self.parent.validate(True) 

873 #self._verifyContents(0) 

874 self.parent.updateBlockInfo() 

875 

876 def _isOpenOnHosts(self) -> Optional[str]: 

877 for pbdRecord in self.sr.xapi.getAttachedPBDs(): 

878 hostRef = pbdRecord["host"] 

879 args = {"path": self.path} 

880 is_openers = util.strtobool(self.sr.xapi.session.xenapi.host.call_plugin( \ 

881 hostRef, XAPI.PLUGIN_ON_SLAVE, "is_openers", args)) 

882 if is_openers: 

883 return hostRef 

884 return None 

885 

886 def _doCoalesce(self) -> None: 

887 """Coalesce self onto parent. Only perform the actual coalescing of 

888 an image, but not the subsequent relinking. We'll do that as the next step, 

889 after reloading the entire SR in case things have changed while we 

890 were coalescing""" 

891 self.validate() 

892 self.parent.validate(True) 

893 self.parent._increaseSizeVirt(self.sizeVirt) 

894 self.sr._updateSlavesOnResize(self.parent) 

895 self._coalesceCowImage(0) 

896 self.parent.validate(True) 

897 #self._verifyContents(0) 

898 self.parent.updateBlockInfo() 

899 

900 def _verifyContents(self, timeOut): 

901 Util.log(" Coalesce verification on %s" % self) 

902 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

903 Util.runAbortable(lambda: self._runTapdiskDiff(), True, 

904 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

905 Util.log(" Coalesce verification succeeded") 

906 

907 def _runTapdiskDiff(self): 

908 cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \ 

909 (self.getDriverName(), self.path, \ 

910 self.parent.getDriverName(), self.parent.path) 

911 Util.doexec(cmd, 0) 

912 return True 

913 

914 @staticmethod 

915 def _reportCoalesceError(vdi, ce): 

916 """Reports a coalesce error to XenCenter. 

917 

918 vdi: the VDI object on which the coalesce error occurred 

919 ce: the CommandException that was raised""" 

920 

921 msg_name = os.strerror(ce.code) 

922 if ce.code == errno.ENOSPC: 

923 # TODO We could add more information here, e.g. exactly how much 

924 # space is required for the particular coalesce, as well as actions 

925 # to be taken by the user and consequences of not taking these 

926 # actions. 

927 msg_body = 'Run out of space while coalescing.' 

928 elif ce.code == errno.EIO: 

929 msg_body = 'I/O error while coalescing.' 

930 else: 

931 msg_body = '' 

932 util.SMlog('Coalesce failed on SR %s: %s (%s)' 

933 % (vdi.sr.uuid, msg_name, msg_body)) 

934 

935 # Create a XenCenter message, but don't spam. 

936 xapi = vdi.sr.xapi.session.xenapi 

937 sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid) 

938 oth_cfg = xapi.SR.get_other_config(sr_ref) 

939 if COALESCE_ERR_RATE_TAG in oth_cfg: 

940 coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG]) 

941 else: 

942 coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE 

943 

944 xcmsg = False 

945 if coalesce_err_rate == 0: 

946 xcmsg = True 

947 elif coalesce_err_rate > 0: 

948 now = datetime.datetime.now() 

949 sm_cfg = xapi.SR.get_sm_config(sr_ref) 

950 if COALESCE_LAST_ERR_TAG in sm_cfg: 

951 # seconds per message (minimum distance in time between two 

952 # messages in seconds) 

953 spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60) 

954 last = datetime.datetime.fromtimestamp( 

955 float(sm_cfg[COALESCE_LAST_ERR_TAG])) 

956 if now - last >= spm: 

957 xapi.SR.remove_from_sm_config(sr_ref, 

958 COALESCE_LAST_ERR_TAG) 

959 xcmsg = True 

960 else: 

961 xcmsg = True 

962 if xcmsg: 

963 xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG, 

964 str(now.strftime('%s'))) 

965 if xcmsg: 

966 xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body) 

967 

968 def coalesce(self) -> int: 

969 return self.cowutil.coalesce(self.path) 

970 

971 @staticmethod 

972 def _doCoalesceCowImage(vdi: "VDI"): 

973 try: 

974 startTime = time.time() 

975 allocated_size = vdi.getAllocatedSize() 

976 coalesced_size = vdi.coalesce() 

977 endTime = time.time() 

978 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size) 

979 except util.CommandException as ce: 

980 # We use try/except for the following piece of code because it runs 

981 # in a separate process context and errors will not be caught and 

982 # reported by anyone. 

983 try: 

984 # Report coalesce errors back to user via XC 

985 VDI._reportCoalesceError(vdi, ce) 

986 except Exception as e: 

987 util.SMlog('failed to create XenCenter message: %s' % e) 

988 raise ce 

989 except: 

990 raise 

991 

992 def _vdi_is_raw(self, vdi_path): 

993 """ 

994 Given path to vdi determine if it is raw 

995 """ 

996 uuid = self.extractUuid(vdi_path) 

997 return self.sr.vdis[uuid].vdi_type == VdiType.RAW 

998 

999 def _coalesceCowImage(self, timeOut): 

1000 Util.log(" Running COW coalesce on %s" % self) 

1001 def abortTest(): 

1002 if self.cowutil.isCoalesceableOnRemote(): 

1003 file = self.sr._gc_running_file(self) 

1004 try: 

1005 with open(file, "r") as f: 

1006 if not f.read(): 

1007 return True 

1008 except OSError as e: 

1009 if e.errno == errno.ENOENT: 

1010 util.SMlog("File {} does not exist".format(file)) 

1011 else: 

1012 util.SMlog("IOError: {}".format(e)) 

1013 return True 

1014 return IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1015 

1016 try: 

1017 util.fistpoint.activate_custom_fn( 

1018 "cleanup_coalesceVHD_inject_failure", 

1019 util.inject_failure) 

1020 Util.runAbortable(lambda: VDI._doCoalesceCowImage(self), None, 

1021 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

1022 except: 

1023 # Exception at this phase could indicate a failure in COW coalesce 

1024 # or a kill of COW coalesce by runAbortable due to timeOut 

1025 # Try a repair and reraise the exception 

1026 parent = "" 

1027 try: 

1028 parent = self.getParent() 

1029 if not self._vdi_is_raw(parent): 

1030 # Repair error is logged and ignored. Error reraised later 

1031 util.SMlog('Coalesce failed on %s, attempting repair on ' \ 

1032 'parent %s' % (self.uuid, parent)) 

1033 self.repair(parent) 

1034 except Exception as e: 

1035 util.SMlog('(error ignored) Failed to repair parent %s ' \ 

1036 'after failed coalesce on %s, err: %s' % 

1037 (parent, self.path, e)) 

1038 raise 

1039 

1040 util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid) 

1041 

1042 def _relinkSkip(self) -> None: 

1043 """Relink children of this VDI to point to the parent of this VDI""" 

1044 abortFlag = IPCFlag(self.sr.uuid) 

1045 for child in self.children: 

1046 if abortFlag.test(FLAG_TYPE_ABORT): 

1047 raise AbortException("Aborting due to signal") 

1048 Util.log(" Relinking %s from %s to %s" % \ 

1049 (child, self, self.parent)) 

1050 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid) 

1051 child._setParent(self.parent) 

1052 self.children = [] 

1053 

1054 def _reloadChildren(self, vdiSkip): 

1055 """Pause & unpause all VDIs in the subtree to cause blktap to reload 

1056 the COW image metadata for this file in any online VDI""" 

1057 abortFlag = IPCFlag(self.sr.uuid) 

1058 for child in self.children: 

1059 if child == vdiSkip: 

1060 continue 

1061 if abortFlag.test(FLAG_TYPE_ABORT): 

1062 raise AbortException("Aborting due to signal") 

1063 Util.log(" Reloading VDI %s" % child) 

1064 child._reload() 

1065 

1066 def _reload(self): 

1067 """Pause & unpause to cause blktap to reload the image metadata""" 

1068 for child in self.children: 

1069 child._reload() 

1070 

1071 # only leaves can be attached 

1072 if len(self.children) == 0: 

1073 try: 

1074 self.delConfig(VDI.DB_VDI_RELINKING) 

1075 except XenAPI.Failure as e: 

1076 if not util.isInvalidVDI(e): 

1077 raise 

1078 self.refresh() 

1079 

1080 def _tagChildrenForRelink(self): 

1081 if len(self.children) == 0: 

1082 retries = 0 

1083 try: 

1084 while retries < 15: 

1085 retries += 1 

1086 if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None: 

1087 Util.log("VDI %s is activating, wait to relink" % 

1088 self.uuid) 

1089 else: 

1090 self.setConfig(VDI.DB_VDI_RELINKING, "True") 

1091 

1092 if self.getConfig(VDI.DB_VDI_ACTIVATING): 

1093 self.delConfig(VDI.DB_VDI_RELINKING) 

1094 Util.log("VDI %s started activating while tagging" % 

1095 self.uuid) 

1096 else: 

1097 return 

1098 time.sleep(2) 

1099 

1100 raise util.SMException("Failed to tag vdi %s for relink" % self) 

1101 except XenAPI.Failure as e: 

1102 if not util.isInvalidVDI(e): 

1103 raise 

1104 

1105 for child in self.children: 

1106 child._tagChildrenForRelink() 

1107 

1108 def _loadInfoParent(self): 

1109 ret = self.cowutil.getParent(self.path, LvmCowUtil.extractUuid) 

1110 if ret: 

1111 self.parentUuid = ret 

1112 

1113 def _setParent(self, parent) -> None: 

1114 self.cowutil.setParent(self.path, parent.path, False) 

1115 self.parent = parent 

1116 self.parentUuid = parent.uuid 

1117 parent.children.append(self) 

1118 try: 

1119 self.setConfig(self.DB_VDI_PARENT, self.parentUuid) 

1120 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1121 (self.uuid, self.parentUuid)) 

1122 except: 

1123 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1124 (self.uuid, self.parentUuid)) 

1125 

1126 def _loadInfoHidden(self) -> None: 

1127 hidden = self.cowutil.getHidden(self.path) 

1128 self.hidden = (hidden != 0) 

1129 

1130 def _setHidden(self, hidden=True) -> None: 

1131 self.cowutil.setHidden(self.path, hidden) 

1132 self.hidden = hidden 

1133 

1134 def _increaseSizeVirt(self, size, atomic=True) -> None: 

1135 """ensure the virtual size of 'self' is at least 'size'. Note that 

1136 resizing a COW image must always be offline and atomically: the file must 

1137 not be open by anyone and no concurrent operations may take place. 

1138 Thus we use the Agent API call for performing paused atomic 

1139 operations. If the caller is already in the atomic context, it must 

1140 call with atomic = False""" 

1141 if self.sizeVirt >= size: 

1142 return 

1143 Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \ 

1144 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1145 

1146 msize = self.cowutil.getMaxResizeSize(self.path) 

1147 if (size <= msize): 

1148 self.cowutil.setSizeVirtFast(self.path, size) 

1149 else: 

1150 if atomic: 

1151 vdiList = self._getAllSubtree() 

1152 self.sr.lock() 

1153 try: 

1154 self.sr.pauseVDIs(vdiList) 

1155 try: 

1156 self._setSizeVirt(size) 

1157 finally: 

1158 self.sr.unpauseVDIs(vdiList) 

1159 finally: 

1160 self.sr.unlock() 

1161 else: 

1162 self._setSizeVirt(size) 

1163 

1164 self.sizeVirt = self.cowutil.getSizeVirt(self.path) 

1165 

1166 def _setSizeVirt(self, size) -> None: 

1167 """WARNING: do not call this method directly unless all VDIs in the 

1168 subtree are guaranteed to be unplugged (and remain so for the duration 

1169 of the operation): this operation is only safe for offline COW images""" 

1170 jFile = os.path.join(self.sr.path, self.uuid) 

1171 self.cowutil.setSizeVirt(self.path, size, jFile) 

1172 

1173 def _queryCowBlocks(self) -> bytes: 

1174 return self.cowutil.getBlockBitmap(self.path) 

1175 

1176 def _getCoalescedSizeData(self): 

1177 """Get the data size of the resulting image if we coalesce self onto 

1178 parent. We calculate the actual size by using the image block allocation 

1179 information (as opposed to just adding up the two image sizes to get an 

1180 upper bound)""" 

1181 # make sure we don't use stale BAT info from vdi_rec since the child 

1182 # was writable all this time 

1183 self.delConfig(VDI.DB_VDI_BLOCKS) 

1184 blocksChild = self.getVDIBlocks() 

1185 blocksParent = self.parent.getVDIBlocks() 

1186 numBlocks = Util.countBits(blocksChild, blocksParent) 

1187 Util.log("Num combined blocks = %d" % numBlocks) 

1188 sizeData = numBlocks * self.cowutil.getBlockSize(self.path) 

1189 assert(sizeData <= self.sizeVirt) 

1190 return sizeData 

1191 

1192 def _calcExtraSpaceForCoalescing(self) -> int: 

1193 sizeData = self._getCoalescedSizeData() 

1194 sizeCoalesced = sizeData + self.cowutil.calcOverheadBitmap(sizeData) + \ 

1195 self.cowutil.calcOverheadEmpty(self.sizeVirt) 

1196 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1197 return sizeCoalesced - self.parent.getSizePhys() 

1198 
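# Rough illustration of _calcExtraSpaceForCoalescing(): if the combined block
# bitmaps report 1000 allocated blocks of 2 MiB each (2 MiB being a typical
# COW block size), sizeData is about 2000 MiB; the bitmap and empty-image
# overheads reported by cowutil are added on top, and the parent's current
# physical size is subtracted to obtain the extra space the coalesce needs.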

1199 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1200 """How much extra space in the SR will be required to 

1201 [live-]leaf-coalesce this VDI""" 

1202 # the space requirements are the same as for inline coalesce 

1203 return self._calcExtraSpaceForCoalescing() 

1204 

1205 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1206 """How much extra space in the SR will be required to 

1207 snapshot-coalesce this VDI""" 

1208 return self._calcExtraSpaceForCoalescing() + \ 

1209 self.cowutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf 

1210 

1211 def _getAllSubtree(self): 

1212 """Get self and all VDIs in the subtree of self as a flat list""" 

1213 vdiList = [self] 

1214 for child in self.children: 

1215 vdiList.extend(child._getAllSubtree()) 

1216 return vdiList 

1217 

1218 

1219class FileVDI(VDI): 

1220 """Object representing a VDI in a file-based SR (EXT or NFS)""" 

1221 

1222 @override 

1223 @staticmethod 

1224 def extractUuid(path): 

1225 fileName = os.path.basename(path) 

1226 return os.path.splitext(fileName)[0] 

1227 

1228 def __init__(self, sr, uuid, vdi_type): 

1229 VDI.__init__(self, sr, uuid, vdi_type) 

1230 self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type]) 

1231 

1232 @override 

1233 def load(self, info=None) -> None: 

1234 if not info: 

1235 if not util.pathexists(self.path): 

1236 raise util.SMException("%s not found" % self.path) 

1237 try: 

1238 info = self.cowutil.getInfo(self.path, self.extractUuid) 

1239 except util.SMException: 

1240 Util.log(" [VDI %s: failed to read COW image metadata]" % self.uuid) 

1241 return 

1242 self.parent = None 

1243 self.children = [] 

1244 self.parentUuid = info.parentUuid 

1245 self.sizeVirt = info.sizeVirt 

1246 self._sizePhys = info.sizePhys 

1247 self._sizeAllocated = info.sizeAllocated 

1248 self.hidden = info.hidden 

1249 self.scanError = False 

1250 self.path = os.path.join(self.sr.path, "%s%s" % \ 

1251 (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])) 

1252 

1253 @override 

1254 def rename(self, uuid) -> None: 

1255 oldPath = self.path 

1256 VDI.rename(self, uuid) 

1257 self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type]) 

1258 self.path = os.path.join(self.sr.path, self.fileName) 

1259 assert(not util.pathexists(self.path)) 

1260 Util.log("Renaming %s -> %s" % (oldPath, self.path)) 

1261 os.rename(oldPath, self.path) 

1262 

1263 @override 

1264 def delete(self) -> None: 

1265 if len(self.children) > 0: 

1266 raise util.SMException("VDI %s has children, can't delete" % \ 

1267 self.uuid) 

1268 try: 

1269 self.sr.lock() 

1270 try: 

1271 os.unlink(self.path) 

1272 self.sr.forgetVDI(self.uuid) 

1273 finally: 

1274 self.sr.unlock() 

1275 except OSError: 

1276 raise util.SMException("os.unlink(%s) failed" % self.path) 

1277 VDI.delete(self) 

1278 

1279 @override 

1280 def getAllocatedSize(self) -> int: 

1281 if self._sizeAllocated == -1: 

1282 self._sizeAllocated = self.cowutil.getAllocatedSize(self.path) 

1283 return self._sizeAllocated 

1284 

1285 

1286class LVMVDI(VDI): 

1287 """Object representing a VDI in an LVM SR""" 

1288 

1289 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent 

1290 

1291 @override 

1292 def load(self, info=None) -> None: 

1293 # `info` is always set. `None` default value is only here to match parent method. 

1294 assert info, "No info given to LVMVDI.load" 

1295 self.parent = None 

1296 self.children = [] 

1297 self._sizePhys = -1 

1298 self._sizeAllocated = -1 

1299 self.scanError = info.scanError 

1300 self.sizeLV = info.sizeLV 

1301 self.sizeVirt = info.sizeVirt 

1302 self.fileName = info.lvName 

1303 self.lvActive = info.lvActive 

1304 self.lvOpen = info.lvOpen 

1305 self.lvReadonly = info.lvReadonly 

1306 self.hidden = info.hidden 

1307 self.parentUuid = info.parentUuid 

1308 self.path = os.path.join(self.sr.path, self.fileName) 

1309 self.lvmcowutil = LvmCowUtil(self.cowutil) 

1310 

1311 @override 

1312 @staticmethod 

1313 def extractUuid(path): 

1314 return LvmCowUtil.extractUuid(path) 

1315 

1316 def inflate(self, size): 

1317 """inflate the LV containing the COW image to 'size'""" 

1318 if not VdiType.isCowImage(self.vdi_type): 

1319 return 

1320 self._activate() 

1321 self.sr.lock() 

1322 try: 

1323 self.lvmcowutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size) 

1324 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid) 

1325 finally: 

1326 self.sr.unlock() 

1327 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1328 self._sizePhys = -1 

1329 self._sizeAllocated = -1 

1330 

1331 def deflate(self): 

1332 """deflate the LV containing the image to minimum""" 

1333 if not VdiType.isCowImage(self.vdi_type): 

1334 return 

1335 self._activate() 

1336 self.sr.lock() 

1337 try: 

1338 self.lvmcowutil.deflate(self.sr.lvmCache, self.fileName, self.getSizePhys()) 

1339 finally: 

1340 self.sr.unlock() 

1341 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1342 self._sizePhys = -1 

1343 self._sizeAllocated = -1 

1344 

1345 def inflateFully(self): 

1346 self.inflate(self.lvmcowutil.calcVolumeSize(self.sizeVirt)) 

1347 

1348 def inflateParentForCoalesce(self): 

1349 """Inflate the parent only as much as needed for the purposes of 

1350 coalescing""" 

1351 if not VdiType.isCowImage(self.parent.vdi_type): 

1352 return 

1353 inc = self._calcExtraSpaceForCoalescing() 

1354 if inc > 0: 

1355 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid) 

1356 self.parent.inflate(self.parent.sizeLV + inc) 

1357 

1358 @override 

1359 def updateBlockInfo(self) -> Optional[str]: 

1360 if VdiType.isCowImage(self.vdi_type): 

1361 return VDI.updateBlockInfo(self) 

1362 return None 

1363 

1364 @override 

1365 def rename(self, uuid) -> None: 

1366 oldUuid = self.uuid 

1367 oldLVName = self.fileName 

1368 VDI.rename(self, uuid) 

1369 self.fileName = LV_PREFIX[self.vdi_type] + self.uuid 

1370 self.path = os.path.join(self.sr.path, self.fileName) 

1371 assert(not self.sr.lvmCache.checkLV(self.fileName)) 

1372 

1373 self.sr.lvmCache.rename(oldLVName, self.fileName) 

1374 if self.sr.lvActivator.get(oldUuid, False): 

1375 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False) 

1376 

1377 ns = NS_PREFIX_LVM + self.sr.uuid 

1378 (cnt, bcnt) = RefCounter.check(oldUuid, ns) 

1379 RefCounter.set(self.uuid, cnt, bcnt, ns) 

1380 RefCounter.reset(oldUuid, ns) 

1381 

1382 @override 

1383 def delete(self) -> None: 

1384 if len(self.children) > 0: 

1385 raise util.SMException("VDI %s has children, can't delete" % \ 

1386 self.uuid) 

1387 self.sr.lock() 

1388 try: 

1389 self.sr.lvmCache.remove(self.fileName) 

1390 self.sr.forgetVDI(self.uuid) 

1391 finally: 

1392 self.sr.unlock() 

1393 RefCounter.reset(self.uuid, NS_PREFIX_LVM + self.sr.uuid) 

1394 VDI.delete(self) 

1395 

1396 @override 

1397 def getSizePhys(self) -> int: 

1398 if self._sizePhys == -1: 

1399 self._loadInfoSizePhys() 

1400 return self._sizePhys 

1401 

1402 def _loadInfoSizePhys(self): 

1403 """Get the physical utilization of the COW image file. We do it individually 

1404 (and not using the COW batch scanner) as an optimization: this info is 

1405 relatively expensive and we need it only for VDIs involved in 

1406 coalescing.""" 

1407 if not VdiType.isCowImage(self.vdi_type): 

1408 return 

1409 self._activate() 

1410 self._sizePhys = self.cowutil.getSizePhys(self.path) 

1411 if self._sizePhys <= 0: 

1412 raise util.SMException("phys size of %s = %d" % \ 

1413 (self, self._sizePhys)) 

1414 

1415 @override 

1416 def getAllocatedSize(self) -> int: 

1417 if self._sizeAllocated == -1: 

1418 self._loadInfoSizeAllocated() 

1419 return self._sizeAllocated 

1420 

1421 def _loadInfoSizeAllocated(self): 

1422 """ 

1423 Get the allocated size of the COW volume. 

1424 """ 

1425 if not VdiType.isCowImage(self.vdi_type): 

1426 return 

1427 self._activate() 

1428 self._sizeAllocated = self.cowutil.getAllocatedSize(self.path) 

1429 

1430 @override 

1431 def _loadInfoHidden(self) -> None: 

1432 if not VdiType.isCowImage(self.vdi_type): 

1433 self.hidden = self.sr.lvmCache.getHidden(self.fileName) 

1434 else: 

1435 VDI._loadInfoHidden(self) 

1436 

1437 @override 

1438 def _setHidden(self, hidden=True) -> None: 

1439 if not VdiType.isCowImage(self.vdi_type): 

1440 self.sr.lvmCache.setHidden(self.fileName, hidden) 

1441 self.hidden = hidden 

1442 else: 

1443 VDI._setHidden(self, hidden) 

1444 

1445 @override 

1446 def __str__(self) -> str: 

1447 strType = self.vdi_type 

1448 if self.vdi_type == VdiType.RAW: 

1449 strType = "RAW" 

1450 strHidden = "" 

1451 if self.hidden: 

1452 strHidden = "*" 

1453 strSizePhys = "" 

1454 if self._sizePhys > 0: 

1455 strSizePhys = Util.num2str(self._sizePhys) 

1456 strSizeAllocated = "" 

1457 if self._sizeAllocated >= 0: 

1458 strSizeAllocated = Util.num2str(self._sizeAllocated) 

1459 strActive = "n" 

1460 if self.lvActive: 

1461 strActive = "a" 

1462 if self.lvOpen: 

1463 strActive += "o" 

1464 return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType, 

1465 Util.num2str(self.sizeVirt), strSizePhys, strSizeAllocated, 

1466 Util.num2str(self.sizeLV), strActive) 

1467 

1468 @override 

1469 def validate(self, fast=False) -> None: 

1470 if VdiType.isCowImage(self.vdi_type): 

1471 VDI.validate(self, fast) 

1472 

1473 def _setChainRw(self) -> List[str]: 

1474 """ 

1475 Set the read-only LV and its children writable. 

1476 This is needed because the coalesce can be done by tapdisk directly, 

1477 which needs to write parent information into the children. 

1478 The VDI we want to coalesce into its parent must be writable for the libqcow coalesce step. 

1479 Return the list of LVs that were read-only so they can be made read-only again after the coalesce. 

1480 """ 

1481 was_ro = [] 

1482 if self.lvReadonly: 

1483 self.sr.lvmCache.setReadonly(self.fileName, False) 

1484 was_ro.append(self.fileName) 

1485 

1486 for child in self.children: 

1487 if child.lvReadonly: 

1488 self.sr.lvmCache.setReadonly(child.fileName, False) 

1489 was_ro.append(child.fileName) 

1490 

1491 return was_ro 

1492 

1493 def _setChainRo(self, was_ro: List[str]) -> None: 

1494 """Set the list of LV in parameters to readonly""" 

1495 for lvName in was_ro: 

1496 self.sr.lvmCache.setReadonly(lvName, True) 

1497 

1498 @override 

1499 def _doCoalesce(self) -> None: 

1500 """LVMVDI parents must first be activated, inflated, and made writable""" 

1501 was_ro = [] 

1502 try: 

1503 self._activateChain() 

1504 self.sr.lvmCache.setReadonly(self.parent.fileName, False) 

1505 self.parent.validate() 

1506 self.inflateParentForCoalesce() 

1507 was_ro = self._setChainRw() 

1508 VDI._doCoalesce(self) 

1509 finally: 

1510 self.parent._loadInfoSizePhys() 

1511 self.parent.deflate() 

1512 self.sr.lvmCache.setReadonly(self.parent.fileName, True) 

1513 self._setChainRo(was_ro) 

1514 

1515 @override 

1516 def _setParent(self, parent) -> None: 

1517 self._activate() 

1518 if self.lvReadonly: 

1519 self.sr.lvmCache.setReadonly(self.fileName, False) 

1520 

1521 try: 

1522 self.cowutil.setParent(self.path, parent.path, parent.vdi_type == VdiType.RAW) 

1523 finally: 

1524 if self.lvReadonly: 

1525 self.sr.lvmCache.setReadonly(self.fileName, True) 

1526 self._deactivate() 

1527 self.parent = parent 

1528 self.parentUuid = parent.uuid 

1529 parent.children.append(self) 

1530 try: 

1531 self.setConfig(self.DB_VDI_PARENT, self.parentUuid) 

1532 Util.log("Updated the VDI-parent field for child %s with %s" % \ 

1533 (self.uuid, self.parentUuid)) 

1534 except: 

1535 Util.log("Failed to update the VDI-parent with %s for child %s" % \ 

1536 (self.parentUuid, self.uuid)) 

1537 

1538 def _activate(self): 

1539 self.sr.lvActivator.activate(self.uuid, self.fileName, False) 

1540 

1541 def _activateChain(self): 

1542 vdi = self 

1543 while vdi: 

1544 vdi._activate() 

1545 vdi = vdi.parent 

1546 

1547 def _deactivate(self): 

1548 self.sr.lvActivator.deactivate(self.uuid, False) 

1549 

1550 @override 

1551 def _increaseSizeVirt(self, size, atomic=True) -> None: 

1552 "ensure the virtual size of 'self' is at least 'size'" 

1553 self._activate() 

1554 if VdiType.isCowImage(self.vdi_type): 

1555 VDI._increaseSizeVirt(self, size, atomic) 

1556 return 

1557 

1558 # raw VDI case 

1559 offset = self.sizeLV 

1560 if self.sizeVirt < size: 

1561 oldSize = self.sizeLV 

1562 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size) 

1563 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV)) 

1564 self.sr.lvmCache.setSize(self.fileName, self.sizeLV) 

1565 offset = oldSize 

1566 unfinishedZero = False 

1567 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid) 

1568 if jval: 

1569 unfinishedZero = True 

1570 offset = int(jval) 

1571 length = self.sizeLV - offset 

1572 if not length: 

1573 return 

1574 

1575 if unfinishedZero: 

1576 Util.log(" ==> Redoing unfinished zeroing out") 

1577 else: 

1578 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \ 

1579 str(offset)) 

1580 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1581 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1582 func = lambda: util.zeroOut(self.path, offset, length) 

1583 Util.runAbortable(func, True, self.sr.uuid, abortTest, 

1584 VDI.POLL_INTERVAL, 0) 

1585 self.sr.journaler.remove(self.JRN_ZERO, self.uuid) 

1586 

1587 @override 

1588 def _setSizeVirt(self, size) -> None: 

1589 """WARNING: do not call this method directly unless all VDIs in the 

1590 subtree are guaranteed to be unplugged (and remain so for the duration 

1591 of the operation): this operation is only safe for offline COW images.""" 

1592 self._activate() 

1593 jFile = self.lvmcowutil.createResizeJournal(self.sr.lvmCache, self.uuid) 

1594 try: 

1595 self.lvmcowutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size, jFile) 

1596 finally: 

1597 self.lvmcowutil.destroyResizeJournal(self.sr.lvmCache, self.uuid) 

1598 

1599 @override 

1600 def _queryCowBlocks(self) -> bytes: 

1601 self._activate() 

1602 return VDI._queryCowBlocks(self) 

1603 

1604 @override 

1605 def _calcExtraSpaceForCoalescing(self) -> int: 

1606 if not VdiType.isCowImage(self.parent.vdi_type): 

1607 return 0 # raw parents are never deflated in the first place 

1608 sizeCoalesced = self.lvmcowutil.calcVolumeSize(self._getCoalescedSizeData()) 

1609 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1610 return sizeCoalesced - self.parent.sizeLV 

1611 

1612 @override 

1613 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1614 """How much extra space in the SR will be required to 

1615 [live-]leaf-coalesce this VDI""" 

1616 # we can deflate the leaf to minimize the space requirements 

1617 deflateDiff = self.sizeLV - lvutil.calcSizeLV(self.getSizePhys()) 

1618 return self._calcExtraSpaceForCoalescing() - deflateDiff 

1619 

1620 @override 

1621 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1622 return self._calcExtraSpaceForCoalescing() + \ 

1623 lvutil.calcSizeLV(self.getSizePhys()) 

1624 

1625 

1626class LinstorVDI(VDI): 

1627 """Object representing a VDI in a LINSTOR SR""" 

1628 

1629 VOLUME_LOCK_TIMEOUT = 30 

1630 

1631 @override 

1632 def load(self, info=None) -> None: 

1633        self.parentUuid = info.parentUuid if info else None 

1634 self.scanError = True 

1635 self.parent = None 

1636 self.children = [] 

1637 

1638 self.fileName = self.sr._linstor.get_volume_name(self.uuid) 

1639 self.path = self.sr._linstor.build_device_path(self.fileName) 

1640        self.linstorcowutil = LinstorCowUtil(self.sr.xapi.session, self.sr._linstor, self.vdi_type) 

1641 

1642 if not info: 

1643 try: 

1644 info = self.linstorcowutil.get_info(self.uuid) 

1645 except util.SMException: 

1646 Util.log( 

1647 ' [VDI {}: failed to read COW image metadata]'.format(self.uuid) 

1648 ) 

1649 return 

1650 

1651 self.parentUuid = info.parentUuid 

1652 self.sizeVirt = info.sizeVirt 

1653 self._sizePhys = -1 

1654 self._sizeAllocated = -1 

1655 self.drbd_size = -1 

1656 self.hidden = info.hidden 

1657 self.scanError = False 

1658 

1659 @override 

1660 def getSizePhys(self, fetch=False) -> int: 

1661 if self._sizePhys < 0 or fetch: 

1662 self._sizePhys = self.linstorcowutil.get_size_phys(self.uuid) 

1663 return self._sizePhys 

1664 

1665 def getDrbdSize(self, fetch=False): 

1666 if self.drbd_size < 0 or fetch: 

1667 self.drbd_size = self.linstorcowutil.get_drbd_size(self.uuid) 

1668 return self.drbd_size 

1669 

1670 @override 

1671 def getAllocatedSize(self) -> int: 

1672 if self._sizeAllocated == -1: 

1673 if VdiType.isCowImage(self.vdi_type): 

1674 self._sizeAllocated = self.linstorcowutil.get_allocated_size(self.uuid) 

1675 return self._sizeAllocated 

1676 

1677 def inflate(self, size): 

1678 if not VdiType.isCowImage(self.vdi_type): 

1679 return 

1680 self.sr.lock() 

1681 try: 

1682            # Ensure we use the real DRBD size and not the cached one: 

1683            # the cached value may be stale if the user has resized the volume. 

1684 self.drbd_size = self.getDrbdSize(fetch=True) 

1685 self.linstorcowutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size) 

1686 finally: 

1687 self.sr.unlock() 

1688 self.drbd_size = -1 

1689 self._sizePhys = -1 

1690 self._sizeAllocated = -1 

1691 

1692 def deflate(self): 

1693 if not VdiType.isCowImage(self.vdi_type): 

1694 return 

1695 self.sr.lock() 

1696 try: 

1697 # Ensure we use the real sizes and not the cached info. 

1698 self.drbd_size = self.getDrbdSize(fetch=True) 

1699 self._sizePhys = self.getSizePhys(fetch=True) 

1700 self.linstorcowutil.force_deflate(self.path, self._sizePhys, self.drbd_size, zeroize=False) 

1701 finally: 

1702 self.sr.unlock() 

1703 self.drbd_size = -1 

1704 self._sizePhys = -1 

1705 self._sizeAllocated = -1 

1706 

1707 def inflateFully(self): 

1708 if VdiType.isCowImage(self.vdi_type): 

1709 self.inflate(self.linstorcowutil.compute_volume_size(self.sizeVirt)) 

1710 

1711 @override 

1712 def rename(self, uuid) -> None: 

1713 Util.log('Renaming {} -> {} (path={})'.format( 

1714 self.uuid, uuid, self.path 

1715 )) 

1716 self.sr._linstor.update_volume_uuid(self.uuid, uuid) 

1717 VDI.rename(self, uuid) 

1718 

1719 @override 

1720 def delete(self) -> None: 

1721 if len(self.children) > 0: 

1722 raise util.SMException( 

1723 'VDI {} has children, can\'t delete'.format(self.uuid) 

1724 ) 

1725 self.sr.lock() 

1726 try: 

1727 self.sr._linstor.destroy_volume(self.uuid) 

1728 self.sr.forgetVDI(self.uuid) 

1729 finally: 

1730 self.sr.unlock() 

1731 VDI.delete(self) 

1732 

1733 @override 

1734 def validate(self, fast=False) -> None: 

1735 if VdiType.isCowImage(self.vdi_type) and self.linstorcowutil.check(self.uuid, fast=fast) != CowUtil.CheckResult.Success: 

1736 raise util.SMException('COW image {} corrupted'.format(self)) 

1737 

1738 @override 

1739 def pause(self, failfast=False) -> None: 

1740 self.sr._linstor.ensure_volume_is_not_locked( 

1741 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1742 ) 

1743 return super(LinstorVDI, self).pause(failfast) 

1744 

1745 @override 

1746 def coalesce(self) -> int: 

1747        # Note: `SMException` is raised here on failure so that only the current coalesce is skipped. 

1748        # Any other exception type would stop the GC and prevent the remaining coalesce calls from running. 

1749 return self.linstorcowutil.force_coalesce(self.path) 

1750 

1751 @override 

1752 def getParent(self) -> str: 

1753 return self.linstorcowutil.get_parent( 

1754 self.sr._linstor.get_volume_uuid_from_device_path(self.path) 

1755 ) 

1756 

1757 @override 

1758 def repair(self, parent_uuid) -> None: 

1759 self.linstorcowutil.force_repair( 

1760 self.sr._linstor.get_device_path(parent_uuid) 

1761 ) 

1762 

1763 @override 

1764 def _relinkSkip(self) -> None: 

1765 abortFlag = IPCFlag(self.sr.uuid) 

1766 for child in self.children: 

1767 if abortFlag.test(FLAG_TYPE_ABORT): 

1768 raise AbortException('Aborting due to signal') 

1769 Util.log( 

1770 ' Relinking {} from {} to {}'.format( 

1771 child, self, self.parent 

1772 ) 

1773 ) 

1774 

1775 session = child.sr.xapi.session 

1776 sr_uuid = child.sr.uuid 

1777 vdi_uuid = child.uuid 

1778 try: 

1779 self.sr._linstor.ensure_volume_is_not_locked( 

1780 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1781 ) 

1782 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) 

1783 child._setParent(self.parent) 

1784 finally: 

1785 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) 

1786 self.children = [] 

1787 

1788 @override 

1789 def _setParent(self, parent) -> None: 

1790 self.sr._linstor.get_device_path(self.uuid) 

1791 self.linstorcowutil.force_parent(self.path, parent.path) 

1792 self.parent = parent 

1793 self.parentUuid = parent.uuid 

1794 parent.children.append(self) 

1795 try: 

1796 self.setConfig(self.DB_VDI_PARENT, self.parentUuid) 

1797 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1798 (self.uuid, self.parentUuid)) 

1799 except: 

1800 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1801 (self.uuid, self.parentUuid)) 

1802 

1803 @override 

1804 def _doCoalesce(self) -> None: 

1805 try: 

1806 self._activateChain() 

1807 self.parent.validate() 

1808 self._inflateParentForCoalesce() 

1809 VDI._doCoalesce(self) 

1810 finally: 

1811 self.parent.deflate() 

1812 

1813 def _activateChain(self): 

1814 vdi = self 

1815 while vdi: 

1816 try: 

1817 p = self.sr._linstor.get_device_path(vdi.uuid) 

1818 except Exception as e: 

1819                # Raise SMException so that only this coalesce is skipped. 

1820                # Any other exception type would stop the GC entirely. 

1821 raise util.SMException(str(e)) 

1822 vdi = vdi.parent 

1823 

1824 @override 

1825 def _setHidden(self, hidden=True) -> None: 

1826 HIDDEN_TAG = 'hidden' 

1827 

1828 if not VdiType.isCowImage(self.vdi_type): 

1829 self.sr._linstor.update_volume_metadata(self.uuid, { 

1830 HIDDEN_TAG: hidden 

1831 }) 

1832 self.hidden = hidden 

1833 else: 

1834 VDI._setHidden(self, hidden) 

1835 

1836 @override 

1837 def _increaseSizeVirt(self, size, atomic=True): 

1838 if self.vdi_type == VdiType.RAW: 

1839 offset = self.drbd_size 

1840 if self.sizeVirt < size: 

1841 oldSize = self.drbd_size 

1842 self.drbd_size = LinstorVolumeManager.round_up_volume_size(size) 

1843 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size)) 

1844 self.sr._linstor.resize_volume(self.uuid, self.drbd_size) 

1845 offset = oldSize 

1846 unfinishedZero = False 

1847 jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid) 

1848 if jval: 

1849 unfinishedZero = True 

1850 offset = int(jval) 

1851 length = self.drbd_size - offset 

1852 if not length: 

1853 return 

1854 

1855 if unfinishedZero: 

1856 Util.log(" ==> Redoing unfinished zeroing out") 

1857 else: 

1858 self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset)) 

1859 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1860 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1861 func = lambda: util.zeroOut(self.path, offset, length) 

1862 Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0) 

1863 self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid) 

1864 return 

1865 

1866 if self.sizeVirt >= size: 

1867 return 

1868 Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \ 

1869 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1870 

1871 msize = self.linstorcowutil.get_max_resize_size(self.uuid) * 1024 * 1024 

1872 if (size <= msize): 

1873 self.linstorcowutil.set_size_virt_fast(self.path, size) 

1874 else: 

1875 if atomic: 

1876 vdiList = self._getAllSubtree() 

1877 self.sr.lock() 

1878 try: 

1879 self.sr.pauseVDIs(vdiList) 

1880 try: 

1881 self._setSizeVirt(size) 

1882 finally: 

1883 self.sr.unpauseVDIs(vdiList) 

1884 finally: 

1885 self.sr.unlock() 

1886 else: 

1887 self._setSizeVirt(size) 

1888 

1889 self.sizeVirt = self.linstorcowutil.get_size_virt(self.uuid) 

1890 

1891 @override 

1892 def _setSizeVirt(self, size) -> None: 

1893 jfile = self.uuid + '-jvhd' 

1894 self.sr._linstor.create_volume( 

1895 jfile, self.cowutil.getResizeJournalSize(), persistent=False, volume_name=jfile 

1896 ) 

1897 try: 

1898 self.inflate(self.linstorcowutil.compute_volume_size(size)) 

1899 self.linstorcowutil.set_size_virt(self.path, size, jfile) 

1900 finally: 

1901 try: 

1902 self.sr._linstor.destroy_volume(jfile) 

1903 except Exception: 

1904                # Safe to ignore: this journal volume is not persistent anyway. 

1905 pass 

1906 

1907 @override 

1908 def _queryCowBlocks(self) -> bytes: 

1909 return self.linstorcowutil.get_block_bitmap(self.uuid) 

1910 

1911 def _inflateParentForCoalesce(self): 

1912 if not VdiType.isCowImage(self.parent.vdi_type): 

1913 return 

1914 inc = self._calcExtraSpaceForCoalescing() 

1915 if inc > 0: 

1916 self.parent.inflate(self.parent.getDrbdSize() + inc) 

1917 

1918 @override 

1919 def _calcExtraSpaceForCoalescing(self) -> int: 

1920 if not VdiType.isCowImage(self.parent.vdi_type): 

1921 return 0 

1922 size_coalesced = self.linstorcowutil.compute_volume_size(self._getCoalescedSizeData()) 

1923 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced)) 

1924 return size_coalesced - self.parent.getDrbdSize() 

1925 

1926 @override 

1927 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1928 assert self.getDrbdSize() > 0 

1929 assert self.getSizePhys() > 0 

1930 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizePhys()) 

1931 assert deflate_diff >= 0 

1932 return self._calcExtraSpaceForCoalescing() - deflate_diff 

1933 

1934 @override 

1935 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1936 assert self.getSizePhys() > 0 

1937 return self._calcExtraSpaceForCoalescing() + \ 

1938 LinstorVolumeManager.round_up_volume_size(self.getSizePhys()) 

1939 

1940################################################################################ 

1941# 

1942# SR 

1943# 

1944class SR(object): 

1945 class LogFilter: 

1946 def __init__(self, sr): 

1947 self.sr = sr 

1948 self.stateLogged = False 

1949 self.prevState = {} 

1950 self.currState = {} 

1951 

1952 def logState(self): 
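# Log the layout of every COW tree in the SR; after the first full dump only
# trees whose structure changed since the previous scan are printed, which
# keeps repeated GC iterations from flooding the log.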

1953 changes = "" 

1954 self.currState.clear() 

1955 for vdi in self.sr.vdiTrees: 

1956 self.currState[vdi.uuid] = self._getTreeStr(vdi) 

1957 if not self.prevState.get(vdi.uuid) or \ 

1958 self.prevState[vdi.uuid] != self.currState[vdi.uuid]: 

1959 changes += self.currState[vdi.uuid] 

1960 

1961 for uuid in self.prevState: 

1962 if not self.currState.get(uuid): 

1963 changes += "Tree %s gone\n" % uuid 

1964 

1965 result = "SR %s (%d VDIs in %d COW trees): " % \ 

1966 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees)) 

1967 

1968 if len(changes) > 0: 

1969 if self.stateLogged: 

1970 result += "showing only COW trees that changed:" 

1971 result += "\n%s" % changes 

1972 else: 

1973 result += "no changes" 

1974 

1975 for line in result.split("\n"): 

1976 Util.log("%s" % line) 

1977 self.prevState.clear() 

1978 for key, val in self.currState.items(): 

1979 self.prevState[key] = val 

1980 self.stateLogged = True 

1981 

1982 def logNewVDI(self, uuid): 

1983 if self.stateLogged: 

1984 Util.log("Found new VDI when scanning: %s" % uuid) 

1985 

1986 def _getTreeStr(self, vdi, indent=8): 

1987 treeStr = "%s%s\n" % (" " * indent, vdi) 

1988 for child in vdi.children: 

1989 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT) 

1990 return treeStr 

1991 

1992 TYPE_FILE = "file" 

1993 TYPE_LVHD = "lvhd" 

1994 TYPE_LINSTOR = "linstor" 

1995 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR] 

1996 

1997 LOCK_RETRY_INTERVAL = 3 

1998 LOCK_RETRY_ATTEMPTS = 20 

1999 LOCK_RETRY_ATTEMPTS_LOCK = 100 

2000 

2001 SCAN_RETRY_ATTEMPTS = 3 

2002 

2003 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM) 

2004 TMP_RENAME_PREFIX = "OLD_" 

2005 

2006 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline" 

2007 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override" 

2008 

2009 @staticmethod 

2010 def getInstance(uuid, xapiSession, createLock=True, force=False): 

2011 xapi = XAPI(xapiSession, uuid) 

2012 type = normalizeType(xapi.srRecord["type"]) 

2013 if type == SR.TYPE_FILE: 

2014 return FileSR(uuid, xapi, createLock, force) 

2015 elif type == SR.TYPE_LVHD: 

2016 return LVMSR(uuid, xapi, createLock, force) 

2017 elif type == SR.TYPE_LINSTOR: 

2018 return LinstorSR(uuid, xapi, createLock, force) 

2019 raise util.SMException("SR type %s not recognized" % type) 

2020 

2021 def __init__(self, uuid, xapi, createLock, force): 

2022 self.logFilter = self.LogFilter(self) 

2023 self.uuid = uuid 

2024 self.path = "" 

2025 self.name = "" 

2026 self.vdis = {} 

2027 self.vdiTrees = [] 

2028 self.journaler = None 

2029 self.xapi = xapi 

2030 self._locked = 0 

2031 self._srLock = None 

2032        if createLock: 

2033 self._srLock = lock.Lock(lock.LOCK_TYPE_SR, self.uuid) 

2034 else: 

2035 Util.log("Requested no SR locking") 

2036 self.name = self.xapi.srRecord["name_label"] 

2037 self._failedCoalesceTargets = [] 

2038 

2039 if not self.xapi.isPluggedHere(): 

2040            if force: 

2041 Util.log("SR %s not attached on this host, ignoring" % uuid) 

2042 else: 

2043 if not self.wait_for_plug(): 

2044 raise util.SMException("SR %s not attached on this host" % uuid) 

2045 

2046        if force: 

2047 Util.log("Not checking if we are Master (SR %s)" % uuid) 

2048        elif not self.xapi.isMaster(): 

2049 raise util.SMException("This host is NOT master, will not run") 

2050 

2051 self.no_space_candidates = {} 

2052 

2053 def msg_cleared(self, xapi_session, msg_ref): 

2054 try: 

2055 msg = xapi_session.xenapi.message.get_record(msg_ref) 

2056 except XenAPI.Failure: 

2057 return True 

2058 

2059 return msg is None 

2060 

2061 def check_no_space_candidates(self): 
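# If any coalesce candidate had to be skipped for lack of space, raise (or keep)
# a single SM_GC_NO_SPACE message against the SR and remember its ref in
# sm_config; once every candidate is coalesceable again, destroy the message.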

2062 xapi_session = self.xapi.getSession() 

2063 

2064 msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE) 

2065 if self.no_space_candidates: 

2066 if msg_id is None or self.msg_cleared(xapi_session, msg_id): 

2067 util.SMlog("Could not coalesce due to a lack of space " 

2068 f"in SR {self.uuid}") 

2069 msg_body = ("Unable to perform data coalesce due to a lack " 

2070 f"of space in SR {self.uuid}") 

2071 msg_id = xapi_session.xenapi.message.create( 

2072 'SM_GC_NO_SPACE', 

2073 3, 

2074 "SR", 

2075 self.uuid, 

2076 msg_body) 

2077 xapi_session.xenapi.SR.remove_from_sm_config( 

2078 self.xapi.srRef, VDI.DB_GC_NO_SPACE) 

2079 xapi_session.xenapi.SR.add_to_sm_config( 

2080 self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id) 

2081 

2082 for candidate in self.no_space_candidates.values(): 

2083 candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id) 

2084 elif msg_id is not None: 

2085 # Everything was coalescable, remove the message 

2086 xapi_session.xenapi.message.destroy(msg_id) 

2087 

2088 def clear_no_space_msg(self, vdi): 

2089 msg_id = None 

2090 try: 

2091 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE) 

2092 except XenAPI.Failure: 

2093 pass 

2094 

2095 self.no_space_candidates.pop(vdi.uuid, None) 

2096        if msg_id is not None: 

2097 vdi.delConfig(VDI.DB_GC_NO_SPACE) 

2098 

2099 

2100 def wait_for_plug(self): 

2101 for _ in range(1, 10): 

2102 time.sleep(2) 

2103 if self.xapi.isPluggedHere(): 

2104 return True 

2105 return False 

2106 

2107 def gcEnabled(self, refresh=True): 

2108 if refresh: 

2109 self.xapi.srRecord = \ 

2110 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

2111 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

2112 Util.log("GC is disabled for this SR, abort") 

2113 return False 

2114 return True 

2115 

2116 def scan(self, force=False) -> None: 

2117 """Scan the SR and load VDI info for each VDI. If called repeatedly, 

2118 update VDI objects if they already exist""" 

2119 pass 

2120 

2121 def scanLocked(self, force=False): 

2122 self.lock() 

2123 try: 

2124 self.scan(force) 

2125 finally: 

2126 self.unlock() 

2127 

2128 def getVDI(self, uuid): 

2129 return self.vdis.get(uuid) 

2130 

2131 def hasWork(self): 

2132 if len(self.findGarbage()) > 0: 

2133 return True 

2134 if self.findCoalesceable(): 

2135 return True 

2136 if self.findLeafCoalesceable(): 

2137 return True 

2138 if self.needUpdateBlockInfo(): 

2139 return True 

2140 return False 

2141 

2142 def findCoalesceable(self): 

2143 """Find a coalesceable VDI. Return a vdi that should be coalesced 

2144 (choosing one among all coalesceable candidates according to some 

2145 criteria) or None if there is no VDI that could be coalesced""" 

2146 

2147 candidates = [] 

2148 

2149 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE) 

2150 if srSwitch == "false": 

2151 Util.log("Coalesce disabled for this SR") 

2152 return candidates 

2153 

2154 # finish any VDI for which a relink journal entry exists first 

2155 journals = self.journaler.getAll(VDI.JRN_RELINK) 

2156 for uuid in journals: 

2157 vdi = self.getVDI(uuid) 

2158 if vdi and vdi not in self._failedCoalesceTargets: 

2159 return vdi 

2160 

2161 for vdi in self.vdis.values(): 

2162 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets: 

2163 candidates.append(vdi) 

2164 Util.log("%s is coalescable" % vdi.uuid) 

2165 

2166 self.xapi.update_task_progress("coalescable", len(candidates)) 

2167 

2168 # pick one in the tallest tree 
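# (Presumably preferred because long snapshot chains are the most expensive to
# read through, so the tallest tree benefits most from being reduced first.)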

2169 treeHeight = dict() 

2170 for c in candidates: 

2171 height = c.getTreeRoot().getTreeHeight() 

2172 if treeHeight.get(height): 

2173 treeHeight[height].append(c) 

2174 else: 

2175 treeHeight[height] = [c] 

2176 

2177 freeSpace = self.getFreeSpace() 

2178 heights = list(treeHeight.keys()) 

2179 heights.sort(reverse=True) 

2180 for h in heights: 

2181 for c in treeHeight[h]: 

2182 spaceNeeded = c._calcExtraSpaceForCoalescing() 

2183 if spaceNeeded <= freeSpace: 

2184 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h)) 

2185 self.clear_no_space_msg(c) 

2186 return c 

2187 else: 

2188 self.no_space_candidates[c.uuid] = c 

2189 Util.log("No space to coalesce %s (free space: %d)" % \ 

2190 (c, freeSpace)) 

2191 return None 

2192 

2193 def getSwitch(self, key): 

2194 return self.xapi.srRecord["other_config"].get(key) 

2195 

2196 def forbiddenBySwitch(self, switch, condition, fail_msg): 

2197 srSwitch = self.getSwitch(switch) 

2198 ret = False 

2199 if srSwitch: 

2200 ret = srSwitch == condition 

2201 

2202 if ret: 

2203 Util.log(fail_msg) 

2204 

2205 return ret 

2206 

2207 def leafCoalesceForbidden(self): 

2208 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

2209 "false", 

2210 "Coalesce disabled for this SR") or 

2211 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

2212 VDI.LEAFCLSC_DISABLED, 

2213 "Leaf-coalesce disabled for this SR")) 

2214 

2215 def findLeafCoalesceable(self): 

2216 """Find leaf-coalesceable VDIs in each COW tree""" 

2217 

2218 candidates = [] 

2219 if self.leafCoalesceForbidden(): 

2220 return candidates 

2221 

2222 self.gatherLeafCoalesceable(candidates) 

2223 

2224 self.xapi.update_task_progress("coalescable", len(candidates)) 

2225 

2226 freeSpace = self.getFreeSpace() 

2227 for candidate in candidates: 

2228 # check the space constraints to see if leaf-coalesce is actually 

2229 # feasible for this candidate 
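# Snapshot-coalesce temporarily needs an extra copy of the leaf, so it requires
# more free space than a direct live leaf-coalesce; if only the live variant
# fits and this VDI qualifies for it, the smaller figure is used instead.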

2230 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing() 

2231 spaceNeededLive = spaceNeeded 

2232 if spaceNeeded > freeSpace: 

2233 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing() 

2234 if candidate.canLiveCoalesce(self.getStorageSpeed()): 

2235 spaceNeeded = spaceNeededLive 

2236 

2237 if spaceNeeded <= freeSpace: 

2238 Util.log("Leaf-coalesce candidate: %s" % candidate) 

2239 self.clear_no_space_msg(candidate) 

2240 return candidate 

2241 else: 

2242 Util.log("No space to leaf-coalesce %s (free space: %d)" % \ 

2243 (candidate, freeSpace)) 

2244 if spaceNeededLive <= freeSpace: 

2245 Util.log("...but enough space if skip snap-coalesce") 

2246 candidate.setConfig(VDI.DB_LEAFCLSC, 

2247 VDI.LEAFCLSC_OFFLINE) 

2248 self.no_space_candidates[candidate.uuid] = candidate 

2249 

2250 return None 

2251 

2252 def gatherLeafCoalesceable(self, candidates): 

2253 for vdi in self.vdis.values(): 

2254 if not vdi.isLeafCoalesceable(): 

2255 continue 

2256 if vdi in self._failedCoalesceTargets: 

2257 continue 

2258 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET: 

2259 Util.log("Skipping reset-on-boot %s" % vdi) 

2260 continue 

2261 if vdi.getConfig(vdi.DB_ALLOW_CACHING): 

2262 Util.log("Skipping allow_caching=true %s" % vdi) 

2263 continue 

2264 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED: 

2265 Util.log("Leaf-coalesce disabled for %s" % vdi) 

2266 continue 

2267 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or 

2268 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE): 

2269 continue 

2270 candidates.append(vdi) 

2271 

2272 def coalesce(self, vdi, dryRun=False): 

2273 """Coalesce vdi onto parent""" 

2274 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent)) 

2275        if dryRun: 

2276 return 

2277 

2278 try: 

2279 self._coalesce(vdi) 

2280 except util.SMException as e: 

2281            if isinstance(e, AbortException): 

2282 self.cleanup() 

2283 raise 

2284 else: 

2285 self._failedCoalesceTargets.append(vdi) 

2286 Util.logException("coalesce") 

2287 Util.log("Coalesce failed, skipping") 

2288 self.cleanup() 

2289 

2290 def coalesceLeaf(self, vdi, dryRun=False): 

2291 """Leaf-coalesce vdi onto parent""" 

2292 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent)) 

2293 if dryRun: 

2294 return 

2295 

2296 try: 

2297 uuid = vdi.uuid 

2298 try: 

2299 # "vdi" object will no longer be valid after this call 

2300 self._coalesceLeaf(vdi) 

2301 finally: 

2302 vdi = self.getVDI(uuid) 

2303 if vdi: 

2304 vdi.delConfig(vdi.DB_LEAFCLSC) 

2305 except AbortException: 

2306 self.cleanup() 

2307 raise 

2308 except (util.SMException, XenAPI.Failure) as e: 

2309 self._failedCoalesceTargets.append(vdi) 

2310 Util.logException("leaf-coalesce") 

2311 Util.log("Leaf-coalesce failed on %s, skipping" % vdi) 

2312 self.cleanup() 

2313 

2314 def garbageCollect(self, dryRun=False): 

2315 vdiList = self.findGarbage() 

2316 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

2317 for vdi in vdiList: 

2318 Util.log(" %s" % vdi) 

2319 if not dryRun: 

2320 self.deleteVDIs(vdiList) 

2321 self.cleanupJournals(dryRun) 

2322 

2323 def findGarbage(self): 

2324 vdiList = [] 

2325 for vdi in self.vdiTrees: 

2326 vdiList.extend(vdi.getAllPrunable()) 

2327 return vdiList 

2328 

2329 def deleteVDIs(self, vdiList) -> None: 

2330 for vdi in vdiList: 

2331 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): 

2332 raise AbortException("Aborting due to signal") 

2333 Util.log("Deleting unlinked VDI %s" % vdi) 

2334 self.deleteVDI(vdi) 

2335 

2336 def deleteVDI(self, vdi) -> None: 

2337 assert(len(vdi.children) == 0) 

2338 del self.vdis[vdi.uuid] 

2339        if vdi.parent: 

2340 vdi.parent.children.remove(vdi) 

2341        if vdi in self.vdiTrees: 

2342 self.vdiTrees.remove(vdi) 

2343 vdi.delete() 

2344 

2345 def forgetVDI(self, vdiUuid) -> None: 

2346 self.xapi.forgetVDI(self.uuid, vdiUuid) 

2347 

2348 def pauseVDIs(self, vdiList) -> None: 

2349 paused = [] 

2350 failed = False 

2351 for vdi in vdiList: 

2352 try: 

2353 vdi.pause() 

2354 paused.append(vdi) 

2355 except: 

2356 Util.logException("pauseVDIs") 

2357 failed = True 

2358 break 

2359 

2360 if failed: 

2361 self.unpauseVDIs(paused) 

2362 raise util.SMException("Failed to pause VDIs") 

2363 

2364 def unpauseVDIs(self, vdiList): 

2365 failed = False 

2366 for vdi in vdiList: 

2367 try: 

2368 vdi.unpause() 

2369 except: 

2370 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

2371 failed = True 

2372 if failed: 

2373 raise util.SMException("Failed to unpause VDIs") 

2374 

2375 def getFreeSpace(self) -> int: 

2376 return 0 

2377 

2378 def cleanup(self): 

2379 Util.log("In cleanup") 

2380 return 

2381 

2382 @override 

2383 def __str__(self) -> str: 

2384 if self.name: 

2385 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

2386 else: 

2387 ret = "%s" % self.uuid 

2388 return ret 

2389 

2390 def lock(self): 

2391 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort 

2392 signal to avoid deadlocking (trying to acquire the SR lock while the 

2393 lock is held by a process that is trying to abort us)""" 
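# With the constants above (LOCK_RETRY_ATTEMPTS_LOCK = 100 attempts,
# LOCK_RETRY_INTERVAL = 3 s) the worst-case wait before giving up on the SR
# lock is roughly 100 * 3 s = 5 minutes.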

2394 if not self._srLock: 

2395 return 

2396 

2397 if self._locked == 0: 

2398 abortFlag = IPCFlag(self.uuid) 

2399 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK): 

2400 if self._srLock.acquireNoblock(): 

2401 self._locked += 1 

2402 return 

2403 if abortFlag.test(FLAG_TYPE_ABORT): 

2404 raise AbortException("Abort requested") 

2405 time.sleep(SR.LOCK_RETRY_INTERVAL) 

2406 raise util.SMException("Unable to acquire the SR lock") 

2407 

2408 self._locked += 1 

2409 

2410 def unlock(self): 

2411        if not self._srLock: 

2412 return 

2413 assert(self._locked > 0) 

2414 self._locked -= 1 

2415 if self._locked == 0: 

2416 self._srLock.release() 

2417 

2418 def needUpdateBlockInfo(self) -> bool: 

2419 for vdi in self.vdis.values(): 

2420 if vdi.scanError or len(vdi.children) == 0: 

2421 continue 

2422 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

2423 return True 

2424 return False 

2425 

2426 def updateBlockInfo(self) -> None: 

2427 for vdi in self.vdis.values(): 

2428 if vdi.scanError or len(vdi.children) == 0: 

2429 continue 

2430 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

2431 vdi.updateBlockInfo() 

2432 

2433 def cleanupCoalesceJournals(self): 

2434 """Remove stale coalesce VDI indicators""" 

2435 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2436 for uuid, jval in entries.items(): 

2437 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2438 

2439 def cleanupJournals(self, dryRun=False): 

2440 """delete journal entries for non-existing VDIs""" 

2441 for t in [LVMVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]: 

2442 entries = self.journaler.getAll(t) 

2443 for uuid, jval in entries.items(): 

2444 if self.getVDI(uuid): 

2445 continue 

2446 if t == SR.JRN_CLONE: 

2447 baseUuid, clonUuid = jval.split("_") 

2448 if self.getVDI(baseUuid): 

2449 continue 

2450 Util.log(" Deleting stale '%s' journal entry for %s " 

2451 "(%s)" % (t, uuid, jval)) 

2452 if not dryRun: 

2453 self.journaler.remove(t, uuid) 

2454 

2455 def cleanupCache(self, maxAge=-1) -> int: 

2456 return 0 

2457 

2458 def _hasLeavesAttachedOn(self, vdi: VDI): 

2459 leaves = vdi.getAllLeaves() 

2460 leaves_vdi = [leaf.uuid for leaf in leaves] 

2461 return util.get_hosts_attached_on(self.xapi.session, leaves_vdi) 

2462 

2463 def _gc_running_file(self, vdi: VDI): 
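# Path of a per-VDI marker file (under the host's non-persistent run directory)
# that is created while the VDI is being coalesced and removed afterwards, so
# that an interrupted or concurrent coalesce of the same VDI can be detected.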

2464 run_file = "gc_running_{}".format(vdi.uuid) 

2465 return os.path.join(NON_PERSISTENT_DIR, str(self.uuid), run_file) 

2466 

2467 def _create_running_file(self, vdi: VDI): 

2468 with open(self._gc_running_file(vdi), "w") as f: 

2469 f.write("1") 

2470 

2471 def _delete_running_file(self, vdi: VDI): 

2472 os.unlink(self._gc_running_file(vdi)) 

2473 

2474 def _coalesce(self, vdi: VDI): 
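# Overall flow: (1) write the child's data into its parent while a JRN_COALESCE
# entry marks the step as in progress, (2) create a JRN_RELINK entry and repoint
# the child's children at the parent, (3) delete the now-redundant child.  The
# journal entries let an interrupted run resume at the relink step on the next
# GC pass.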

2475        if self.journaler.get(vdi.JRN_RELINK, vdi.uuid): 

2476 # this means we had done the actual coalescing already and just 

2477 # need to finish relinking and/or refreshing the children 

2478 Util.log("==> Coalesce apparently already done: skipping") 

2479 else: 

2480 # JRN_COALESCE is used to check which VDI is being coalesced in 

2481 # order to decide whether to abort the coalesce. We remove the 

2482 # journal as soon as the COW coalesce step is done, because we 

2483 # don't expect the rest of the process to take long 

2484 

2485            if os.path.exists(self._gc_running_file(vdi)): 

2486 util.SMlog("gc_running already exist for {}. Ignoring...".format(self.uuid)) 

2487 

2488 self._create_running_file(vdi) 

2489 

2490 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1") 

2491 host_refs = self._hasLeavesAttachedOn(vdi) 

2492 #TODO: this check of multiple host_refs should be done earlier in `is_coalesceable` to avoid stopping this late every time 

2493            if len(host_refs) > 1: 

2494 util.SMlog("Not coalesceable, chain activated more than once") 

2495 raise Exception("Not coalesceable, chain activated more than once") #TODO: Use correct error 

2496 

2497 try: 

2498                if host_refs and vdi.cowutil.isCoalesceableOnRemote(): 

2499 #Leaf opened on another host, we need to call online coalesce 

2500 util.SMlog("Remote coalesce for {}".format(vdi.path)) 

2501 vdi._doCoalesceOnHost(list(host_refs)[0]) 

2502 else: 

2503 util.SMlog("Offline coalesce for {}".format(vdi.path)) 

2504 vdi._doCoalesce() 

2505 except Exception as e: 

2506 util.SMlog("EXCEPTION while coalescing: {}".format(e)) 

2507 self._delete_running_file(vdi) 

2508 raise 

2509 

2510 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid) 

2511 self._delete_running_file(vdi) 

2512 

2513 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid) 

2514 

2515 # we now need to relink the children: lock the SR to prevent ops 

2516 # like SM.clone from manipulating the VDIs we'll be relinking and 

2517 # rescan the SR first in case the children changed since the last 

2518 # scan 

2519 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1") 

2520 

2521 self.lock() 

2522 try: 

2523 vdi.parent._tagChildrenForRelink() 

2524 self.scan() 

2525            vdi._relinkSkip() #TODO: We could check whether the parent is already correct before relinking; doing the relink a second time does not seem to cause issues 

2526 finally: 

2527 self.unlock() 

2528 # Reload the children to leave things consistent 

2529 vdi.parent._reloadChildren(vdi) 

2530 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid) 

2531 

2532 self.deleteVDI(vdi) 

2533 

2534 class CoalesceTracker: 

2535 GRACE_ITERATIONS = 2 

2536 MAX_ITERATIONS_NO_PROGRESS = 3 

2537 MAX_ITERATIONS = 10 

2538 MAX_INCREASE_FROM_MINIMUM = 1.2 

2539 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2540 " --> Final size {finSize}" 

2541 

2542 def __init__(self, sr): 

2543 self.itsNoProgress = 0 

2544 self.its = 0 

2545 self.minSize = float("inf") 

2546 self.history = [] 

2547 self.reason = "" 

2548 self.startSize = None 

2549 self.finishSize = None 

2550 self.sr = sr 

2551 self.grace_remaining = self.GRACE_ITERATIONS 

2552 

2553 def abortCoalesce(self, prevSize, curSize): 

2554 self.its += 1 

2555 self.history.append(self.HISTORY_STRING.format(its=self.its, 

2556 initSize=prevSize, 

2557 finSize=curSize)) 

2558 

2559 self.finishSize = curSize 

2560 

2561 if self.startSize is None: 

2562 self.startSize = prevSize 

2563 

2564 if curSize < self.minSize: 

2565 self.minSize = curSize 

2566 

2567 if prevSize < self.minSize: 

2568 self.minSize = prevSize 

2569 

2570 if self.its == 1: 

2571 # Skip evaluating conditions on first iteration 

2572 return False 

2573 

2574 if prevSize < curSize: 

2575 self.itsNoProgress += 1 

2576 Util.log("No progress, attempt:" 

2577 " {attempt}".format(attempt=self.itsNoProgress)) 

2578 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2579 else: 

2580 # We made progress 

2581 return False 

2582 

2583 if self.its > self.MAX_ITERATIONS: 

2584 max = self.MAX_ITERATIONS 

2585 self.reason = \ 

2586 "Max iterations ({max}) exceeded".format(max=max) 

2587 return True 

2588 

2589 if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS: 

2590 max = self.MAX_ITERATIONS_NO_PROGRESS 

2591 self.reason = \ 

2592 "No progress made for {max} iterations".format(max=max) 

2593 return True 

2594 

2595 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2596 if curSize > maxSizeFromMin: 

2597 self.grace_remaining -= 1 

2598 if self.grace_remaining == 0: 

2599 self.reason = "Unexpected bump in size," \ 

2600 " compared to minimum achieved" 

2601 

2602 return True 

2603 

2604 return False 

2605 

2606 def printSizes(self): 

2607 Util.log("Starting size was {size}" 

2608 .format(size=self.startSize)) 

2609 Util.log("Final size was {size}" 

2610 .format(size=self.finishSize)) 

2611 Util.log("Minimum size achieved was {size}" 

2612 .format(size=self.minSize)) 

2613 

2614 def printReasoning(self): 

2615 Util.log("Aborted coalesce") 

2616 for hist in self.history: 

2617 Util.log(hist) 

2618 Util.log(self.reason) 

2619 self.printSizes() 

2620 

2621 def printSummary(self): 

2622 if self.its == 0: 

2623 return 

2624 

2625            if self.reason: 

2626 Util.log("Aborted coalesce") 

2627 Util.log(self.reason) 

2628 else: 

2629 Util.log("Coalesce summary") 

2630 

2631 Util.log(f"Performed {self.its} iterations") 

2632 self.printSizes() 

2633 

2634 

2635 def _coalesceLeaf(self, vdi): 

2636 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot 

2637 complete due to external changes, namely vdi_delete and vdi_snapshot 

2638 that alter leaf-coalescibility of vdi""" 
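# The loop below repeatedly single-snapshots the leaf and coalesces the
# temporary snapshot into the parent, shrinking the data left in the leaf,
# until the remainder is small enough for canLiveCoalesce() to accept (or the
# CoalesceTracker decides to give up).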

2639 tracker = self.CoalesceTracker(self) 

2640 while not vdi.canLiveCoalesce(self.getStorageSpeed()): 

2641 prevSizePhys = vdi.getSizePhys() 

2642            if not self._snapshotCoalesce(vdi): 

2643 return False 

2644 if tracker.abortCoalesce(prevSizePhys, vdi.getSizePhys()): 

2645 tracker.printReasoning() 

2646 raise util.SMException("VDI {uuid} could not be coalesced" 

2647 .format(uuid=vdi.uuid)) 

2648 tracker.printSummary() 

2649 return self._liveLeafCoalesce(vdi) 

2650 

2651 def calcStorageSpeed(self, startTime, endTime, coalescedSize): 

2652 speed = None 

2653 total_time = endTime - startTime 

2654 if total_time > 0: 

2655 speed = float(coalescedSize) / float(total_time) 

2656 return speed 

2657 
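# The speed log keeps the last N_RUNNING_AVERAGE coalesce throughput samples
# (coalesced size divided by elapsed seconds) for this SR; getStorageSpeed()
# averages them and the result is passed to canLiveCoalesce() when deciding
# whether a VDI can be live leaf-coalesced.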

2658 def writeSpeedToFile(self, speed): 

2659 content = [] 

2660 speedFile = None 

2661 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2662 self.lock() 

2663 try: 

2664 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2665 lines = "" 

2666 if not os.path.isfile(path): 

2667 lines = str(speed) + "\n" 

2668 else: 

2669 speedFile = open(path, "r+") 

2670 content = speedFile.readlines() 

2671 content.append(str(speed) + "\n") 

2672 if len(content) > N_RUNNING_AVERAGE: 

2673 del content[0] 

2674 lines = "".join(content) 

2675 

2676 util.atomicFileWrite(path, VAR_RUN, lines) 

2677 finally: 

2678 if speedFile is not None: 

2679 speedFile.close() 

2680 Util.log("Closing file: {myfile}".format(myfile=path)) 

2681 self.unlock() 

2682 

2683 def recordStorageSpeed(self, startTime, endTime, coalescedSize): 

2684 speed = self.calcStorageSpeed(startTime, endTime, coalescedSize) 

2685 if speed is None: 

2686 return 

2687 

2688 self.writeSpeedToFile(speed) 

2689 

2690 def getStorageSpeed(self): 

2691 speedFile = None 

2692 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2693 self.lock() 

2694 try: 

2695 speed = None 

2696 if os.path.isfile(path): 

2697 speedFile = open(path) 

2698 content = speedFile.readlines() 

2699 try: 

2700 content = [float(i) for i in content] 

2701 except ValueError: 

2702 Util.log("Something bad in the speed log:{log}". 

2703 format(log=speedFile.readlines())) 

2704 return speed 

2705 

2706 if len(content): 

2707 speed = sum(content) / float(len(content)) 

2708                    if speed <= 0: 

2709 # Defensive, should be impossible. 

2710 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2711 format(speed=speed, uuid=self.uuid)) 

2712 speed = None 

2713 else: 

2714 Util.log("Speed file empty for SR: {uuid}". 

2715 format(uuid=self.uuid)) 

2716 else: 

2717 Util.log("Speed log missing for SR: {uuid}". 

2718 format(uuid=self.uuid)) 

2719 return speed 

2720 finally: 

2721            if speedFile is not None: 

2722 speedFile.close() 

2723 self.unlock() 

2724 

2725 def _snapshotCoalesce(self, vdi): 

2726 # Note that because we are not holding any locks here, concurrent SM 

2727 # operations may change this tree under our feet. In particular, vdi 

2728 # can be deleted, or it can be snapshotted. 

2729 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2730 Util.log("Single-snapshotting %s" % vdi) 

2731 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2732 try: 

2733 ret = self.xapi.singleSnapshotVDI(vdi) 

2734 Util.log("Single-snapshot returned: %s" % ret) 

2735 except XenAPI.Failure as e: 

2736 if util.isInvalidVDI(e): 

2737 Util.log("The VDI appears to have been concurrently deleted") 

2738 return False 

2739 raise 

2740 self.scanLocked() 

2741 tempSnap = vdi.parent 

2742 if not tempSnap.isCoalesceable(): 

2743 Util.log("The VDI appears to have been concurrently snapshotted") 

2744 return False 

2745 Util.log("Coalescing parent %s" % tempSnap) 

2746 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2747 sizePhys = vdi.getSizePhys() 

2748 self._coalesce(tempSnap) 

2749 if not vdi.isLeafCoalesceable(): 

2750 Util.log("The VDI tree appears to have been altered since") 

2751 return False 

2752 return True 

2753 

2754 def _liveLeafCoalesce(self, vdi: VDI) -> bool: 

2755 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) 

2756 self.lock() 

2757 try: 

2758 self.scan() 

2759 if not self.getVDI(vdi.uuid): 

2760 Util.log("The VDI appears to have been deleted meanwhile") 

2761 return False 

2762 if not vdi.isLeafCoalesceable(): 

2763 Util.log("The VDI is no longer leaf-coalesceable") 

2764 return False 

2765 

2766 uuid = vdi.uuid 

2767 vdi.pause(failfast=True) 

2768 try: 

2769 try: 

2770 # "vdi" object will no longer be valid after this call 

2771 self._create_running_file(vdi) 

2772 self._doCoalesceLeaf(vdi) 

2773 except: 

2774 Util.logException("_doCoalesceLeaf") 

2775 self._handleInterruptedCoalesceLeaf() 

2776 raise 

2777 finally: 

2778 vdi = self.getVDI(uuid) 

2779 if vdi: 

2780 vdi.ensureUnpaused() 

2781 self._delete_running_file(vdi) 

2782 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid) 

2783 if vdiOld: 

2784 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2785 self.deleteVDI(vdiOld) 

2786 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2787 finally: 

2788 self.cleanup() 

2789 self.unlock() 

2790 self.logFilter.logState() 

2791 return True 

2792 

2793 def _doCoalesceLeaf(self, vdi: VDI): 

2794 """Actual coalescing of a leaf VDI onto parent. Must be called in an 

2795 offline/atomic context""" 

2796 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid) 

2797 self._prepareCoalesceLeaf(vdi) 

2798 vdi.parent._setHidden(False) 

2799 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False) 

2800 vdi.validate(True) 

2801 vdi.parent.validate(True) 

2802 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid) 

2803 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT 

2804 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE: 

2805 Util.log("Leaf-coalesce forced, will not use timeout") 

2806 timeout = 0 

2807 vdi._coalesceCowImage(timeout) 

2808 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid) 

2809 vdi.parent.validate(True) 

2810 #vdi._verifyContents(timeout / 2) 

2811 

2812 # rename 
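# The old leaf is renamed out of the way (OLD_<uuid>) and the parent takes over
# the leaf's UUID, so the VDI keeps its identity in XAPI while its backing
# image becomes the coalesced parent.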

2813 vdiUuid = vdi.uuid 

2814 oldName = vdi.fileName 

2815 origParentUuid = vdi.parent.uuid 

2816 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid) 

2817 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid) 

2818 vdi.parent.rename(vdiUuid) 

2819 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid) 

2820 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid) 

2821 

2822 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is 

2823 # garbage 

2824 

2825 # update the VDI record 

2826 vdi.parent.delConfig(VDI.DB_VDI_PARENT) 

2827 if vdi.parent.vdi_type == VdiType.RAW: 

2828 vdi.parent.setConfig(VDI.DB_VDI_TYPE, VdiType.RAW) 

2829 vdi.parent.delConfig(VDI.DB_VDI_BLOCKS) 

2830 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid) 

2831 

2832 self._updateNode(vdi) 

2833 

2834 # delete the obsolete leaf & inflate the parent (in that order, to 

2835 # minimize free space requirements) 

2836 parent = vdi.parent 

2837 vdi._setHidden(True) 

2838 vdi.parent.children = [] 

2839 vdi.parent = None 

2840 

2841 extraSpace = self._calcExtraSpaceNeeded(vdi, parent) 

2842 freeSpace = self.getFreeSpace() 

2843 if freeSpace < extraSpace: 

2844 # don't delete unless we need the space: deletion is time-consuming 

2845 # because it requires contacting the slaves, and we're paused here 

2846 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2847 self.deleteVDI(vdi) 

2848 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2849 

2850 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid) 

2851 self.journaler.remove(VDI.JRN_LEAF, vdiUuid) 

2852 

2853 self.forgetVDI(origParentUuid) 

2854 self._finishCoalesceLeaf(parent) 

2855 self._updateSlavesOnResize(parent) 

2856 

2857 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

2858 assert(VdiType.isCowImage(parent.vdi_type)) 

2859 extra = child.getSizePhys() - parent.getSizePhys() 

2860 if extra < 0: 

2861 extra = 0 

2862 return extra 

2863 

2864 def _prepareCoalesceLeaf(self, vdi) -> None: 

2865 pass 

2866 

2867 def _updateNode(self, vdi) -> None: 

2868 pass 

2869 

2870 def _finishCoalesceLeaf(self, parent) -> None: 

2871 pass 

2872 

2873 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

2874 pass 

2875 

2876 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None: 

2877 pass 

2878 

2879 def _updateSlavesOnResize(self, vdi) -> None: 

2880 pass 

2881 

2882 def _removeStaleVDIs(self, uuidsPresent) -> None: 

2883 for uuid in list(self.vdis.keys()): 

2884 if not uuid in uuidsPresent: 

2885 Util.log("VDI %s disappeared since last scan" % \ 

2886 self.vdis[uuid]) 

2887 del self.vdis[uuid] 

2888 

2889 def _handleInterruptedCoalesceLeaf(self) -> None: 

2890 """An interrupted leaf-coalesce operation may leave the COW tree in an 

2891 inconsistent state. If the old-leaf VDI is still present, we revert the 

2892 operation (in case the original error is persistent); otherwise we must 

2893 finish the operation""" 

2894 pass 

2895 

2896 def _buildTree(self, force): 

2897 self.vdiTrees = [] 

2898 for vdi in self.vdis.values(): 

2899 if vdi.parentUuid: 

2900 parent = self.getVDI(vdi.parentUuid) 

2901 if not parent: 

2902 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2903 self.vdiTrees.append(vdi) 

2904 continue 

2905 if force: 

2906 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2907 (vdi.parentUuid, vdi.uuid)) 

2908 self.vdiTrees.append(vdi) 

2909 continue 

2910 else: 

2911 raise util.SMException("Parent VDI %s of %s not " \ 

2912 "found" % (vdi.parentUuid, vdi.uuid)) 

2913 vdi.parent = parent 

2914 parent.children.append(vdi) 

2915 else: 

2916 self.vdiTrees.append(vdi) 

2917 

2918 

2919class FileSR(SR): 

2920 TYPE = SR.TYPE_FILE 

2921 CACHE_FILE_EXT = ".vhdcache" 

2922 # cache cleanup actions 

2923 CACHE_ACTION_KEEP = 0 

2924 CACHE_ACTION_REMOVE = 1 

2925 CACHE_ACTION_REMOVE_IF_INACTIVE = 2 

2926 

2927 def __init__(self, uuid, xapi, createLock, force): 

2928 SR.__init__(self, uuid, xapi, createLock, force) 

2929 self.path = "/var/run/sr-mount/%s" % self.uuid 

2930 self.journaler = fjournaler.Journaler(self.path) 

2931 

2932 @override 

2933 def scan(self, force=False) -> None: 

2934 if not util.pathexists(self.path): 

2935 raise util.SMException("directory %s not found!" % self.uuid) 

2936 

2937 uuidsPresent: List[str] = [] 

2938 

2939 for vdi_type in VDI_COW_TYPES: 

2940 scan_result = self._scan(vdi_type, force) 

2941 for uuid, image_info in scan_result.items(): 

2942 vdi = self.getVDI(uuid) 

2943 if not vdi: 

2944 self.logFilter.logNewVDI(uuid) 

2945 vdi = FileVDI(self, uuid, vdi_type) 

2946 self.vdis[uuid] = vdi 

2947 vdi.load(image_info) 

2948 uuidsPresent.extend(scan_result.keys()) 

2949 

2950 rawList = [x for x in os.listdir(self.path) if x.endswith(VdiTypeExtension.RAW)] 

2951 for rawName in rawList: 

2952 uuid = FileVDI.extractUuid(rawName) 

2953 uuidsPresent.append(uuid) 

2954 vdi = self.getVDI(uuid) 

2955 if not vdi: 

2956 self.logFilter.logNewVDI(uuid) 

2957 vdi = FileVDI(self, uuid, VdiType.RAW) 

2958 self.vdis[uuid] = vdi 

2959 self._removeStaleVDIs(uuidsPresent) 

2960 self._buildTree(force) 

2961 self.logFilter.logState() 

2962 self._handleInterruptedCoalesceLeaf() 

2963 

2964 @override 

2965 def getFreeSpace(self) -> int: 

2966 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) 

2967 

2968 @override 

2969 def deleteVDIs(self, vdiList) -> None: 

2970 rootDeleted = False 

2971 for vdi in vdiList: 

2972 if not vdi.parent: 

2973 rootDeleted = True 

2974 break 

2975 SR.deleteVDIs(self, vdiList) 

2976 if self.xapi.srRecord["type"] == "nfs" and rootDeleted: 

2977 self.xapi.markCacheSRsDirty() 

2978 

2979 @override 

2980 def cleanupCache(self, maxAge=-1) -> int: 

2981 """Clean up IntelliCache cache files. Caches for leaf nodes are 

2982 removed when the leaf node no longer exists or its allow-caching 

2983 attribute is not set. Caches for parent nodes are removed when the 

2984 parent node no longer exists or it hasn't been used in more than 

2985 <maxAge> hours. 

2986 Return number of caches removed. 

2987 """ 

2988 numRemoved = 0 

2989 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)] 

2990 Util.log("Found %d cache files" % len(cacheFiles)) 

2991 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge) 

2992 for cacheFile in cacheFiles: 

2993 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)] 

2994 action = self.CACHE_ACTION_KEEP 

2995 rec = self.xapi.getRecordVDI(uuid) 

2996 if not rec: 

2997 Util.log("Cache %s: VDI doesn't exist" % uuid) 

2998 action = self.CACHE_ACTION_REMOVE 

2999 elif rec["managed"] and not rec["allow_caching"]: 

3000 Util.log("Cache %s: caching disabled" % uuid) 

3001 action = self.CACHE_ACTION_REMOVE 

3002 elif not rec["managed"] and maxAge >= 0: 

3003 lastAccess = datetime.datetime.fromtimestamp( \ 

3004 os.path.getatime(os.path.join(self.path, cacheFile))) 

3005 if lastAccess < cutoff: 

3006 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge)) 

3007 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE 

3008 

3009 if action == self.CACHE_ACTION_KEEP: 

3010 Util.log("Keeping cache %s" % uuid) 

3011 continue 

3012 

3013 lockId = uuid 

3014 parentUuid = None 

3015 if rec and rec["managed"]: 

3016 parentUuid = rec["sm_config"].get("vhd-parent") 

3017 if parentUuid: 

3018 lockId = parentUuid 

3019 

3020 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId) 

3021 cacheLock.acquire() 

3022 try: 

3023 if self._cleanupCache(uuid, action): 

3024 numRemoved += 1 

3025 finally: 

3026 cacheLock.release() 

3027 return numRemoved 

3028 

3029 def _cleanupCache(self, uuid, action): 

3030 assert(action != self.CACHE_ACTION_KEEP) 

3031 rec = self.xapi.getRecordVDI(uuid) 

3032 if rec and rec["allow_caching"]: 

3033 Util.log("Cache %s appears to have become valid" % uuid) 

3034 return False 

3035 

3036 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT) 

3037 tapdisk = blktap2.Tapdisk.find_by_path(fullPath) 

3038 if tapdisk: 

3039 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE: 

3040 Util.log("Cache %s still in use" % uuid) 

3041 return False 

3042 Util.log("Shutting down tapdisk for %s" % fullPath) 

3043 tapdisk.shutdown() 

3044 

3045 Util.log("Deleting file %s" % fullPath) 

3046 os.unlink(fullPath) 

3047 return True 

3048 

3049 def _isCacheFileName(self, name): 

3050 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \ 

3051 name.endswith(self.CACHE_FILE_EXT) 

3052 

3053 def _scan(self, vdi_type, force): 

3054 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3055 error = False 

3056 pattern = os.path.join(self.path, "*%s" % VDI_TYPE_TO_EXTENSION[vdi_type]) 

3057 scan_result = getCowUtil(vdi_type).getAllInfoFromVG(pattern, FileVDI.extractUuid) 

3058 for uuid, vdiInfo in scan_result.items(): 

3059 if vdiInfo.error: 

3060 error = True 

3061 break 

3062 if not error: 

3063 return scan_result 

3064 Util.log("Scan error on attempt %d" % i) 

3065 if force: 

3066 return scan_result 

3067 raise util.SMException("Scan error") 

3068 

3069 @override 

3070 def deleteVDI(self, vdi) -> None: 

3071 self._checkSlaves(vdi) 

3072 SR.deleteVDI(self, vdi) 

3073 

3074 def _checkSlaves(self, vdi): 

3075 onlineHosts = self.xapi.getOnlineHosts() 

3076 abortFlag = IPCFlag(self.uuid) 

3077 for pbdRecord in self.xapi.getAttachedPBDs(): 

3078 hostRef = pbdRecord["host"] 

3079 if hostRef == self.xapi._hostRef: 

3080 continue 

3081 if abortFlag.test(FLAG_TYPE_ABORT): 

3082 raise AbortException("Aborting due to signal") 

3083 try: 

3084 self._checkSlave(hostRef, vdi) 

3085 except util.CommandException: 

3086 if hostRef in onlineHosts: 

3087 raise 

3088 

3089 def _checkSlave(self, hostRef, vdi): 

3090 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path}) 

3091 Util.log("Checking with slave: %s" % repr(call)) 

3092 _host = self.xapi.session.xenapi.host 

3093        text = _host.call_plugin(*call) 

3094 

3095 @override 

3096 def _handleInterruptedCoalesceLeaf(self) -> None: 

3097 entries = self.journaler.getAll(VDI.JRN_LEAF) 

3098 for uuid, parentUuid in entries.items(): 

3099 fileList = os.listdir(self.path) 

3100 childName = uuid + VdiTypeExtension.VHD 

3101 tmpChildName = self.TMP_RENAME_PREFIX + uuid + VdiTypeExtension.VHD 

3102 parentName1 = parentUuid + VdiTypeExtension.VHD 

3103 parentName2 = parentUuid + VdiTypeExtension.RAW 

3104 parentPresent = (parentName1 in fileList or parentName2 in fileList) 

3105 if parentPresent or tmpChildName in fileList: 

3106 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3107 else: 

3108 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3109 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3110 vdi = self.getVDI(uuid) 

3111 if vdi: 

3112 vdi.ensureUnpaused() 

3113 

3114 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3115 Util.log("*** UNDO LEAF-COALESCE") 

3116 parent = self.getVDI(parentUuid) 

3117 if not parent: 

3118 parent = self.getVDI(childUuid) 

3119 if not parent: 

3120 raise util.SMException("Neither %s nor %s found" % \ 

3121 (parentUuid, childUuid)) 

3122 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

3123 parent.rename(parentUuid) 

3124 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

3125 

3126 child = self.getVDI(childUuid) 

3127 if not child: 

3128 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3129 if not child: 

3130 raise util.SMException("Neither %s nor %s found" % \ 

3131 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3132 Util.log("Renaming child back to %s" % childUuid) 

3133 child.rename(childUuid) 

3134 Util.log("Updating the VDI record") 

3135 child.setConfig(VDI.DB_VDI_PARENT, parentUuid) 

3136 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type) 

3137 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3138 

3139 if child.hidden: 

3140 child._setHidden(False) 

3141 if not parent.hidden: 

3142 parent._setHidden(True) 

3143 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3144 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3145 Util.log("*** leaf-coalesce undo successful") 

3146 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3147 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3148 

3149 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3150 Util.log("*** FINISH LEAF-COALESCE") 

3151 vdi = self.getVDI(childUuid) 

3152 if not vdi: 

3153 raise util.SMException("VDI %s not found" % childUuid) 

3154 try: 

3155 self.forgetVDI(parentUuid) 

3156 except XenAPI.Failure: 

3157 pass 

3158 self._updateSlavesOnResize(vdi) 

3159 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3160 Util.log("*** finished leaf-coalesce successfully") 

3161 

3162 

3163class LVMSR(SR): 

3164 TYPE = SR.TYPE_LVHD 

3165 SUBTYPES = ["lvhdoiscsi", "lvhdohba"] 

3166 

3167 def __init__(self, uuid, xapi, createLock, force): 

3168 SR.__init__(self, uuid, xapi, createLock, force) 

3169 self.vgName = "%s%s" % (VG_PREFIX, self.uuid) 

3170 self.path = os.path.join(VG_LOCATION, self.vgName) 

3171 

3172 sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid) 

3173 other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref) 

3174 lvm_conf = other_conf.get('lvm-conf') if other_conf else None 

3175 self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf) 

3176 

3177 self.lvActivator = LVActivator(self.uuid, self.lvmCache) 

3178 self.journaler = journaler.Journaler(self.lvmCache) 

3179 

3180 @override 

3181 def deleteVDI(self, vdi) -> None: 

3182 if self.lvActivator.get(vdi.uuid, False): 

3183 self.lvActivator.deactivate(vdi.uuid, False) 

3184 self._checkSlaves(vdi) 

3185 SR.deleteVDI(self, vdi) 

3186 

3187 @override 

3188 def forgetVDI(self, vdiUuid) -> None: 

3189 SR.forgetVDI(self, vdiUuid) 

3190 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) 

3191 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid) 

3192 

3193 @override 

3194 def getFreeSpace(self) -> int: 

3195 stats = lvutil._getVGstats(self.vgName) 

3196 return stats['physical_size'] - stats['physical_utilisation'] 

3197 

3198 @override 

3199 def cleanup(self): 

3200 if not self.lvActivator.deactivateAll(): 

3201 Util.log("ERROR deactivating LVs while cleaning up") 

3202 

3203 @override 

3204 def needUpdateBlockInfo(self) -> bool: 

3205 for vdi in self.vdis.values(): 

3206 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0: 

3207 continue 

3208 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

3209 return True 

3210 return False 

3211 

3212 @override 

3213 def updateBlockInfo(self) -> None: 

3214 numUpdated = 0 

3215 for vdi in self.vdis.values(): 

3216 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0: 

3217 continue 

3218 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

3219 vdi.updateBlockInfo() 

3220 numUpdated += 1 

3221 if numUpdated: 

3222 # deactivate the LVs again sooner rather than later. If we don't do it

3223 # now, by the time this thread gets to deactivations, another one 

3224 # might have leaf-coalesced a node and deleted it, making the child 

3225 # inherit the refcount value and preventing the correct decrement 

3226 self.cleanup() 

3227 

3228 @override 

3229 def scan(self, force=False) -> None: 

3230 vdis = self._scan(force) 

3231 for uuid, vdiInfo in vdis.items(): 

3232 vdi = self.getVDI(uuid) 

3233 if not vdi: 

3234 self.logFilter.logNewVDI(uuid) 

3235 vdi = LVMVDI(self, uuid, vdiInfo.vdiType) 

3236 self.vdis[uuid] = vdi 

3237 vdi.load(vdiInfo) 

3238 self._removeStaleVDIs(vdis.keys()) 

3239 self._buildTree(force) 

3240 self.logFilter.logState() 

3241 self._handleInterruptedCoalesceLeaf() 

3242 

3243 def _scan(self, force): 

3244 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3245 error = False 

3246 self.lvmCache.refresh() 

3247 vdis = LvmCowUtil.getVDIInfo(self.lvmCache) 

3248 for uuid, vdiInfo in vdis.items(): 

3249 if vdiInfo.scanError: 

3250 error = True 

3251 break 

3252 if not error: 

3253 return vdis 

3254 Util.log("Scan error, retrying (%d)" % i) 

3255 if force: 

3256 return vdis 

3257 raise util.SMException("Scan error") 

3258 

3259 @override 

3260 def _removeStaleVDIs(self, uuidsPresent) -> None: 

3261 for uuid in list(self.vdis.keys()): 

3262 if not uuid in uuidsPresent: 

3263 Util.log("VDI %s disappeared since last scan" % \ 

3264 self.vdis[uuid]) 

3265 del self.vdis[uuid] 

3266 if self.lvActivator.get(uuid, False): 

3267 self.lvActivator.remove(uuid, False) 

3268 

3269 @override 

3270 def _liveLeafCoalesce(self, vdi) -> bool: 

3271 """If the parent is raw and the child was resized (virt. size), then 

3272 we'll need to resize the parent, which can take a while due to zeroing 

3273 out of the extended portion of the LV. Do it before pausing the child 

3274 to avoid a protracted downtime""" 

3275 if not VdiType.isCowImage(vdi.parent.vdi_type) and vdi.sizeVirt > vdi.parent.sizeVirt: 

3276 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

3277 vdi.parent._increaseSizeVirt(vdi.sizeVirt) 

3278 

3279 return SR._liveLeafCoalesce(self, vdi) 

3280 

3281 @override 

3282 def _prepareCoalesceLeaf(self, vdi) -> None: 

3283 vdi._activateChain() 

3284 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

3285 vdi.deflate() 

3286 vdi.inflateParentForCoalesce() 

3287 

3288 @override 

3289 def _updateNode(self, vdi) -> None: 

3290 # fix the refcounts: the remaining node should inherit the binary 

3291 # refcount from the leaf (because if it was online, it should remain 

3292 # refcounted as such), but the normal refcount from the parent (because 

3293 # this node is really the parent node) - minus 1 if it is online (since 

3294 # non-leaf nodes increment their normal counts when they are online and 

3295 # we are now a leaf, storing that 1 in the binary refcount). 
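# Editorial worked example (hypothetical numbers, not from the original
# source): if the leaf is at (cCnt=1, cBcnt=1) and the parent at
# (pCnt=3, pBcnt=0), the code below ends up calling
# RefCounter.set(parent, 3 - 1, 1, ns), i.e. the surviving node keeps (2, 1).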

3296 ns = NS_PREFIX_LVM + self.uuid 

3297 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns) 

3298 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns) 

3299 pCnt = pCnt - cBcnt 

3300 assert(pCnt >= 0) 

3301 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns) 

3302 

3303 @override 

3304 def _finishCoalesceLeaf(self, parent) -> None: 

3305 if not parent.isSnapshot() or parent.isAttachedRW(): 

3306 parent.inflateFully() 

3307 else: 

3308 parent.deflate() 

3309 

3310 @override 

3311 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3312 return parent.lvmcowutil.calcVolumeSize(parent.sizeVirt) - parent.sizeLV 

3313 

3314 @override 

3315 def _handleInterruptedCoalesceLeaf(self) -> None: 

3316 entries = self.journaler.getAll(VDI.JRN_LEAF) 

3317 for uuid, parentUuid in entries.items(): 

3318 undo = False 

3319 for prefix in LV_PREFIX.values(): 

3320 parentLV = prefix + parentUuid 

3321 undo = self.lvmCache.checkLV(parentLV) 

3322 if undo: 

3323 break 

3324 

3325 if not undo: 

3326 for prefix in LV_PREFIX.values(): 

3327 tmpChildLV = prefix + uuid 

3328 undo = self.lvmCache.checkLV(tmpChildLV) 

3329 if undo: 

3330 break 

3331 

3332 if undo: 

3333 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3334 else: 

3335 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3336 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3337 vdi = self.getVDI(uuid) 

3338 if vdi: 

3339 vdi.ensureUnpaused() 

3340 

3341 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3342 Util.log("*** UNDO LEAF-COALESCE") 

3343 parent = self.getVDI(parentUuid) 

3344 if not parent: 

3345 parent = self.getVDI(childUuid) 

3346 if not parent: 

3347 raise util.SMException("Neither %s nor %s found" % \ 

3348 (parentUuid, childUuid)) 

3349 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

3350 parent.rename(parentUuid) 

3351 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

3352 

3353 child = self.getVDI(childUuid) 

3354 if not child: 

3355 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3356 if not child: 

3357 raise util.SMException("Neither %s nor %s found" % \ 

3358 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3359 Util.log("Renaming child back to %s" % childUuid) 

3360 child.rename(childUuid) 

3361 Util.log("Updating the VDI record") 

3362 child.setConfig(VDI.DB_VDI_PARENT, parentUuid) 

3363 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type) 

3364 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3365 

3366 # refcount (best effort - assume that it had succeeded if the 

3367 # second rename succeeded; if not, this adjustment will be wrong, 

3368 # leading to a non-deactivation of the LV) 
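# Editorial example (hypothetical numbers): a child at (cCnt=0, cBcnt=1) and
# a parent at (pCnt=2, pBcnt=0) leave the parent at (2 + 1, 0) = (3, 0) below.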

3369 ns = NS_PREFIX_LVM + self.uuid 

3370 cCnt, cBcnt = RefCounter.check(child.uuid, ns) 

3371 pCnt, pBcnt = RefCounter.check(parent.uuid, ns) 

3372 pCnt = pCnt + cBcnt 

3373 RefCounter.set(parent.uuid, pCnt, 0, ns) 

3374 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid) 

3375 

3376 parent.deflate() 

3377 child.inflateFully() 

3378 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid) 

3379 if child.hidden: 

3380 child._setHidden(False) 

3381 if not parent.hidden: 

3382 parent._setHidden(True) 

3383 if not parent.lvReadonly: 

3384 self.lvmCache.setReadonly(parent.fileName, True) 

3385 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3386 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3387 Util.log("*** leaf-coalesce undo successful") 

3388 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3389 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3390 

3391 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3392 Util.log("*** FINISH LEAF-COALESCE") 

3393 vdi = self.getVDI(childUuid) 

3394 if not vdi: 

3395 raise util.SMException("VDI %s not found" % childUuid) 

3396 vdi.inflateFully() 

3397 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid) 

3398 try: 

3399 self.forgetVDI(parentUuid) 

3400 except XenAPI.Failure: 

3401 pass 

3402 self._updateSlavesOnResize(vdi) 

3403 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3404 Util.log("*** finished leaf-coalesce successfully") 

3405 

3406 def _checkSlaves(self, vdi): 

3407 """Confirm with all slaves in the pool that 'vdi' is not in use. We 

3408 try to check all slaves, including those that the Agent believes are 

3409 offline, but ignore failures for offline hosts. This is to avoid cases 

3410 where the Agent thinks a host is offline but the host is up.""" 

3411 args = {"vgName": self.vgName, 

3412 "action1": "deactivateNoRefcount", 

3413 "lvName1": vdi.fileName, 

3414 "action2": "cleanupLockAndRefcount", 

3415 "uuid2": vdi.uuid, 

3416 "ns2": NS_PREFIX_LVM + self.uuid} 

3417 onlineHosts = self.xapi.getOnlineHosts() 

3418 abortFlag = IPCFlag(self.uuid) 

3419 for pbdRecord in self.xapi.getAttachedPBDs(): 

3420 hostRef = pbdRecord["host"] 

3421 if hostRef == self.xapi._hostRef: 

3422 continue 

3423 if abortFlag.test(FLAG_TYPE_ABORT): 

3424 raise AbortException("Aborting due to signal") 

3425 Util.log("Checking with slave %s (path %s)" % ( 

3426 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path)) 

3427 try: 

3428 self.xapi.ensureInactive(hostRef, args) 

3429 except XenAPI.Failure: 

3430 if hostRef in onlineHosts: 

3431 raise 

3432 

3433 @override 

3434 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

3435 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) 

3436 if not slaves: 

3437 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ 

3438 child) 

3439 return 

3440 

3441 tmpName = LV_PREFIX[child.vdi_type] + self.TMP_RENAME_PREFIX + child.uuid

3442 args = {"vgName": self.vgName, 

3443 "action1": "deactivateNoRefcount", 

3444 "lvName1": tmpName, 

3445 "action2": "deactivateNoRefcount", 

3446 "lvName2": child.fileName, 

3447 "action3": "refresh", 

3448 "lvName3": child.fileName, 

3449 "action4": "refresh", 

3450 "lvName4": parent.fileName} 

3451 for slave in slaves: 

3452 Util.log("Updating %s, %s, %s on slave %s" % \ 

3453 (tmpName, child.fileName, parent.fileName, 

3454 self.xapi.getRecordHost(slave)['hostname'])) 

3455 text = self.xapi.session.xenapi.host.call_plugin( \ 

3456 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3457 Util.log("call-plugin returned: '%s'" % text) 

3458 

3459 @override 

3460 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None: 

3461 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) 

3462 if not slaves: 

3463 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) 

3464 return 

3465 

3466 args = {"vgName": self.vgName, 

3467 "action1": "deactivateNoRefcount", 

3468 "lvName1": oldNameLV, 

3469 "action2": "refresh", 

3470 "lvName2": vdi.fileName, 

3471 "action3": "cleanupLockAndRefcount", 

3472 "uuid3": origParentUuid, 

3473 "ns3": NS_PREFIX_LVM + self.uuid} 

3474 for slave in slaves: 

3475 Util.log("Updating %s to %s on slave %s" % \ 

3476 (oldNameLV, vdi.fileName, 

3477 self.xapi.getRecordHost(slave)['hostname'])) 

3478 text = self.xapi.session.xenapi.host.call_plugin( \ 

3479 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3480 Util.log("call-plugin returned: '%s'" % text) 

3481 

3482 @override 

3483 def _updateSlavesOnResize(self, vdi) -> None: 

3484 uuids = [x.uuid for x in vdi.getAllLeaves()] 

3485 slaves = util.get_slaves_attached_on(self.xapi.session, uuids) 

3486 if not slaves: 

3487 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi) 

3488 return 

3489 LvmCowUtil.refreshVolumeOnSlaves(self.xapi.session, self.uuid, self.vgName, 

3490 vdi.fileName, vdi.uuid, slaves) 

3491 

3492 

3493class LinstorSR(SR): 

3494 TYPE = SR.TYPE_LINSTOR 

3495 

3496 def __init__(self, uuid, xapi, createLock, force): 

3497 if not LINSTOR_AVAILABLE: 

3498 raise util.SMException( 

3499 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing' 

3500 ) 

3501 

3502 SR.__init__(self, uuid, xapi, createLock, force) 

3503 self.path = LinstorVolumeManager.DEV_ROOT_PATH 

3504 

3505 class LinstorProxy: 

3506 def __init__(self, sr: LinstorSR) -> None: 

3507 self.sr = sr 

3508 

3509 def __getattr__(self, attr: str) -> Any: 

3510 assert self.sr, "Cannot use `LinstorProxy` without valid `LinstorVolumeManager` instance" 

3511 return getattr(self.sr._linstor, attr) 

3512 

3513 self._linstor_proxy = LinstorProxy(self) 

3514 self._reloadLinstor(journaler_only=True) 

3515 

3516 @override 

3517 def deleteVDI(self, vdi) -> None: 

3518 self._checkSlaves(vdi) 

3519 SR.deleteVDI(self, vdi) 

3520 

3521 @override 

3522 def getFreeSpace(self) -> int: 

3523 return self._linstor.max_volume_size_allowed 

3524 

3525 @override 

3526 def scan(self, force=False) -> None: 

3527 all_vdi_info = self._scan(force) 

3528 for uuid, vdiInfo in all_vdi_info.items(): 

3529 # When vdiInfo is None, the VDI is RAW. 

3530 vdi = self.getVDI(uuid) 

3531 if not vdi: 

3532 self.logFilter.logNewVDI(uuid) 

3533 vdi = LinstorVDI(self, uuid, vdiInfo.vdiType if vdiInfo else VdiType.RAW)

3534 self.vdis[uuid] = vdi 

3535 if vdiInfo: 

3536 vdi.load(vdiInfo) 

3537 self._removeStaleVDIs(all_vdi_info.keys()) 

3538 self._buildTree(force) 

3539 self.logFilter.logState() 

3540 self._handleInterruptedCoalesceLeaf() 

3541 

3542 @override 

3543 def pauseVDIs(self, vdiList) -> None: 

3544 self._linstor.ensure_volume_list_is_not_locked( 

3545 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3546 ) 

3547 return super(LinstorSR, self).pauseVDIs(vdiList) 

3548 

3549 def _reloadLinstor(self, journaler_only=False): 

3550 session = self.xapi.session 

3551 host_ref = util.get_this_host_ref(session) 

3552 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) 

3553 

3554 pbd = util.find_my_pbd(session, host_ref, sr_ref) 

3555 if pbd is None: 

3556 raise util.SMException('Failed to find PBD') 

3557 

3558 dconf = session.xenapi.PBD.get_device_config(pbd) 

3559 group_name = dconf['group-name'] 

3560 

3561 controller_uri = get_controller_uri() 

3562 self.journaler = LinstorJournaler( 

3563 controller_uri, group_name, logger=util.SMlog 

3564 ) 

3565 

3566 if journaler_only: 

3567 return 

3568 

3569 self._linstor = LinstorVolumeManager( 

3570 controller_uri, 

3571 group_name, 

3572 repair=True, 

3573 logger=util.SMlog 

3574 ) 

3575 

3576 def _scan(self, force): 

3577 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3578 self._reloadLinstor() 

3579 error = False 

3580 try: 

3581 all_vdi_info = self._load_vdi_info() 

3582 for uuid, vdiInfo in all_vdi_info.items(): 

3583 if vdiInfo and vdiInfo.error: 

3584 error = True 

3585 break 

3586 if not error: 

3587 return all_vdi_info 

3588 Util.log('Scan error, retrying ({})'.format(i)) 

3589 except Exception as e: 

3590 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3591 Util.log(traceback.format_exc()) 

3592 

3593 if force: 

3594 return all_vdi_info 

3595 raise util.SMException('Scan error') 

3596 

3597 def _load_vdi_info(self): 

3598 all_vdi_info = {} 

3599 

3600 # TODO: Ensure metadata contains the right info. 

3601 

3602 all_volume_info = self._linstor.get_volumes_with_info() 

3603 volumes_metadata = self._linstor.get_volumes_with_metadata() 

3604 for vdi_uuid, volume_info in all_volume_info.items(): 

3605 try: 

3606 volume_metadata = volumes_metadata[vdi_uuid] 

3607 if not volume_info.name and not list(volume_metadata.items()): 

3608 continue # Ignore it, probably deleted. 

3609 

3610 if vdi_uuid.startswith('DELETED_'): 

3611 # Assume it's really a RAW volume of a failed snap without COW header/footer. 

3612 # We must remove this VDI now without adding it in the VDI list. 

3613 # Otherwise `Relinking` calls and other actions can be launched on it. 

3614 # We don't want that... 

3615 Util.log('Deleting bad VDI {}'.format(vdi_uuid)) 

3616 

3617 self.lock() 

3618 try: 

3619 self._linstor.destroy_volume(vdi_uuid) 

3620 try: 

3621 self.forgetVDI(vdi_uuid) 

3622 except: 

3623 pass 

3624 except Exception as e: 

3625 Util.log('Cannot delete bad VDI: {}'.format(e)) 

3626 finally: 

3627 self.unlock() 

3628 continue 

3629 

3630 vdi_type = volume_metadata.get(VDI_TYPE_TAG) 

3631 volume_name = self._linstor.get_volume_name(vdi_uuid) 

3632 if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX): 

3633 # Always RAW! 

3634 info = None 

3635 elif VdiType.isCowImage(vdi_type): 

3636 info = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type).get_info(vdi_uuid) 

3637 else: 

3638 # Ensure it's not a COW image... 

3639 linstorcowutil = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type) 

3640 try: 

3641 info = linstorcowutil.get_info(vdi_uuid) 

3642 except: 

3643 try: 

3644 linstorcowutil.force_repair( 

3645 self._linstor.get_device_path(vdi_uuid) 

3646 ) 

3647 info = linstorcowutil.get_info(vdi_uuid) 

3648 except: 

3649 info = None 

3650 

3651 except Exception as e: 

3652 Util.log( 

3653 ' [VDI {}: failed to load VDI info]: {}' 

3654 .format(vdi_uuid, e) 

3655 ) 

3656 info = CowImageInfo(vdi_uuid) 

3657 info.error = 1 

3658 

3659 all_vdi_info[vdi_uuid] = info 

3660 

3661 return all_vdi_info 

3662 

3663 @override 

3664 def _prepareCoalesceLeaf(self, vdi) -> None: 

3665 vdi._activateChain() 

3666 vdi.deflate() 

3667 vdi._inflateParentForCoalesce() 

3668 

3669 @override 

3670 def _finishCoalesceLeaf(self, parent) -> None: 

3671 if not parent.isSnapshot() or parent.isAttachedRW(): 

3672 parent.inflateFully() 

3673 else: 

3674 parent.deflate() 

3675 

3676 @override 

3677 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3678 return LinstorCowUtil( 

3679 self.xapi.session, self._linstor, parent.vdi_type 

3680 ).compute_volume_size(parent.sizeVirt) - parent.getDrbdSize() 

3681 

3682 def _hasValidDevicePath(self, uuid): 

3683 try: 

3684 self._linstor.get_device_path(uuid) 

3685 except Exception: 

3686 # TODO: Maybe log exception. 

3687 return False 

3688 return True 

3689 

3690 @override 

3691 def _liveLeafCoalesce(self, vdi) -> bool: 

3692 self.lock() 

3693 try: 

3694 self._linstor.ensure_volume_is_not_locked( 

3695 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3696 ) 

3697 return super(LinstorSR, self)._liveLeafCoalesce(vdi) 

3698 finally: 

3699 self.unlock() 

3700 

3701 @override 

3702 def _handleInterruptedCoalesceLeaf(self) -> None: 

3703 entries = self.journaler.get_all(VDI.JRN_LEAF) 

3704 for uuid, parentUuid in entries.items(): 

3705 if self._hasValidDevicePath(parentUuid) or \ 

3706 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): 

3707 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3708 else: 

3709 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3710 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3711 vdi = self.getVDI(uuid) 

3712 if vdi: 

3713 vdi.ensureUnpaused() 

3714 

3715 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3716 Util.log('*** UNDO LEAF-COALESCE') 

3717 parent = self.getVDI(parentUuid) 

3718 if not parent: 

3719 parent = self.getVDI(childUuid) 

3720 if not parent: 

3721 raise util.SMException( 

3722 'Neither {} nor {} found'.format(parentUuid, childUuid) 

3723 ) 

3724 Util.log( 

3725 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) 

3726 ) 

3727 parent.rename(parentUuid) 

3728 

3729 child = self.getVDI(childUuid) 

3730 if not child: 

3731 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3732 if not child: 

3733 raise util.SMException( 

3734 'Neither {} nor {} found'.format( 

3735 childUuid, self.TMP_RENAME_PREFIX + childUuid 

3736 ) 

3737 ) 

3738 Util.log('Renaming child back to {}'.format(childUuid)) 

3739 child.rename(childUuid) 

3740 Util.log('Updating the VDI record') 

3741 child.setConfig(VDI.DB_VDI_PARENT, parentUuid) 

3742 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type) 

3743 

3744 # TODO: Maybe deflate here. 

3745 

3746 if child.hidden: 

3747 child._setHidden(False) 

3748 if not parent.hidden: 

3749 parent._setHidden(True) 

3750 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3751 Util.log('*** leaf-coalesce undo successful') 

3752 

3753 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3754 Util.log('*** FINISH LEAF-COALESCE') 

3755 vdi = self.getVDI(childUuid) 

3756 if not vdi: 

3757 raise util.SMException('VDI {} not found'.format(childUuid)) 

3758 # TODO: Maybe inflate. 

3759 try: 

3760 self.forgetVDI(parentUuid) 

3761 except XenAPI.Failure: 

3762 pass 

3763 self._updateSlavesOnResize(vdi) 

3764 Util.log('*** finished leaf-coalesce successfully') 

3765 

3766 def _checkSlaves(self, vdi): 

3767 try: 

3768 all_openers = self._linstor.get_volume_openers(vdi.uuid) 

3769 for openers in all_openers.values(): 

3770 for opener in openers.values(): 

3771 if opener['process-name'] != 'tapdisk': 

3772 raise util.SMException( 

3773 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) 

3774 ) 

3775 except LinstorVolumeManagerError as e: 

3776 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

3777 raise 

3778 

3779 

3780################################################################################ 

3781# 

3782# Helpers 

3783# 

3784def daemonize(): 

3785 pid = os.fork() 

3786 if pid: 

3787 os.waitpid(pid, 0) 

3788 Util.log("New PID [%d]" % pid) 

3789 return False 

3790 os.chdir("/") 

3791 os.setsid() 

3792 pid = os.fork() 

3793 if pid: 

3794 Util.log("Will finish as PID [%d]" % pid) 

3795 os._exit(0) 

3796 for fd in [0, 1, 2]: 

3797 try: 

3798 os.close(fd) 

3799 except OSError: 

3800 pass 

3801 # we need to fill those special fd numbers or pread won't work 

3802 sys.stdin = open("/dev/null", 'r') 

3803 sys.stderr = open("/dev/null", 'w') 

3804 sys.stdout = open("/dev/null", 'w') 

3805 # As we're a new process we need to clear the lock objects 

3806 lock.Lock.clearAll() 

3807 return True 
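
# Editorial sketch (not part of the original module): how daemonize() is meant
# to be used -- the parent returns False and simply carries on, while the
# detached child returns True, does its work and must _exit() so it never
# returns into the caller's stack. This mirrors what gc() does further down.
def _example_daemonize_usage(work):
    if daemonize():
        try:
            work()  # we are now the detached background process
        finally:
            os._exit(0)  # never fall back into the caller
    # parent: the child has been forked off, return immediately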

3808 

3809 

3810def normalizeType(type): 

3811 if type in LVMSR.SUBTYPES: 

3812 type = SR.TYPE_LVHD 

3813 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: 

3814 # temporary while LVHD is symlinked as LVM 

3815 type = SR.TYPE_LVHD 

3816 if type in [ 

3817 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", 

3818 "moosefs", "xfs", "zfs", "largeblock" 

3819 ]: 

3820 type = SR.TYPE_FILE 

3821 if type in ["linstor"]: 

3822 type = SR.TYPE_LINSTOR 

3823 if type not in SR.TYPES: 

3824 raise util.SMException("Unsupported SR type: %s" % type) 

3825 return type 
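
# Editorial note (derived from the mapping above): normalizeType("lvmoiscsi")
# and normalizeType("lvhdoiscsi") both yield SR.TYPE_LVHD, normalizeType("nfs")
# yields SR.TYPE_FILE, and any type outside SR.TYPES raises util.SMException.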

3826 

3827GCPAUSE_DEFAULT_SLEEP = 5 * 60 

3828 

3829 

3830def _gc_init_file(sr_uuid): 

3831 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init') 

3832 

3833 

3834def _create_init_file(sr_uuid): 

3835 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))) 

3836 with open(os.path.join(_gc_init_file(sr_uuid)), 'w+') as f: 

3837 f.write('1') 

3838 

3839 

3840def _gcLoopPause(sr, dryRun=False, immediate=False): 

3841 if immediate: 

3842 return 

3843 

3844 # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist 

3845 # point will just return. Otherwise, fall back on an abortable sleep. 

3846 

3847 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT): 

3848 

3849 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT,

3850 lambda *args: None) 

3851 elif os.path.exists(_gc_init_file(sr.uuid)): 

3852 def abortTest(): 

3853 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT) 

3854 

3855 # If time.sleep hangs we are in deep trouble; however, for

3856 # completeness we set the timeout of the abort thread to 

3857 # 110% of GCPAUSE_DEFAULT_SLEEP. 
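# (Editorial note: with GCPAUSE_DEFAULT_SLEEP = 5 * 60 this is a 300s
# abortable sleep guarded by a 330s abort-thread timeout.)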

3858 Util.log("GC active, about to go quiet") 

3859 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP),

3860 None, sr.uuid, abortTest, VDI.POLL_INTERVAL, 

3861 GCPAUSE_DEFAULT_SLEEP * 1.1) 

3862 Util.log("GC active, quiet period ended") 

3863 

3864 

3865def _gcLoop(sr, dryRun=False, immediate=False): 

3866 if not lockGCActive.acquireNoblock():

3867 Util.log("Another GC instance already active, exiting") 

3868 return 

3869 

3870 # Check we're still attached after acquiring locks 

3871 if not sr.xapi.isPluggedHere(): 

3872 Util.log("SR no longer attached, exiting") 

3873 return 

3874 

3875 # Clean up Intellicache files 

3876 sr.cleanupCache() 

3877 

3878 # Track how many we do 

3879 coalesced = 0 

3880 task_status = "success" 

3881 try: 

3882 # Check if any work needs to be done 

3883 if not sr.xapi.isPluggedHere():

3884 Util.log("SR no longer attached, exiting") 

3885 return 

3886 sr.scanLocked() 

3887 if not sr.hasWork(): 

3888 Util.log("No work, exiting") 

3889 return 

3890 sr.xapi.create_task( 

3891 "Garbage Collection", 

3892 "Garbage collection for SR %s" % sr.uuid) 

3893 _gcLoopPause(sr, dryRun, immediate=immediate) 

3894 while True: 

3895 if SIGTERM: 

3896 Util.log("Term requested") 

3897 return 

3898 

3899 if not sr.xapi.isPluggedHere():

3900 Util.log("SR no longer attached, exiting") 

3901 break 

3902 sr.scanLocked() 

3903 if not sr.hasWork(): 

3904 Util.log("No work, exiting") 

3905 break 

3906 

3907 if not lockGCRunning.acquireNoblock():

3908 Util.log("Unable to acquire GC running lock.") 

3909 return 

3910 try: 

3911 if not sr.gcEnabled():

3912 break 

3913 

3914 sr.xapi.update_task_progress("done", coalesced) 

3915 

3916 sr.cleanupCoalesceJournals() 

3917 # Create the init file here in case startup is waiting on it 

3918 _create_init_file(sr.uuid) 

3919 sr.scanLocked() 

3920 sr.updateBlockInfo() 

3921 

3922 howmany = len(sr.findGarbage()) 

3923 if howmany > 0: 

3924 Util.log("Found %d orphaned vdis" % howmany) 

3925 sr.lock() 

3926 try: 

3927 sr.garbageCollect(dryRun) 

3928 finally: 

3929 sr.unlock() 

3930 sr.xapi.srUpdate() 

3931 

3932 candidate = sr.findCoalesceable() 

3933 if candidate: 

3934 util.fistpoint.activate( 

3935 "LVHDRT_finding_a_suitable_pair", sr.uuid) 

3936 sr.coalesce(candidate, dryRun) 

3937 sr.xapi.srUpdate() 

3938 coalesced += 1 

3939 continue 

3940 

3941 candidate = sr.findLeafCoalesceable() 

3942 if candidate:

3943 sr.coalesceLeaf(candidate, dryRun) 

3944 sr.xapi.srUpdate() 

3945 coalesced += 1 

3946 continue 

3947 

3948 finally: 

3949 lockGCRunning.release()

3950 except: 

3951 task_status = "failure" 

3952 raise 

3953 finally: 

3954 sr.xapi.set_task_status(task_status) 

3955 Util.log("GC process exiting, no work left") 

3956 _create_init_file(sr.uuid) 

3957 lockGCActive.release() 

3958 

3959 

3960def _xapi_enabled(session, hostref): 

3961 host = session.xenapi.host.get_record(hostref) 

3962 return host['enabled'] 

3963 

3964 

3965def _ensure_xapi_initialised(session): 

3966 """ 

3967 Don't want to start GC until Xapi is fully initialised 

3968 """ 

3969 local_session = None 

3970 if session is None: 

3971 local_session = util.get_localAPI_session() 

3972 session = local_session 

3973 

3974 try: 

3975 hostref = session.xenapi.host.get_by_uuid(util.get_this_host()) 

3976 while not _xapi_enabled(session, hostref): 

3977 util.SMlog("Xapi not ready, GC waiting") 

3978 time.sleep(15) 

3979 finally: 

3980 if local_session is not None: 

3981 local_session.xenapi.session.logout() 

3982 

3983def _gc(session, srUuid, dryRun=False, immediate=False): 

3984 init(srUuid) 

3985 _ensure_xapi_initialised(session) 

3986 sr = SR.getInstance(srUuid, session) 

3987 if not sr.gcEnabled(False):

3988 return 

3989 

3990 try: 

3991 _gcLoop(sr, dryRun, immediate=immediate) 

3992 finally: 

3993 sr.check_no_space_candidates() 

3994 sr.cleanup() 

3995 sr.logFilter.logState() 

3996 del sr.xapi 

3997 

3998 

3999def _abort(srUuid, soft=False): 

4000 """Aborts an GC/coalesce. 

4001 

4002 srUuid: the UUID of the SR whose GC/coalesce must be aborted 

4003 soft: If set to True and there is a pending abort signal, the function 

4004 doesn't do anything. If set to False, a new abort signal is issued. 

4005 

4006 returns: If soft is set to False, we return True holding lockGCActive. If 

4007 soft is set to True and an abort signal is pending, we return False

4008 without holding lockGCActive. An exception is raised in case of error.""" 

4009 Util.log("=== SR %s: abort ===" % (srUuid)) 

4010 init(srUuid) 

4011 if not lockGCActive.acquireNoblock(): 

4012 gotLock = False 

4013 Util.log("Aborting currently-running instance (SR %s)" % srUuid) 

4014 abortFlag = IPCFlag(srUuid) 

4015 if not abortFlag.set(FLAG_TYPE_ABORT, soft): 

4016 return False 

4017 for i in range(SR.LOCK_RETRY_ATTEMPTS): 

4018 gotLock = lockGCActive.acquireNoblock() 

4019 if gotLock: 

4020 break 

4021 time.sleep(SR.LOCK_RETRY_INTERVAL) 

4022 abortFlag.clear(FLAG_TYPE_ABORT) 

4023 if not gotLock: 

4024 raise util.CommandException(code=errno.ETIMEDOUT, 

4025 reason="SR %s: error aborting existing process" % srUuid) 

4026 return True 

4027 

4028 

4029def init(srUuid): 

4030 global lockGCRunning 

4031 if not lockGCRunning:

4032 lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid) 

4033 global lockGCActive 

4034 if not lockGCActive:

4035 lockGCActive = LockActive(srUuid) 

4036 

4037 

4038class LockActive: 

4039 """ 

4040 Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired 

4041 if another process holds the SR lock. 

4042 """ 

4043 def __init__(self, srUuid): 

4044 self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid) 

4045 self._srLock = lock.Lock(lock.LOCK_TYPE_SR, srUuid) 

4046 

4047 def acquireNoblock(self): 

4048 self._srLock.acquire() 

4049 

4050 try: 

4051 return self._lock.acquireNoblock() 

4052 finally: 

4053 self._srLock.release() 

4054 

4055 def release(self): 

4056 self._lock.release() 
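
# Editorial sketch (not in the original source): minimal usage of LockActive,
# mirroring what init() and _gcLoop() do with the module-level lockGCActive.
def _example_lock_active_usage(srUuid):
    gcLock = LockActive(srUuid)
    if not gcLock.acquireNoblock():
        Util.log("Another GC instance already active for SR %s" % srUuid)
        return False
    try:
        pass  # ... GC / coalesce work would go here ...
    finally:
        gcLock.release()
    return True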

4057 

4058 

4059def usage(): 

4060 output = """Garbage collect and/or coalesce COW images in a COW-based SR 

4061 

4062Parameters: 

4063 -u --uuid UUID SR UUID 

4064 and one of: 

4065 -g --gc garbage collect, coalesce, and repeat while there is work 

4066 -G --gc_force garbage collect once, aborting any current operations 

4067 -c --clean_cache <max_age> clean up IntelliCache cache files older than

4068 max_age hours 

4069 -a --abort abort any currently running operation (GC or coalesce) 

4070 -q --query query the current state (GC'ing, coalescing or not running) 

4071 -x --disable disable GC/coalesce (will be in effect until you exit) 

4072 -t --debug see Debug below 

4073 

4074Options: 

4075 -b --background run in background (return immediately) (valid for -g only) 

4076 -f --force continue in the presence of COW images with errors (when doing 

4077 GC, this might cause removal of any such images) (only valid 

4078 for -G) (DANGEROUS) 

4079 

4080Debug: 

4081 The --debug parameter enables manipulation of LVHD VDIs for debugging 

4082 purposes. ** NEVER USE IT ON A LIVE VM ** 

4083 The following parameters are required: 

4084 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate", 

4085 "deflate". 

4086 -v --vdi_uuid VDI UUID 

4087 """ 

4088 #-d --dry-run don't actually perform any SR-modifying operations 

4089 print(output) 

4090 Util.log("(Invalid usage)") 

4091 sys.exit(1) 
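
# Editorial examples (options as parsed in main() below; the script is invoked
# directly, as start_gc() does via __file__):
#   <script> -u <SR_UUID> -g -b    garbage collect / coalesce in the background
#   <script> -u <SR_UUID> -q       query whether GC/coalesce is currently running
#   <script> -u <SR_UUID> -a       abort any running GC/coalesce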

4092 

4093 

4094############################################################################## 

4095# 

4096# API 

4097# 

4098def abort(srUuid, soft=False): 

4099 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair. 

4100 """ 

4101 if _abort(srUuid, soft): 

4102 Util.log("abort: releasing the process lock") 

4103 lockGCActive.release() 

4104 return True 

4105 else: 

4106 return False 

4107 

4108 

4109def gc(session, srUuid, inBackground, dryRun=False): 

4110 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return 

4111 immediately if inBackground=True. 

4112 

4113 The following algorithm is used: 

4114 1. If we are already GC'ing in this SR, return 

4115 2. If we are already coalescing a VDI pair: 

4116 a. Scan the SR and determine if the VDI pair is GC'able 

4117 b. If the pair is not GC'able, return 

4118 c. If the pair is GC'able, abort coalesce 

4119 3. Scan the SR 

4120 4. If there is nothing to collect, nor to coalesce, return 

4121 5. If there is something to collect, GC all, then goto 3 

4122 6. If there is something to coalesce, coalesce one pair, then goto 3 

4123 """ 

4124 Util.log("=== SR %s: gc ===" % srUuid) 

4125 

4126 signal.signal(signal.SIGTERM, receiveSignal) 

4127 

4128 if inBackground: 

4129 if daemonize():

4130 # we are now running in the background. Catch & log any errors 

4131 # because there is no other way to propagate them back at this 

4132 # point 

4133 

4134 try: 

4135 _gc(None, srUuid, dryRun) 

4136 except AbortException: 

4137 Util.log("Aborted") 

4138 except Exception: 

4139 Util.logException("gc") 

4140 Util.log("* * * * * SR %s: ERROR\n" % srUuid) 

4141 os._exit(0) 

4142 else: 

4143 _gc(session, srUuid, dryRun, immediate=True) 

4144 

4145 

4146def start_gc(session, sr_uuid): 

4147 """ 

4148 This function is used to try to start a backgrounded GC session by forking 

4149 the current process. If using the systemd version, call start_gc_service() instead. 

4150 """ 

4151 # don't bother if an instance already running (this is just an 

4152 # optimization to reduce the overhead of forking a new process if we 

4153 # don't have to, but the process will check the lock anyways) 

4154 lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid) 

4155 if not lockRunning.acquireNoblock(): 

4156 if should_preempt(session, sr_uuid): 

4157 util.SMlog("Aborting currently-running coalesce of garbage VDI") 

4158 try: 

4159 if not abort(sr_uuid, soft=True): 

4160 util.SMlog("The GC has already been scheduled to re-start") 

4161 except util.CommandException as e: 

4162 if e.code != errno.ETIMEDOUT: 

4163 raise 

4164 util.SMlog('failed to abort the GC') 

4165 else: 

4166 util.SMlog("A GC instance already running, not kicking") 

4167 return 

4168 else: 

4169 lockRunning.release() 

4170 

4171 util.SMlog(f"Starting GC file is {__file__}") 

4172 subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'], 

4173 stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4174 

4175def start_gc_service(sr_uuid, wait=False): 

4176 """ 

4177 This starts the templated systemd service which runs GC on the given SR UUID. 

4178 If the service was already started, this is a no-op. 

4179 

4180 Because the service is a one-shot with RemainAfterExit=no, when called with 

4181 wait=True this will run the service synchronously and will not return until the 

4182 run has finished. This is used to force a run of the GC instead of just kicking it 

4183 in the background. 

4184 """ 

4185 sr_uuid_esc = sr_uuid.replace("-", "\\x2d") 

4186 util.SMlog(f"Kicking SMGC@{sr_uuid}...") 

4187 cmd=[ "/usr/bin/systemctl", "--quiet" ] 

4188 if not wait:

4189 cmd.append("--no-block") 

4190 cmd += ["start", f"SMGC@{sr_uuid_esc}"] 

4191 subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 
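
# Editorial note: the dash escaping above follows systemd unit-name encoding,
# so for a hypothetical SR UUID "1234-abcd" the unit started is
# "SMGC@1234\x2dabcd" (the same result `systemd-escape "1234-abcd"` gives).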

4192 

4193 

4194def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False): 

4195 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure 

4196 the SR lock is held. 

4197 The following algorithm is used: 

4198 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce 

4199 2. Scan the SR 

4200 3. GC 

4201 4. return 

4202 """ 

4203 Util.log("=== SR %s: gc_force ===" % srUuid) 

4204 init(srUuid) 

4205 sr = SR.getInstance(srUuid, session, lockSR, True) 

4206 if not lockGCActive.acquireNoblock(): 

4207 abort(srUuid) 

4208 else: 

4209 Util.log("Nothing was running, clear to proceed") 

4210 

4211 if force: 

4212 Util.log("FORCED: will continue even if there are COW image errors") 

4213 sr.scanLocked(force) 

4214 sr.cleanupCoalesceJournals() 

4215 

4216 try: 

4217 sr.cleanupCache() 

4218 sr.garbageCollect(dryRun) 

4219 finally: 

4220 sr.cleanup() 

4221 sr.logFilter.logState() 

4222 lockGCActive.release() 

4223 

4224 

4225def get_state(srUuid): 

4226 """Return whether GC/coalesce is currently running or not. This asks systemd for 

4227 the state of the templated SMGC service and will return True if it is "activating" 

4228 or "running" (for completeness, as in practice it will never achieve the latter state) 

4229 """ 

4230 sr_uuid_esc = srUuid.replace("-", "\\x2d") 

4231 cmd=[ "/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"] 

4232 result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4233 state = result.stdout.decode('utf-8').rstrip() 

4234 if state == "activating" or state == "running": 

4235 return True 

4236 return False 

4237 

4238 

4239def should_preempt(session, srUuid): 

4240 sr = SR.getInstance(srUuid, session) 

4241 entries = sr.journaler.getAll(VDI.JRN_COALESCE) 

4242 if len(entries) == 0: 

4243 return False 

4244 elif len(entries) > 1: 

4245 raise util.SMException("More than one coalesce entry: " + str(entries)) 

4246 sr.scanLocked() 

4247 coalescedUuid = entries.popitem()[0] 

4248 garbage = sr.findGarbage() 

4249 for vdi in garbage: 

4250 if vdi.uuid == coalescedUuid: 

4251 return True 

4252 return False 

4253 

4254 

4255def get_coalesceable_leaves(session, srUuid, vdiUuids): 

4256 coalesceable = [] 

4257 sr = SR.getInstance(srUuid, session) 

4258 sr.scanLocked() 

4259 for uuid in vdiUuids: 

4260 vdi = sr.getVDI(uuid) 

4261 if not vdi: 

4262 raise util.SMException("VDI %s not found" % uuid) 

4263 if vdi.isLeafCoalesceable(): 

4264 coalesceable.append(uuid) 

4265 return coalesceable 

4266 

4267 

4268def cache_cleanup(session, srUuid, maxAge): 

4269 sr = SR.getInstance(srUuid, session) 

4270 return sr.cleanupCache(maxAge) 

4271 

4272 

4273def debug(sr_uuid, cmd, vdi_uuid): 

4274 Util.log("Debug command: %s" % cmd) 

4275 sr = SR.getInstance(sr_uuid, None) 

4276 if not isinstance(sr, LVMSR): 

4277 print("Error: not an LVHD SR") 

4278 return 

4279 sr.scanLocked() 

4280 vdi = sr.getVDI(vdi_uuid) 

4281 if not vdi: 

4282 print("Error: VDI %s not found") 

4283 return 

4284 print("Running %s on SR %s" % (cmd, sr)) 

4285 print("VDI before: %s" % vdi) 

4286 if cmd == "activate": 

4287 vdi._activate() 

4288 print("VDI file: %s" % vdi.path) 

4289 if cmd == "deactivate": 

4290 ns = NS_PREFIX_LVM + sr.uuid 

4291 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False) 

4292 if cmd == "inflate": 

4293 vdi.inflateFully() 

4294 sr.cleanup() 

4295 if cmd == "deflate": 

4296 vdi.deflate() 

4297 sr.cleanup() 

4298 sr.scanLocked() 

4299 print("VDI after: %s" % vdi) 

4300 

4301 

4302def abort_optional_reenable(uuid): 

4303 print("Disabling GC/coalesce for %s" % uuid) 

4304 ret = _abort(uuid) 

4305 input("Press enter to re-enable...") 

4306 print("GC/coalesce re-enabled") 

4307 lockGCRunning.release() 

4308 if ret: 

4309 lockGCActive.release() 

4310 

4311 

4312############################################################################## 

4313# 

4314# CLI 

4315# 

4316def main(): 

4317 action = "" 

4318 maxAge = 0 

4319 uuid = "" 

4320 background = False 

4321 force = False 

4322 dryRun = False 

4323 debug_cmd = "" 

4324 vdi_uuid = "" 

4325 shortArgs = "gGc:aqxu:bfdt:v:" 

4326 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable", 

4327 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="] 

4328 

4329 try: 

4330 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs) 

4331 except getopt.GetoptError: 

4332 usage() 

4333 for o, a in opts: 

4334 if o in ("-g", "--gc"): 

4335 action = "gc" 

4336 if o in ("-G", "--gc_force"): 

4337 action = "gc_force" 

4338 if o in ("-c", "--clean_cache"): 

4339 action = "clean_cache" 

4340 maxAge = int(a) 

4341 if o in ("-a", "--abort"): 

4342 action = "abort" 

4343 if o in ("-q", "--query"): 

4344 action = "query" 

4345 if o in ("-x", "--disable"): 

4346 action = "disable" 

4347 if o in ("-u", "--uuid"): 

4348 uuid = a 

4349 if o in ("-b", "--background"): 

4350 background = True 

4351 if o in ("-f", "--force"): 

4352 force = True 

4353 if o in ("-d", "--dry-run"): 

4354 Util.log("Dry run mode") 

4355 dryRun = True 

4356 if o in ("-t", "--debug"): 

4357 action = "debug" 

4358 debug_cmd = a 

4359 if o in ("-v", "--vdi_uuid"): 

4360 vdi_uuid = a 

4361 

4362 if not action or not uuid: 

4363 usage() 

4364 if action == "debug" and not (debug_cmd and vdi_uuid) or \ 

4365 action != "debug" and (debug_cmd or vdi_uuid): 

4366 usage() 

4367 

4368 if action != "query" and action != "debug": 

4369 print("All output goes to log") 

4370 

4371 if action == "gc": 

4372 gc(None, uuid, background, dryRun) 

4373 elif action == "gc_force": 

4374 gc_force(None, uuid, force, dryRun, True) 

4375 elif action == "clean_cache": 

4376 cache_cleanup(None, uuid, maxAge) 

4377 elif action == "abort": 

4378 abort(uuid) 

4379 elif action == "query": 

4380 print("Currently running: %s" % get_state(uuid)) 

4381 elif action == "disable": 

4382 abort_optional_reenable(uuid) 

4383 elif action == "debug": 

4384 debug(uuid, debug_cmd, vdi_uuid) 

4385 

4386 

4387if __name__ == '__main__':

4388 main()