Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect COW-based SR's in the background 

19# 

20 

21from sm_typing import Any, Optional, List, override 

22 

23import os 

24import os.path 

25import sys 

26import time 

27import signal 

28import subprocess 

29import getopt 

30import datetime 

31import traceback 

32import base64 

33import zlib 

34import errno 

35import stat 

36 

37import XenAPI # pylint: disable=import-error 

38import util 

39import lvutil 

40import lvmcache 

41import journaler 

42import fjournaler 

43import lock 

44import blktap2 

45import xs_errors 

46from refcounter import RefCounter 

47from ipc import IPCFlag 

48from lvmanager import LVActivator 

49from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

50from functools import reduce 

51from time import monotonic as _time 

52 

53from constants import NS_PREFIX_LVM, VG_LOCATION, VG_PREFIX 

54from cowutil import CowImageInfo, CowUtil, getCowUtil 

55from lvmcowutil import LV_PREFIX, LvmCowUtil 

56from vditype import VdiType, VdiTypeExtension, VDI_COW_TYPES, VDI_TYPE_TO_EXTENSION 

57 

58try: 

59 from linstorcowutil import LinstorCowUtil 

60 from linstorjournaler import LinstorJournaler 

61 from linstorvolumemanager import get_controller_uri 

62 from linstorvolumemanager import LinstorVolumeManager 

63 from linstorvolumemanager import LinstorVolumeManagerError 

64 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX 

65 

66 LINSTOR_AVAILABLE = True 

67except ImportError: 

68 LINSTOR_AVAILABLE = False 

69 

70# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not 

71# possible due to lvhd_stop_using_() not working correctly. However, we leave 

72# this option available through the explicit LEAFCLSC_FORCE flag in the VDI 

73# record for use by the offline tool (which makes the operation safe by pausing 

74# the VM first) 

75AUTO_ONLINE_LEAF_COALESCE_ENABLED = True 

76 

77FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce 

78 

79# process "lock", used simply as an indicator that a process already exists 

80# that is doing GC/coalesce on this SR (such a process holds the lock, and we 

81# check for the fact by trying the lock). 

82lockGCRunning = None 

83 

84# process "lock" to indicate that the GC process has been activated but may not 

85# yet be running, stops a second process from being started. 

86LOCK_TYPE_GC_ACTIVE = "gc_active" 

87lockGCActive = None 

88 

89# Default coalesce error rate limit, in messages per minute. A zero value 

90# disables throttling, and a negative value disables error reporting. 

91DEFAULT_COALESCE_ERR_RATE = 1.0 / 60 

92 

93COALESCE_LAST_ERR_TAG = 'last-coalesce-error' 

94COALESCE_ERR_RATE_TAG = 'coalesce-error-rate' 

95VAR_RUN = "/var/run/" 

96SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log" 

97 

98N_RUNNING_AVERAGE = 10 

99 

100NON_PERSISTENT_DIR = '/run/nonpersistent/sm' 

101 

102# Signal Handler 

103SIGTERM = False 

104 

105 

106class AbortException(util.SMException): 

107 pass 

108 

109class CancelException(util.SMException): 

110 pass 

111 

112def receiveSignal(signalNumber, frame): 

113 global SIGTERM 

114 

115 util.SMlog("GC: recieved SIGTERM") 

116 SIGTERM = True 

117 return 

118 

119 

120################################################################################ 

121# 

122# Util 

123# 

124class Util: 

125 RET_RC = 1 

126 RET_STDOUT = 2 

127 RET_STDERR = 4 

128 

129 UUID_LEN = 36 

130 

131 PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024} 

132 

133 @staticmethod 

134 def log(text) -> None: 

135 util.SMlog(text, ident="SMGC") 

136 

137 @staticmethod 

138 def logException(tag): 

139 info = sys.exc_info() 

140 if info[0] == SystemExit: 140 ↛ 142line 140 didn't jump to line 142, because the condition on line 140 was never true

141 # this should not be happening when catching "Exception", but it is 

142 sys.exit(0) 

143 tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2])) 

144 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

145 Util.log(" ***********************") 

146 Util.log(" * E X C E P T I O N *") 

147 Util.log(" ***********************") 

148 Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1])) 

149 Util.log(tb) 

150 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

151 

152 @staticmethod 

153 def doexec(args, expectedRC, inputtext=None, ret=None, log=True): 

154 "Execute a subprocess, then return its return code, stdout, stderr" 

155 proc = subprocess.Popen(args, 

156 stdin=subprocess.PIPE, \ 

157 stdout=subprocess.PIPE, \ 

158 stderr=subprocess.PIPE, \ 

159 shell=True, \ 

160 close_fds=True) 

161 (stdout, stderr) = proc.communicate(inputtext) 

162 stdout = str(stdout) 

163 stderr = str(stderr) 

164 rc = proc.returncode 

165 if log: 

166 Util.log("`%s`: %s" % (args, rc)) 

167 if type(expectedRC) != type([]): 

168 expectedRC = [expectedRC] 

169 if not rc in expectedRC: 

170 reason = stderr.strip() 

171 if stdout.strip(): 

172 reason = "%s (stdout: %s)" % (reason, stdout.strip()) 

173 Util.log("Failed: %s" % reason) 

174 raise util.CommandException(rc, args, reason) 

175 

176 if ret == Util.RET_RC: 

177 return rc 

178 if ret == Util.RET_STDERR: 

179 return stderr 

180 return stdout 

181 

182 @staticmethod 

183 def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut, prefSig=signal.SIGKILL): 

184 """execute func in a separate thread and kill it if abortTest signals 

185 so""" 

186 abortSignaled = abortTest() # check now before we clear resultFlag 

187 resultFlag = IPCFlag(ns) 

188 resultFlag.clearAll() 

189 pid = os.fork() 

190 if pid: 

191 startTime = _time() 

192 try: 

193 while True: 

194 if resultFlag.test("success"): 

195 Util.log(" Child process completed successfully") 

196 resultFlag.clear("success") 

197 return 

198 if resultFlag.test("failure"): 

199 resultFlag.clear("failure") 

200 raise util.SMException("Child process exited with error") 

201 if abortTest() or abortSignaled or SIGTERM: 

202 os.killpg(pid, prefSig) 

203 raise AbortException("Aborting due to signal") 

204 if timeOut and _time() - startTime > timeOut: 

205 os.killpg(pid, prefSig) 

206 resultFlag.clearAll() 

207 raise util.SMException("Timed out") 

208 time.sleep(pollInterval) 

209 finally: 

210 wait_pid = 0 

211 rc = -1 

212 count = 0 

213 while wait_pid == 0 and count < 10: 

214 wait_pid, rc = os.waitpid(pid, os.WNOHANG) 

215 if wait_pid == 0: 

216 time.sleep(2) 

217 count += 1 

218 

219 if wait_pid == 0: 

220 Util.log("runAbortable: wait for process completion timed out") 

221 else: 

222 os.setpgrp() 

223 try: 

224 if func() == ret: 

225 resultFlag.set("success") 

226 else: 

227 resultFlag.set("failure") 

228 except Exception as e: 

229 Util.log("Child process failed with : (%s)" % e) 

230 resultFlag.set("failure") 

231 Util.logException("This exception has occured") 

232 os._exit(0) 

233 

234 @staticmethod 

235 def num2str(number): 

236 for prefix in ("G", "M", "K"): 236 ↛ 239line 236 didn't jump to line 239, because the loop on line 236 didn't complete

237 if number >= Util.PREFIX[prefix]: 

238 return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix) 

239 return "%s" % number 

240 

241 @staticmethod 

242 def numBits(val): 

243 count = 0 

244 while val: 

245 count += val & 1 

246 val = val >> 1 

247 return count 

248 

249 @staticmethod 

250 def countBits(bitmap1, bitmap2): 

251 """return bit count in the bitmap produced by ORing the two bitmaps""" 

252 len1 = len(bitmap1) 

253 len2 = len(bitmap2) 

254 lenLong = len1 

255 lenShort = len2 

256 bitmapLong = bitmap1 

257 if len2 > len1: 

258 lenLong = len2 

259 lenShort = len1 

260 bitmapLong = bitmap2 

261 

262 count = 0 

263 for i in range(lenShort): 

264 val = bitmap1[i] | bitmap2[i] 

265 count += Util.numBits(val) 

266 

267 for i in range(i + 1, lenLong): 

268 val = bitmapLong[i] 

269 count += Util.numBits(val) 

270 return count 

271 

272 @staticmethod 

273 def getThisScript(): 

274 thisScript = util.get_real_path(__file__) 

275 if thisScript.endswith(".pyc"): 

276 thisScript = thisScript[:-1] 

277 return thisScript 

278 

279 

280################################################################################ 

281# 

282# XAPI 

283# 

284class XAPI: 

285 USER = "root" 

286 PLUGIN_ON_SLAVE = "on-slave" 

287 

288 CONFIG_SM = 0 

289 CONFIG_OTHER = 1 

290 CONFIG_ON_BOOT = 2 

291 CONFIG_ALLOW_CACHING = 3 

292 

293 CONFIG_NAME = { 

294 CONFIG_SM: "sm-config", 

295 CONFIG_OTHER: "other-config", 

296 CONFIG_ON_BOOT: "on-boot", 

297 CONFIG_ALLOW_CACHING: "allow_caching" 

298 } 

299 

300 class LookupError(util.SMException): 

301 pass 

302 

303 @staticmethod 

304 def getSession(): 

305 session = XenAPI.xapi_local() 

306 session.xenapi.login_with_password(XAPI.USER, '', '', 'SM') 

307 return session 

308 

309 def __init__(self, session, srUuid): 

310 self.sessionPrivate = False 

311 self.session = session 

312 if self.session is None: 

313 self.session = self.getSession() 

314 self.sessionPrivate = True 

315 self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid) 

316 self.srRecord = self.session.xenapi.SR.get_record(self._srRef) 

317 self.hostUuid = util.get_this_host() 

318 self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid) 

319 self.task = None 

320 self.task_progress = {"coalescable": 0, "done": 0} 

321 

322 def __del__(self): 

323 if self.sessionPrivate: 

324 self.session.xenapi.session.logout() 

325 

326 @property 

327 def srRef(self): 

328 return self._srRef 

329 

330 def isPluggedHere(self): 

331 pbds = self.getAttachedPBDs() 

332 for pbdRec in pbds: 

333 if pbdRec["host"] == self._hostRef: 

334 return True 

335 return False 

336 

337 def poolOK(self): 

338 host_recs = self.session.xenapi.host.get_all_records() 

339 for host_ref, host_rec in host_recs.items(): 

340 if not host_rec["enabled"]: 

341 Util.log("Host %s not enabled" % host_rec["uuid"]) 

342 return False 

343 return True 

344 

345 def isMaster(self): 

346 if self.srRecord["shared"]: 

347 pool = list(self.session.xenapi.pool.get_all_records().values())[0] 

348 return pool["master"] == self._hostRef 

349 else: 

350 pbds = self.getAttachedPBDs() 

351 if len(pbds) < 1: 

352 raise util.SMException("Local SR not attached") 

353 elif len(pbds) > 1: 

354 raise util.SMException("Local SR multiply attached") 

355 return pbds[0]["host"] == self._hostRef 

356 

357 def getAttachedPBDs(self): 

358 """Return PBD records for all PBDs of this SR that are currently 

359 attached""" 

360 attachedPBDs = [] 

361 pbds = self.session.xenapi.PBD.get_all_records() 

362 for pbdRec in pbds.values(): 

363 if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]: 

364 attachedPBDs.append(pbdRec) 

365 return attachedPBDs 

366 

367 def getOnlineHosts(self): 

368 return util.get_online_hosts(self.session) 

369 

370 def ensureInactive(self, hostRef, args): 

371 text = self.session.xenapi.host.call_plugin( \ 

372 hostRef, self.PLUGIN_ON_SLAVE, "multi", args) 

373 Util.log("call-plugin returned: '%s'" % text) 

374 

375 def getRecordHost(self, hostRef): 

376 return self.session.xenapi.host.get_record(hostRef) 

377 

378 def _getRefVDI(self, uuid): 

379 return self.session.xenapi.VDI.get_by_uuid(uuid) 

380 

381 def getRefVDI(self, vdi): 

382 return self._getRefVDI(vdi.uuid) 

383 

384 def getRecordVDI(self, uuid): 

385 try: 

386 ref = self._getRefVDI(uuid) 

387 return self.session.xenapi.VDI.get_record(ref) 

388 except XenAPI.Failure: 

389 return None 

390 

391 def singleSnapshotVDI(self, vdi): 

392 return self.session.xenapi.VDI.snapshot(vdi.getRef(), 

393 {"type": "internal"}) 

394 

395 def forgetVDI(self, srUuid, vdiUuid): 

396 """Forget the VDI, but handle the case where the VDI has already been 

397 forgotten (i.e. ignore errors)""" 

398 try: 

399 vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid) 

400 self.session.xenapi.VDI.forget(vdiRef) 

401 except XenAPI.Failure: 

402 pass 

403 

404 def getConfigVDI(self, vdi, key): 

405 kind = vdi.CONFIG_TYPE[key] 

406 if kind == self.CONFIG_SM: 

407 cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef()) 

408 elif kind == self.CONFIG_OTHER: 

409 cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef()) 

410 elif kind == self.CONFIG_ON_BOOT: 

411 cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef()) 

412 elif kind == self.CONFIG_ALLOW_CACHING: 

413 cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef()) 

414 else: 

415 assert(False) 

416 Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg))) 

417 return cfg 

418 

419 def removeFromConfigVDI(self, vdi, key): 

420 kind = vdi.CONFIG_TYPE[key] 

421 if kind == self.CONFIG_SM: 

422 self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key) 

423 elif kind == self.CONFIG_OTHER: 

424 self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key) 

425 else: 

426 assert(False) 

427 

428 def addToConfigVDI(self, vdi, key, val): 

429 kind = vdi.CONFIG_TYPE[key] 

430 if kind == self.CONFIG_SM: 

431 self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val) 

432 elif kind == self.CONFIG_OTHER: 

433 self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val) 

434 else: 

435 assert(False) 

436 

437 def isSnapshot(self, vdi): 

438 return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef()) 

439 

440 def markCacheSRsDirty(self): 

441 sr_refs = self.session.xenapi.SR.get_all_records_where( \ 

442 'field "local_cache_enabled" = "true"') 

443 for sr_ref in sr_refs: 

444 Util.log("Marking SR %s dirty" % sr_ref) 

445 util.set_dirty(self.session, sr_ref) 

446 

447 def srUpdate(self): 

448 Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"]) 

449 abortFlag = IPCFlag(self.srRecord["uuid"]) 

450 task = self.session.xenapi.Async.SR.update(self._srRef) 

451 cancelTask = True 

452 try: 

453 for i in range(60): 

454 status = self.session.xenapi.task.get_status(task) 

455 if not status == "pending": 

456 Util.log("SR.update_asynch status changed to [%s]" % status) 

457 cancelTask = False 

458 return 

459 if abortFlag.test(FLAG_TYPE_ABORT): 

460 Util.log("Abort signalled during srUpdate, cancelling task...") 

461 try: 

462 self.session.xenapi.task.cancel(task) 

463 cancelTask = False 

464 Util.log("Task cancelled") 

465 except: 

466 pass 

467 return 

468 time.sleep(1) 

469 finally: 

470 if cancelTask: 

471 self.session.xenapi.task.cancel(task) 

472 self.session.xenapi.task.destroy(task) 

473 Util.log("Asynch srUpdate still running, but timeout exceeded.") 

474 

475 def update_task(self): 

476 self.session.xenapi.task.set_other_config( 

477 self.task, 

478 { 

479 "applies_to": self._srRef 

480 }) 

481 total = self.task_progress['coalescable'] + self.task_progress['done'] 

482 if (total > 0): 

483 self.session.xenapi.task.set_progress( 

484 self.task, float(self.task_progress['done']) / total) 

485 

486 def create_task(self, label, description): 

487 self.task = self.session.xenapi.task.create(label, description) 

488 self.update_task() 

489 

490 def update_task_progress(self, key, value): 

491 self.task_progress[key] = value 

492 if self.task: 

493 self.update_task() 

494 

495 def set_task_status(self, status): 

496 if self.task: 

497 self.session.xenapi.task.set_status(self.task, status) 

498 

499 

500################################################################################ 

501# 

502# VDI 

503# 

504class VDI(object): 

505 """Object representing a VDI of a COW-based SR""" 

506 

507 POLL_INTERVAL = 1 

508 POLL_TIMEOUT = 30 

509 DEVICE_MAJOR = 202 

510 

511 # config keys & values 

512 DB_VDI_PARENT = "vhd-parent" 

513 DB_VDI_TYPE = "vdi_type" 

514 DB_VDI_BLOCKS = "vhd-blocks" 

515 DB_VDI_PAUSED = "paused" 

516 DB_VDI_RELINKING = "relinking" 

517 DB_VDI_ACTIVATING = "activating" 

518 DB_GC = "gc" 

519 DB_COALESCE = "coalesce" 

520 DB_LEAFCLSC = "leaf-coalesce" # config key 

521 DB_GC_NO_SPACE = "gc_no_space" 

522 LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce 

523 LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce 

524 LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means 

525 # no space to snap-coalesce or unable to keep 

526 # up with VDI. This is not used by the SM, it 

527 # might be used by external components. 

528 DB_ONBOOT = "on-boot" 

529 ONBOOT_RESET = "reset" 

530 DB_ALLOW_CACHING = "allow_caching" 

531 

532 CONFIG_TYPE = { 

533 DB_VDI_PARENT: XAPI.CONFIG_SM, 

534 DB_VDI_TYPE: XAPI.CONFIG_SM, 

535 DB_VDI_BLOCKS: XAPI.CONFIG_SM, 

536 DB_VDI_PAUSED: XAPI.CONFIG_SM, 

537 DB_VDI_RELINKING: XAPI.CONFIG_SM, 

538 DB_VDI_ACTIVATING: XAPI.CONFIG_SM, 

539 DB_GC: XAPI.CONFIG_OTHER, 

540 DB_COALESCE: XAPI.CONFIG_OTHER, 

541 DB_LEAFCLSC: XAPI.CONFIG_OTHER, 

542 DB_ONBOOT: XAPI.CONFIG_ON_BOOT, 

543 DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING, 

544 DB_GC_NO_SPACE: XAPI.CONFIG_SM 

545 } 

546 

547 LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes 

548 LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds 

549 TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating 

550 # feasibility of leaf coalesce 

551 

552 JRN_RELINK = "relink" # journal entry type for relinking children 

553 JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced 

554 JRN_LEAF = "leaf" # used in coalesce-leaf 

555 

556 STR_TREE_INDENT = 4 

557 

558 def __init__(self, sr, uuid, vdi_type): 

559 self.sr = sr 

560 self.scanError = True 

561 self.uuid = uuid 

562 self.vdi_type = vdi_type 

563 self.fileName = "" 

564 self.parentUuid = "" 

565 self.sizeVirt = -1 

566 self._sizePhys = -1 

567 self._sizeAllocated = -1 

568 self._hidden = False 

569 self.parent = None 

570 self.children = [] 

571 self._vdiRef = None 

572 self.cowutil = getCowUtil(vdi_type) 

573 self._clearRef() 

574 

575 @staticmethod 

576 def extractUuid(path): 

577 raise NotImplementedError("Implement in sub class") 

578 

579 def load(self, info=None) -> None: 

580 """Load VDI info""" 

581 pass 

582 

583 def getDriverName(self) -> str: 

584 return self.vdi_type 

585 

586 def getRef(self): 

587 if self._vdiRef is None: 

588 self._vdiRef = self.sr.xapi.getRefVDI(self) 

589 return self._vdiRef 

590 

591 def getConfig(self, key, default=None): 

592 config = self.sr.xapi.getConfigVDI(self, key) 

593 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING: 593 ↛ 594line 593 didn't jump to line 594, because the condition on line 593 was never true

594 val = config 

595 else: 

596 val = config.get(key) 

597 if val: 

598 return val 

599 return default 

600 

601 def setConfig(self, key, val): 

602 self.sr.xapi.removeFromConfigVDI(self, key) 

603 self.sr.xapi.addToConfigVDI(self, key, val) 

604 Util.log("Set %s = %s for %s" % (key, val, self)) 

605 

606 def delConfig(self, key): 

607 self.sr.xapi.removeFromConfigVDI(self, key) 

608 Util.log("Removed %s from %s" % (key, self)) 

609 

610 def ensureUnpaused(self): 

611 if self.getConfig(self.DB_VDI_PAUSED) == "true": 

612 Util.log("Unpausing VDI %s" % self) 

613 self.unpause() 

614 

615 def pause(self, failfast=False) -> None: 

616 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid, 

617 self.uuid, failfast): 

618 raise util.SMException("Failed to pause VDI %s" % self) 

619 

620 def _report_tapdisk_unpause_error(self): 

621 try: 

622 xapi = self.sr.xapi.session.xenapi 

623 sr_ref = xapi.SR.get_by_uuid(self.sr.uuid) 

624 msg_name = "failed to unpause tapdisk" 

625 msg_body = "Failed to unpause tapdisk for VDI %s, " \ 

626 "VMs using this tapdisk have lost access " \ 

627 "to the corresponding disk(s)" % self.uuid 

628 xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body) 

629 except Exception as e: 

630 util.SMlog("failed to generate message: %s" % e) 

631 

632 def unpause(self): 

633 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid, 

634 self.uuid): 

635 self._report_tapdisk_unpause_error() 

636 raise util.SMException("Failed to unpause VDI %s" % self) 

637 

638 def refresh(self, ignoreNonexistent=True): 

639 """Pause-unpause in one step""" 

640 self.sr.lock() 

641 try: 

642 try: 

643 if not blktap2.VDI.tap_refresh(self.sr.xapi.session, 643 ↛ 645line 643 didn't jump to line 645, because the condition on line 643 was never true

644 self.sr.uuid, self.uuid): 

645 self._report_tapdisk_unpause_error() 

646 raise util.SMException("Failed to refresh %s" % self) 

647 except XenAPI.Failure as e: 

648 if util.isInvalidVDI(e) and ignoreNonexistent: 

649 Util.log("VDI %s not found, ignoring" % self) 

650 return 

651 raise 

652 finally: 

653 self.sr.unlock() 

654 

655 def isSnapshot(self): 

656 return self.sr.xapi.isSnapshot(self) 

657 

658 def isAttachedRW(self): 

659 return util.is_attached_rw( 

660 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef())) 

661 

662 def getVDIBlocks(self): 

663 val = self.updateBlockInfo() 

664 bitmap = zlib.decompress(base64.b64decode(val)) 

665 return bitmap 

666 

667 def isCoalesceable(self): 

668 """A VDI is coalesceable if it has no siblings and is not a leaf""" 

669 return not self.scanError and \ 

670 self.parent and \ 

671 len(self.parent.children) == 1 and \ 

672 self.isHidden() and \ 

673 len(self.children) > 0 

674 

675 def isLeafCoalesceable(self): 

676 """A VDI is leaf-coalesceable if it has no siblings and is a leaf""" 

677 return not self.scanError and \ 

678 self.parent and \ 

679 len(self.parent.children) == 1 and \ 

680 not self.isHidden() and \ 

681 len(self.children) == 0 

682 

683 def canLiveCoalesce(self, speed): 

684 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

685 isLeafCoalesceable() already""" 

686 feasibleSize = False 

687 allowedDownTime = \ 

688 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

689 allocated_size = self.getAllocatedSize() 

690 if speed: 

691 feasibleSize = \ 

692 allocated_size // speed < allowedDownTime 

693 else: 

694 feasibleSize = \ 

695 allocated_size < self.LIVE_LEAF_COALESCE_MAX_SIZE 

696 

697 return (feasibleSize or 

698 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 

699 

700 def getAllPrunable(self): 

701 if len(self.children) == 0: # base case 

702 # it is possible to have a hidden leaf that was recently coalesced 

703 # onto its parent, its children already relinked but not yet 

704 # reloaded - in which case it may not be garbage collected yet: 

705 # some tapdisks could still be using the file. 

706 if self.sr.journaler.get(self.JRN_RELINK, self.uuid): 

707 return [] 

708 if not self.scanError and self.isHidden(): 

709 return [self] 

710 return [] 

711 

712 thisPrunable = True 

713 vdiList = [] 

714 for child in self.children: 

715 childList = child.getAllPrunable() 

716 vdiList.extend(childList) 

717 if child not in childList: 

718 thisPrunable = False 

719 

720 # We can destroy the current VDI if all childs are hidden BUT the 

721 # current VDI must be hidden too to do that! 

722 # Example in this case (after a failed live leaf coalesce): 

723 # 

724 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees): 

725 # SMGC: [32436] b5458d61(1.000G/4.127M) 

726 # SMGC: [32436] *OLD_b545(1.000G/4.129M) 

727 # 

728 # OLD_b545 is hidden and must be removed, but b5458d61 not. 

729 # Normally we are not in this function when the delete action is 

730 # executed but in `_liveLeafCoalesce`. 

731 

732 if not self.scanError and not self.isHidden() and thisPrunable: 

733 vdiList.append(self) 

734 return vdiList 

735 

736 def getSizePhys(self) -> int: 

737 return self._sizePhys 

738 

739 def getAllocatedSize(self) -> int: 

740 return self._sizeAllocated 

741 

742 def getTreeRoot(self): 

743 "Get the root of the tree that self belongs to" 

744 root = self 

745 while root.parent: 

746 root = root.parent 

747 return root 

748 

749 def getTreeHeight(self): 

750 "Get the height of the subtree rooted at self" 

751 if len(self.children) == 0: 

752 return 1 

753 

754 maxChildHeight = 0 

755 for child in self.children: 

756 childHeight = child.getTreeHeight() 

757 if childHeight > maxChildHeight: 

758 maxChildHeight = childHeight 

759 

760 return maxChildHeight + 1 

761 

762 def getAllLeaves(self) -> List["VDI"]: 

763 "Get all leaf nodes in the subtree rooted at self" 

764 if len(self.children) == 0: 

765 return [self] 

766 

767 leaves = [] 

768 for child in self.children: 

769 leaves.extend(child.getAllLeaves()) 

770 return leaves 

771 

772 def updateBlockInfo(self) -> Optional[str]: 

773 val = base64.b64encode(self._queryCowBlocks()).decode() 

774 try: 

775 self.setConfig(VDI.DB_VDI_BLOCKS, val) 

776 except Exception: 

777 if self.vdi_type != VdiType.QCOW2: 

778 raise 

779 # Sometime with QCOW2, our allocation table is too big to be stored in XAPI, in this case we do not store it 

780 # and we write `skipped` instead so that hasWork is happy (and the GC doesn't run in loop indefinitely). 

781 self.setConfig(VDI.DB_VDI_BLOCKS, "skipped") 

782 

783 return val 

784 

785 def rename(self, uuid) -> None: 

786 "Rename the VDI file" 

787 assert(not self.sr.vdis.get(uuid)) 

788 self._clearRef() 

789 oldUuid = self.uuid 

790 self.uuid = uuid 

791 self.children = [] 

792 # updating the children themselves is the responsibility of the caller 

793 del self.sr.vdis[oldUuid] 

794 self.sr.vdis[self.uuid] = self 

795 

796 def delete(self) -> None: 

797 "Physically delete the VDI" 

798 lock.Lock.cleanup(self.uuid, NS_PREFIX_LVM + self.sr.uuid) 

799 lock.Lock.cleanupAll(self.uuid) 

800 self._clear() 

801 

802 def getParent(self) -> str: 

803 return self.cowutil.getParent(self.path, lambda x: x.strip()) 803 ↛ exitline 803 didn't run the lambda on line 803

804 

805 def repair(self, parent) -> None: 

806 self.cowutil.repair(parent) 

807 

808 @override 

809 def __str__(self) -> str: 

810 strHidden = "" 

811 if self.isHidden(): 811 ↛ 812line 811 didn't jump to line 812, because the condition on line 811 was never true

812 strHidden = "*" 

813 strSizeVirt = "?" 

814 if self.sizeVirt > 0: 814 ↛ 815line 814 didn't jump to line 815, because the condition on line 814 was never true

815 strSizeVirt = Util.num2str(self.sizeVirt) 

816 strSizePhys = "?" 

817 if self._sizePhys > 0: 817 ↛ 818line 817 didn't jump to line 818, because the condition on line 817 was never true

818 strSizePhys = "/%s" % Util.num2str(self._sizePhys) 

819 strSizeAllocated = "?" 

820 if self._sizeAllocated >= 0: 

821 strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated) 

822 strType = "[{}]".format(self.vdi_type) 

823 

824 return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, 

825 strSizePhys, strSizeAllocated, strType) 

826 

827 def validate(self, fast=False) -> None: 

828 if self.cowutil.check(self.path, fast=fast) != CowUtil.CheckResult.Success: 828 ↛ 829line 828 didn't jump to line 829, because the condition on line 828 was never true

829 raise util.SMException("COW image %s corrupted" % self) 

830 

831 def _clear(self): 

832 self.uuid = "" 

833 self.path = "" 

834 self.parentUuid = "" 

835 self.parent = None 

836 self._clearRef() 

837 

838 def _clearRef(self): 

839 self._vdiRef = None 

840 

841 @staticmethod 

842 def _cancel_exception(sig, frame): 

843 raise CancelException() 

844 

845 def _call_plugin_coalesce(self, hostRef): 

846 signal.signal(signal.SIGTERM, self._cancel_exception) 

847 args = {"path": self.path, "vdi_type": self.vdi_type} 

848 Util.log("Calling remote coalesce plugin with: {}".format(args)) 

849 try: 

850 ret = self.sr.xapi.session.xenapi.host.call_plugin( \ 

851 hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_tapdisk", args) 

852 Util.log("Remote coalesce returned {}".format(ret)) 

853 except CancelException: 

854 Util.log(f"Cancelling online coalesce following signal {args}") 

855 self.sr.xapi.session.xenapi.host.call_plugin( \ 

856 hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_cancel", args) 

857 raise 

858 except Exception: 

859 raise 

860 

861 def _doCoalesceOnHost(self, hostRef): 

862 self.validate() 

863 self.parent.validate(True) 

864 self.parent._increaseSizeVirt(self.sizeVirt) 

865 self.sr._updateSlavesOnResize(self.parent) 

866 #TODO: We might need to make the LV RW on the slave directly for coalesce? 

867 # Children and parent need to be RW for QCOW2 coalesce, otherwise tapdisk(libqcow) will crash trying to access them 

868 

869 def abortTest(): 

870 file = self.sr._gc_running_file(self) 

871 try: 

872 with open(file, "r") as f: 

873 if not f.read(): 

874 Util.log("abortTest: Cancelling coalesce") 

875 return True 

876 except OSError as e: 

877 if e.errno == errno.ENOENT: 

878 Util.log("File {} does not exist".format(file)) 

879 else: 

880 Util.log("IOError: {}".format(e)) 

881 return True 

882 return False 

883 

884 Util.runAbortable(lambda: self._call_plugin_coalesce(hostRef), \ 

885 None, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0, prefSig=signal.SIGTERM) 

886 

887 self.parent.validate(True) 

888 #self._verifyContents(0) 

889 self.parent.updateBlockInfo() 

890 

891 def _isOpenOnHosts(self) -> Optional[str]: 

892 for pbdRecord in self.sr.xapi.getAttachedPBDs(): 

893 hostRef = pbdRecord["host"] 

894 args = {"path": self.path} 

895 is_openers = util.strtobool(self.sr.xapi.session.xenapi.host.call_plugin( \ 

896 hostRef, XAPI.PLUGIN_ON_SLAVE, "is_openers", args)) 

897 if is_openers: 

898 return hostRef 

899 return None 

900 

901 def _doCoalesce(self) -> None: 

902 """Coalesce self onto parent. Only perform the actual coalescing of 

903 an image, but not the subsequent relinking. We'll do that as the next step, 

904 after reloading the entire SR in case things have changed while we 

905 were coalescing""" 

906 self.validate() 

907 self.parent.validate(True) 

908 self.parent._increaseSizeVirt(self.sizeVirt) 

909 self.sr._updateSlavesOnResize(self.parent) 

910 self._coalesceCowImage(0) 

911 self.parent.validate(True) 

912 #self._verifyContents(0) 

913 self.parent.updateBlockInfo() 

914 

915 def _verifyContents(self, timeOut): 

916 Util.log(" Coalesce verification on %s" % self) 

917 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

918 Util.runAbortable(lambda: self._runTapdiskDiff(), True, 

919 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

920 Util.log(" Coalesce verification succeeded") 

921 

922 def _runTapdiskDiff(self): 

923 cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \ 

924 (self.getDriverName(), self.path, \ 

925 self.parent.getDriverName(), self.parent.path) 

926 Util.doexec(cmd, 0) 

927 return True 

928 

929 @staticmethod 

930 def _reportCoalesceError(vdi, ce): 

931 """Reports a coalesce error to XenCenter. 

932 

933 vdi: the VDI object on which the coalesce error occured 

934 ce: the CommandException that was raised""" 

935 

936 msg_name = os.strerror(ce.code) 

937 if ce.code == errno.ENOSPC: 

938 # TODO We could add more information here, e.g. exactly how much 

939 # space is required for the particular coalesce, as well as actions 

940 # to be taken by the user and consequences of not taking these 

941 # actions. 

942 msg_body = 'Run out of space while coalescing.' 

943 elif ce.code == errno.EIO: 

944 msg_body = 'I/O error while coalescing.' 

945 else: 

946 msg_body = '' 

947 util.SMlog('Coalesce failed on SR %s: %s (%s)' 

948 % (vdi.sr.uuid, msg_name, msg_body)) 

949 

950 # Create a XenCenter message, but don't spam. 

951 xapi = vdi.sr.xapi.session.xenapi 

952 sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid) 

953 oth_cfg = xapi.SR.get_other_config(sr_ref) 

954 if COALESCE_ERR_RATE_TAG in oth_cfg: 

955 coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG]) 

956 else: 

957 coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE 

958 

959 xcmsg = False 

960 if coalesce_err_rate == 0: 

961 xcmsg = True 

962 elif coalesce_err_rate > 0: 

963 now = datetime.datetime.now() 

964 sm_cfg = xapi.SR.get_sm_config(sr_ref) 

965 if COALESCE_LAST_ERR_TAG in sm_cfg: 

966 # seconds per message (minimum distance in time between two 

967 # messages in seconds) 

968 spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60) 

969 last = datetime.datetime.fromtimestamp( 

970 float(sm_cfg[COALESCE_LAST_ERR_TAG])) 

971 if now - last >= spm: 

972 xapi.SR.remove_from_sm_config(sr_ref, 

973 COALESCE_LAST_ERR_TAG) 

974 xcmsg = True 

975 else: 

976 xcmsg = True 

977 if xcmsg: 

978 xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG, 

979 str(now.strftime('%s'))) 

980 if xcmsg: 

981 xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body) 

982 

983 def coalesce(self) -> int: 

984 return self.cowutil.coalesce(self.path) 

985 

986 @staticmethod 

987 def _doCoalesceCowImage(vdi: "VDI"): 

988 try: 

989 startTime = time.time() 

990 allocated_size = vdi.getAllocatedSize() 

991 coalesced_size = vdi.coalesce() 

992 endTime = time.time() 

993 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size) 

994 except util.CommandException as ce: 

995 # We use try/except for the following piece of code because it runs 

996 # in a separate process context and errors will not be caught and 

997 # reported by anyone. 

998 try: 

999 # Report coalesce errors back to user via XC 

1000 VDI._reportCoalesceError(vdi, ce) 

1001 except Exception as e: 

1002 util.SMlog('failed to create XenCenter message: %s' % e) 

1003 raise ce 

1004 except: 

1005 raise 

1006 

1007 def _vdi_is_raw(self, vdi_path): 

1008 """ 

1009 Given path to vdi determine if it is raw 

1010 """ 

1011 uuid = self.extractUuid(vdi_path) 

1012 return self.sr.vdis[uuid].vdi_type == VdiType.RAW 

1013 

1014 def _coalesceCowImage(self, timeOut): 

1015 Util.log(" Running COW coalesce on %s" % self) 

1016 def abortTest(): 

1017 if self.cowutil.isCoalesceableOnRemote(): 

1018 file = self.sr._gc_running_file(self) 

1019 try: 

1020 with open(file, "r") as f: 

1021 if not f.read(): 

1022 return True 

1023 except OSError as e: 

1024 if e.errno == errno.ENOENT: 

1025 util.SMlog("File {} does not exist".format(file)) 

1026 else: 

1027 util.SMlog("IOError: {}".format(e)) 

1028 return True 

1029 return IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1030 

1031 try: 

1032 util.fistpoint.activate_custom_fn( 

1033 "cleanup_coalesceVHD_inject_failure", 

1034 util.inject_failure) 

1035 Util.runAbortable(lambda: VDI._doCoalesceCowImage(self), None, 

1036 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

1037 except: 

1038 # Exception at this phase could indicate a failure in COW coalesce 

1039 # or a kill of COW coalesce by runAbortable due to timeOut 

1040 # Try a repair and reraise the exception 

1041 parent = "" 

1042 try: 

1043 parent = self.getParent() 

1044 if not self._vdi_is_raw(parent): 

1045 # Repair error is logged and ignored. Error reraised later 

1046 util.SMlog('Coalesce failed on %s, attempting repair on ' \ 

1047 'parent %s' % (self.uuid, parent)) 

1048 self.repair(parent) 

1049 except Exception as e: 

1050 util.SMlog('(error ignored) Failed to repair parent %s ' \ 

1051 'after failed coalesce on %s, err: %s' % 

1052 (parent, self.path, e)) 

1053 raise 

1054 

1055 util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid) 

1056 

1057 def _relinkSkip(self) -> None: 

1058 """Relink children of this VDI to point to the parent of this VDI""" 

1059 abortFlag = IPCFlag(self.sr.uuid) 

1060 for child in self.children: 

1061 if abortFlag.test(FLAG_TYPE_ABORT): 1061 ↛ 1062line 1061 didn't jump to line 1062, because the condition on line 1061 was never true

1062 raise AbortException("Aborting due to signal") 

1063 Util.log(" Relinking %s from %s to %s" % \ 

1064 (child, self, self.parent)) 

1065 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid) 

1066 child._setParent(self.parent) 

1067 self.children = [] 

1068 

1069 def _reloadChildren(self, vdiSkip): 

1070 """Pause & unpause all VDIs in the subtree to cause blktap to reload 

1071 the COW image metadata for this file in any online VDI""" 

1072 abortFlag = IPCFlag(self.sr.uuid) 

1073 for child in self.children: 

1074 if child == vdiSkip: 

1075 continue 

1076 if abortFlag.test(FLAG_TYPE_ABORT): 1076 ↛ 1077line 1076 didn't jump to line 1077, because the condition on line 1076 was never true

1077 raise AbortException("Aborting due to signal") 

1078 Util.log(" Reloading VDI %s" % child) 

1079 child._reload() 

1080 

1081 def _reload(self): 

1082 """Pause & unpause to cause blktap to reload the image metadata""" 

1083 for child in self.children: 1083 ↛ 1084line 1083 didn't jump to line 1084, because the loop on line 1083 never started

1084 child._reload() 

1085 

1086 # only leaves can be attached 

1087 if len(self.children) == 0: 1087 ↛ exitline 1087 didn't return from function '_reload', because the condition on line 1087 was never false

1088 try: 

1089 self.delConfig(VDI.DB_VDI_RELINKING) 

1090 except XenAPI.Failure as e: 

1091 if not util.isInvalidVDI(e): 

1092 raise 

1093 self.refresh() 

1094 

1095 def _needRelink(self, list_not_to_relink): 

1096 """ 

1097 If we coalesce up the chain, we shouldn't need to do the relink at all, we only need to do the relink on the children if their direct parent was the one we were coalescing 

1098 """ 

1099 if not list_not_to_relink: 1099 ↛ 1101line 1099 didn't jump to line 1101, because the condition on line 1099 was never false

1100 return True 

1101 if self.uuid in list_not_to_relink: 

1102 return False 

1103 else: 

1104 return True 

1105 

1106 def _tagChildrenForRelink(self, list_not_to_relink=None): 

1107 if len(self.children) == 0: 

1108 retries = 0 

1109 try: 

1110 while retries < 15: 

1111 retries += 1 

1112 if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None: 

1113 Util.log("VDI %s is activating, wait to relink" % 

1114 self.uuid) 

1115 else: 

1116 if self._needRelink(list_not_to_relink): 1116 ↛ 1126line 1116 didn't jump to line 1126, because the condition on line 1116 was never false

1117 self.setConfig(VDI.DB_VDI_RELINKING, "True") 

1118 

1119 if self.getConfig(VDI.DB_VDI_ACTIVATING): 

1120 self.delConfig(VDI.DB_VDI_RELINKING) 

1121 Util.log("VDI %s started activating while tagging" % 

1122 self.uuid) 

1123 else: 

1124 return 

1125 else: 

1126 Util.log(f"Not adding relinking tag to VDI {self.uuid}") 

1127 return 

1128 time.sleep(2) 

1129 

1130 raise util.SMException("Failed to tag vdi %s for relink" % self) 

1131 except XenAPI.Failure as e: 

1132 if not util.isInvalidVDI(e): 

1133 raise 

1134 

1135 for child in self.children: 

1136 child._tagChildrenForRelink(list_not_to_relink) 

1137 

1138 def _loadInfoParent(self): 

1139 ret = self.cowutil.getParent(self.path, LvmCowUtil.extractUuid) 

1140 if ret: 

1141 self.parentUuid = ret 

1142 

1143 def _setParent(self, parent) -> None: 

1144 self.cowutil.setParent(self.path, parent.path, False) 

1145 self.parent = parent 

1146 self.parentUuid = parent.uuid 

1147 parent.children.append(self) 

1148 try: 

1149 self.setConfig(self.DB_VDI_PARENT, self.parentUuid) 

1150 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1151 (self.uuid, self.parentUuid)) 

1152 except: 

1153 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1154 (self.uuid, self.parentUuid)) 

1155 

1156 def _update_vhd_parent(self, real_parent_uuid): 

1157 try: 

1158 self.setConfig(self.DB_VDI_PARENT, real_parent_uuid) 

1159 Util.log("Updated the vhd-parent field for child %s with real parent %s following a online coalesce" % \ 

1160 (self.uuid, real_parent_uuid)) 

1161 except: 

1162 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1163 (self.uuid, real_parent_uuid)) 

1164 

1165 def isHidden(self) -> bool: 

1166 if self._hidden is None: 1166 ↛ 1167line 1166 didn't jump to line 1167, because the condition on line 1166 was never true

1167 self._loadInfoHidden() 

1168 return self._hidden 

1169 

1170 def _loadInfoHidden(self) -> None: 

1171 hidden = self.cowutil.getHidden(self.path) 

1172 self._hidden = (hidden != 0) 

1173 

1174 def _setHidden(self, hidden=True) -> None: 

1175 self._hidden = None 

1176 self.cowutil.setHidden(self.path, hidden) 

1177 self._hidden = hidden 

1178 

1179 def _increaseSizeVirt(self, size, atomic=True) -> None: 

1180 """ensure the virtual size of 'self' is at least 'size'. Note that 

1181 resizing a COW image must always be offline and atomically: the file must 

1182 not be open by anyone and no concurrent operations may take place. 

1183 Thus we use the Agent API call for performing paused atomic 

1184 operations. If the caller is already in the atomic context, it must 

1185 call with atomic = False""" 

1186 if self.sizeVirt >= size: 1186 ↛ 1188line 1186 didn't jump to line 1188, because the condition on line 1186 was never false

1187 return 

1188 Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \ 

1189 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1190 

1191 msize = self.cowutil.getMaxResizeSize(self.path) 

1192 if (size <= msize): 

1193 self.cowutil.setSizeVirtFast(self.path, size) 

1194 else: 

1195 if atomic: 

1196 vdiList = self._getAllSubtree() 

1197 self.sr.lock() 

1198 try: 

1199 self.sr.pauseVDIs(vdiList) 

1200 try: 

1201 self._setSizeVirt(size) 

1202 finally: 

1203 self.sr.unpauseVDIs(vdiList) 

1204 finally: 

1205 self.sr.unlock() 

1206 else: 

1207 self._setSizeVirt(size) 

1208 

1209 self.sizeVirt = self.cowutil.getSizeVirt(self.path) 

1210 

1211 def _setSizeVirt(self, size) -> None: 

1212 """WARNING: do not call this method directly unless all VDIs in the 

1213 subtree are guaranteed to be unplugged (and remain so for the duration 

1214 of the operation): this operation is only safe for offline COW images""" 

1215 jFile = os.path.join(self.sr.path, self.uuid) 

1216 self.cowutil.setSizeVirt(self.path, size, jFile) 

1217 

1218 def _queryCowBlocks(self) -> bytes: 

1219 return self.cowutil.getBlockBitmap(self.path) 

1220 

1221 def _getCoalescedSizeData(self): 

1222 """Get the data size of the resulting image if we coalesce self onto 

1223 parent. We calculate the actual size by using the image block allocation 

1224 information (as opposed to just adding up the two image sizes to get an 

1225 upper bound)""" 

1226 # make sure we don't use stale BAT info from vdi_rec since the child 

1227 # was writable all this time 

1228 self.delConfig(VDI.DB_VDI_BLOCKS) 

1229 blocksChild = self.getVDIBlocks() 

1230 blocksParent = self.parent.getVDIBlocks() 

1231 numBlocks = Util.countBits(blocksChild, blocksParent) 

1232 Util.log("Num combined blocks = %d" % numBlocks) 

1233 sizeData = numBlocks * self.cowutil.getBlockSize(self.path) 

1234 assert(sizeData <= self.sizeVirt) 

1235 return sizeData 

1236 

1237 def _calcExtraSpaceForCoalescing(self) -> int: 

1238 sizeData = self._getCoalescedSizeData() 

1239 sizeCoalesced = sizeData + self.cowutil.calcOverheadBitmap(sizeData) + \ 

1240 self.cowutil.calcOverheadEmpty(self.sizeVirt) 

1241 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1242 return sizeCoalesced - self.parent.getSizePhys() 

1243 

1244 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1245 """How much extra space in the SR will be required to 

1246 [live-]leaf-coalesce this VDI""" 

1247 # the space requirements are the same as for inline coalesce 

1248 return self._calcExtraSpaceForCoalescing() 

1249 

1250 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1251 """How much extra space in the SR will be required to 

1252 snapshot-coalesce this VDI""" 

1253 return self._calcExtraSpaceForCoalescing() + \ 

1254 self.cowutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf 

1255 

1256 def _getAllSubtree(self): 

1257 """Get self and all VDIs in the subtree of self as a flat list""" 

1258 vdiList = [self] 

1259 for child in self.children: 

1260 vdiList.extend(child._getAllSubtree()) 

1261 return vdiList 

1262 

1263 

1264class FileVDI(VDI): 

1265 """Object representing a VDI in a file-based SR (EXT or NFS)""" 

1266 

1267 @override 

1268 @staticmethod 

1269 def extractUuid(path): 

1270 fileName = os.path.basename(path) 

1271 return os.path.splitext(fileName)[0] 

1272 

1273 def __init__(self, sr, uuid, vdi_type): 

1274 VDI.__init__(self, sr, uuid, vdi_type) 

1275 self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type]) 

1276 

1277 @override 

1278 def load(self, info=None) -> None: 

1279 if not info: 

1280 if not util.pathexists(self.path): 

1281 raise util.SMException("%s not found" % self.path) 

1282 try: 

1283 info = self.cowutil.getInfo(self.path, self.extractUuid) 

1284 except util.SMException: 

1285 Util.log(" [VDI %s: failed to read COW image metadata]" % self.uuid) 

1286 return 

1287 self.parent = None 

1288 self.children = [] 

1289 self.parentUuid = info.parentUuid 

1290 self.sizeVirt = info.sizeVirt 

1291 self._sizePhys = info.sizePhys 

1292 self._sizeAllocated = info.sizeAllocated 

1293 self._hidden = info.hidden 

1294 self.scanError = False 

1295 self.path = os.path.join(self.sr.path, "%s%s" % \ 

1296 (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])) 

1297 

1298 @override 

1299 def rename(self, uuid) -> None: 

1300 oldPath = self.path 

1301 VDI.rename(self, uuid) 

1302 self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type]) 

1303 self.path = os.path.join(self.sr.path, self.fileName) 

1304 assert(not util.pathexists(self.path)) 

1305 Util.log("Renaming %s -> %s" % (oldPath, self.path)) 

1306 os.rename(oldPath, self.path) 

1307 

1308 @override 

1309 def delete(self) -> None: 

1310 if len(self.children) > 0: 1310 ↛ 1311line 1310 didn't jump to line 1311, because the condition on line 1310 was never true

1311 raise util.SMException("VDI %s has children, can't delete" % \ 

1312 self.uuid) 

1313 try: 

1314 self.sr.lock() 

1315 try: 

1316 os.unlink(self.path) 

1317 self.sr.forgetVDI(self.uuid) 

1318 finally: 

1319 self.sr.unlock() 

1320 except OSError: 

1321 raise util.SMException("os.unlink(%s) failed" % self.path) 

1322 VDI.delete(self) 

1323 

1324 @override 

1325 def getAllocatedSize(self) -> int: 

1326 if self._sizeAllocated == -1: 1326 ↛ 1327line 1326 didn't jump to line 1327, because the condition on line 1326 was never true

1327 self._sizeAllocated = self.cowutil.getAllocatedSize(self.path) 

1328 return self._sizeAllocated 

1329 

1330 

1331class LVMVDI(VDI): 

1332 """Object representing a VDI in an LVM SR""" 

1333 

1334 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent 

1335 

1336 @override 

1337 def load(self, info=None) -> None: 

1338 # `info` is always set. `None` default value is only here to match parent method. 

1339 assert info, "No info given to LVMVDI.load" 

1340 self.parent = None 

1341 self.children = [] 

1342 self._sizePhys = -1 

1343 self._sizeAllocated = -1 

1344 self.scanError = info.scanError 

1345 self.sizeLV = info.sizeLV 

1346 self.sizeVirt = info.sizeVirt 

1347 self.fileName = info.lvName 

1348 self.lvActive = info.lvActive 

1349 self.lvOpen = info.lvOpen 

1350 self.lvReadonly = info.lvReadonly 

1351 self._hidden = info.hidden 

1352 self.parentUuid = info.parentUuid 

1353 self.path = os.path.join(self.sr.path, self.fileName) 

1354 self.lvmcowutil = LvmCowUtil(self.cowutil) 

1355 

1356 @override 

1357 @staticmethod 

1358 def extractUuid(path): 

1359 return LvmCowUtil.extractUuid(path) 

1360 

1361 def inflate(self, size): 

1362 """inflate the LV containing the COW image to 'size'""" 

1363 if not VdiType.isCowImage(self.vdi_type): 

1364 return 

1365 self._activate() 

1366 self.sr.lock() 

1367 try: 

1368 self.lvmcowutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size) 

1369 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid) 

1370 finally: 

1371 self.sr.unlock() 

1372 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1373 self._sizePhys = -1 

1374 self._sizeAllocated = -1 

1375 

1376 def deflate(self): 

1377 """deflate the LV containing the image to minimum""" 

1378 if not VdiType.isCowImage(self.vdi_type): 

1379 return 

1380 self._activate() 

1381 self.sr.lock() 

1382 try: 

1383 self.lvmcowutil.deflate(self.sr.lvmCache, self.fileName, self.getSizePhys()) 

1384 finally: 

1385 self.sr.unlock() 

1386 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1387 self._sizePhys = -1 

1388 self._sizeAllocated = -1 

1389 

1390 def inflateFully(self): 

1391 self.inflate(self.lvmcowutil.calcVolumeSize(self.sizeVirt)) 

1392 

1393 def inflateParentForCoalesce(self): 

1394 """Inflate the parent only as much as needed for the purposes of 

1395 coalescing""" 

1396 if not VdiType.isCowImage(self.parent.vdi_type): 

1397 return 

1398 inc = self._calcExtraSpaceForCoalescing() 

1399 if inc > 0: 

1400 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid) 

1401 self.parent.inflate(self.parent.sizeLV + inc) 

1402 

1403 @override 

1404 def updateBlockInfo(self) -> Optional[str]: 

1405 if VdiType.isCowImage(self.vdi_type): 

1406 return VDI.updateBlockInfo(self) 

1407 return None 

1408 

1409 @override 

1410 def rename(self, uuid) -> None: 

1411 oldUuid = self.uuid 

1412 oldLVName = self.fileName 

1413 VDI.rename(self, uuid) 

1414 self.fileName = LV_PREFIX[self.vdi_type] + self.uuid 

1415 self.path = os.path.join(self.sr.path, self.fileName) 

1416 assert(not self.sr.lvmCache.checkLV(self.fileName)) 

1417 

1418 self.sr.lvmCache.rename(oldLVName, self.fileName) 

1419 if self.sr.lvActivator.get(oldUuid, False): 

1420 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False) 

1421 

1422 ns = NS_PREFIX_LVM + self.sr.uuid 

1423 (cnt, bcnt) = RefCounter.check(oldUuid, ns) 

1424 RefCounter.set(self.uuid, cnt, bcnt, ns) 

1425 RefCounter.reset(oldUuid, ns) 

1426 

1427 @override 

1428 def delete(self) -> None: 

1429 if len(self.children) > 0: 

1430 raise util.SMException("VDI %s has children, can't delete" % \ 

1431 self.uuid) 

1432 self.sr.lock() 

1433 try: 

1434 self.sr.lvmCache.remove(self.fileName) 

1435 self.sr.forgetVDI(self.uuid) 

1436 finally: 

1437 self.sr.unlock() 

1438 RefCounter.reset(self.uuid, NS_PREFIX_LVM + self.sr.uuid) 

1439 VDI.delete(self) 

1440 

1441 @override 

1442 def getSizePhys(self) -> int: 

1443 if self._sizePhys == -1: 

1444 self._loadInfoSizePhys() 

1445 return self._sizePhys 

1446 

1447 def _loadInfoSizePhys(self): 

1448 """Get the physical utilization of the COW image file. We do it individually 

1449 (and not using the COW batch scanner) as an optimization: this info is 

1450 relatively expensive and we need it only for VDI's involved in 

1451 coalescing.""" 

1452 if not VdiType.isCowImage(self.vdi_type): 

1453 return 

1454 self._activate() 

1455 self._sizePhys = self.cowutil.getSizePhys(self.path) 

1456 if self._sizePhys <= 0: 

1457 raise util.SMException("phys size of %s = %d" % \ 

1458 (self, self._sizePhys)) 

1459 

1460 @override 

1461 def getAllocatedSize(self) -> int: 

1462 if self._sizeAllocated == -1: 

1463 self._loadInfoSizeAllocated() 

1464 return self._sizeAllocated 

1465 

1466 def _loadInfoSizeAllocated(self): 

1467 """ 

1468 Get the allocated size of the COW volume. 

1469 """ 

1470 if not VdiType.isCowImage(self.vdi_type): 

1471 return 

1472 self._activate() 

1473 self._sizeAllocated = self.cowutil.getAllocatedSize(self.path) 

1474 

1475 @override 

1476 def _loadInfoHidden(self) -> None: 

1477 if not VdiType.isCowImage(self.vdi_type): 

1478 self._hidden = self.sr.lvmCache.getHidden(self.fileName) 

1479 else: 

1480 VDI._loadInfoHidden(self) 

1481 

1482 @override 

1483 def _setHidden(self, hidden=True) -> None: 

1484 if not VdiType.isCowImage(self.vdi_type): 

1485 self._hidden = None 

1486 self.sr.lvmCache.setHidden(self.fileName, hidden) 

1487 self._hidden = hidden 

1488 else: 

1489 VDI._setHidden(self, hidden) 

1490 

1491 @override 

1492 def __str__(self) -> str: 

1493 strType = self.vdi_type 

1494 if self.vdi_type == VdiType.RAW: 

1495 strType = "RAW" 

1496 strHidden = "" 

1497 if self.isHidden(): 

1498 strHidden = "*" 

1499 strSizePhys = "" 

1500 if self._sizePhys > 0: 

1501 strSizePhys = Util.num2str(self._sizePhys) 

1502 strSizeAllocated = "" 

1503 if self._sizeAllocated >= 0: 

1504 strSizeAllocated = Util.num2str(self._sizeAllocated) 

1505 strActive = "n" 

1506 if self.lvActive: 

1507 strActive = "a" 

1508 if self.lvOpen: 

1509 strActive += "o" 

1510 return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType, 

1511 Util.num2str(self.sizeVirt), strSizePhys, strSizeAllocated, 

1512 Util.num2str(self.sizeLV), strActive) 

1513 

1514 @override 

1515 def validate(self, fast=False) -> None: 

1516 if VdiType.isCowImage(self.vdi_type): 

1517 VDI.validate(self, fast) 

1518 

1519 def _setChainRw(self) -> List[str]: 

1520 """ 

1521 Set the readonly LV and children writable. 

1522 It's needed because the coalesce can be done by tapdisk directly 

1523 and it will need to write parent information for children. 

1524 The VDI we want to coalesce into it's parent need to be writable for libqcow coalesce part. 

1525 Return a list of the LV that were previously readonly to be made RO again after the coalesce. 

1526 """ 

1527 was_ro = [] 

1528 if self.lvReadonly: 

1529 self.sr.lvmCache.setReadonly(self.fileName, False) 

1530 was_ro.append(self.fileName) 

1531 

1532 for child in self.children: 

1533 if child.lvReadonly: 

1534 self.sr.lvmCache.setReadonly(child.fileName, False) 

1535 was_ro.append(child.fileName) 

1536 

1537 return was_ro 

1538 

1539 def _setChainRo(self, was_ro: List[str]) -> None: 

1540 """Set the list of LV in parameters to readonly""" 

1541 for lvName in was_ro: 

1542 self.sr.lvmCache.setReadonly(lvName, True) 

1543 

1544 @override 

1545 def _doCoalesce(self) -> None: 

1546 """LVMVDI parents must first be activated, inflated, and made writable""" 

1547 was_ro = [] 

1548 try: 

1549 self._activateChain() 

1550 self.sr.lvmCache.setReadonly(self.parent.fileName, False) 

1551 self.parent.validate() 

1552 self.inflateParentForCoalesce() 

1553 was_ro = self._setChainRw() 

1554 VDI._doCoalesce(self) 

1555 finally: 

1556 self.parent._loadInfoSizePhys() 

1557 self.parent.deflate() 

1558 self.sr.lvmCache.setReadonly(self.parent.fileName, True) 

1559 self._setChainRo(was_ro) 

1560 

1561 @override 

1562 def _setParent(self, parent) -> None: 

1563 self._activate() 

1564 if self.lvReadonly: 

1565 self.sr.lvmCache.setReadonly(self.fileName, False) 

1566 

1567 try: 

1568 self.cowutil.setParent(self.path, parent.path, parent.vdi_type == VdiType.RAW) 

1569 finally: 

1570 if self.lvReadonly: 

1571 self.sr.lvmCache.setReadonly(self.fileName, True) 

1572 self._deactivate() 

1573 self.parent = parent 

1574 self.parentUuid = parent.uuid 

1575 parent.children.append(self) 

1576 try: 

1577 self.setConfig(self.DB_VDI_PARENT, self.parentUuid) 

1578 Util.log("Updated the VDI-parent field for child %s with %s" % \ 

1579 (self.uuid, self.parentUuid)) 

1580 except: 

1581 Util.log("Failed to update the VDI-parent with %s for child %s" % \ 

1582 (self.parentUuid, self.uuid)) 

1583 

1584 def _activate(self): 

1585 self.sr.lvActivator.activate(self.uuid, self.fileName, False) 

1586 

1587 def _activateChain(self): 

1588 vdi = self 

1589 while vdi: 

1590 vdi._activate() 

1591 vdi = vdi.parent 

1592 

1593 def _deactivate(self): 

1594 self.sr.lvActivator.deactivate(self.uuid, False) 

1595 

1596 @override 

1597 def _increaseSizeVirt(self, size, atomic=True) -> None: 

1598 "ensure the virtual size of 'self' is at least 'size'" 

1599 self._activate() 

1600 if VdiType.isCowImage(self.vdi_type): 

1601 VDI._increaseSizeVirt(self, size, atomic) 

1602 return 

1603 

1604 # raw VDI case 

1605 offset = self.sizeLV 

1606 if self.sizeVirt < size: 

1607 oldSize = self.sizeLV 

1608 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size) 

1609 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV)) 

1610 self.sr.lvmCache.setSize(self.fileName, self.sizeLV) 

1611 offset = oldSize 

1612 unfinishedZero = False 

1613 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid) 

1614 if jval: 

1615 unfinishedZero = True 

1616 offset = int(jval) 

1617 length = self.sizeLV - offset 

1618 if not length: 

1619 return 

1620 

1621 if unfinishedZero: 

1622 Util.log(" ==> Redoing unfinished zeroing out") 

1623 else: 

1624 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \ 

1625 str(offset)) 

1626 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1627 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1628 func = lambda: util.zeroOut(self.path, offset, length) 

1629 Util.runAbortable(func, True, self.sr.uuid, abortTest, 

1630 VDI.POLL_INTERVAL, 0) 

1631 self.sr.journaler.remove(self.JRN_ZERO, self.uuid) 

1632 

1633 @override 

1634 def _setSizeVirt(self, size) -> None: 

1635 """WARNING: do not call this method directly unless all VDIs in the 

1636 subtree are guaranteed to be unplugged (and remain so for the duration 

1637 of the operation): this operation is only safe for offline COW images.""" 

1638 self._activate() 

1639 jFile = self.lvmcowutil.createResizeJournal(self.sr.lvmCache, self.uuid) 

1640 try: 

1641 self.lvmcowutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size, jFile) 

1642 finally: 

1643 self.lvmcowutil.destroyResizeJournal(self.sr.lvmCache, self.uuid) 

1644 

1645 @override 

1646 def _queryCowBlocks(self) -> bytes: 

1647 self._activate() 

1648 return VDI._queryCowBlocks(self) 

1649 

1650 @override 

1651 def _calcExtraSpaceForCoalescing(self) -> int: 

1652 if not VdiType.isCowImage(self.parent.vdi_type): 

1653 return 0 # raw parents are never deflated in the first place 

1654 sizeCoalesced = self.lvmcowutil.calcVolumeSize(self._getCoalescedSizeData()) 

1655 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1656 return sizeCoalesced - self.parent.sizeLV 

1657 

1658 @override 

1659 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1660 """How much extra space in the SR will be required to 

1661 [live-]leaf-coalesce this VDI""" 

1662 # we can deflate the leaf to minimize the space requirements 

1663 deflateDiff = self.sizeLV - lvutil.calcSizeLV(self.getSizePhys()) 

1664 return self._calcExtraSpaceForCoalescing() - deflateDiff 

1665 

1666 @override 

1667 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1668 return self._calcExtraSpaceForCoalescing() + \ 

1669 lvutil.calcSizeLV(self.getSizePhys()) 

1670 

1671 

1672class LinstorVDI(VDI): 

1673 """Object representing a VDI in a LINSTOR SR""" 

1674 

1675 VOLUME_LOCK_TIMEOUT = 30 

1676 

1677 @override 

1678 def load(self, info=None) -> None: 

1679 self.parentUuid = info.parentUuid 

1680 self.scanError = True 

1681 self.parent = None 

1682 self.children = [] 

1683 

1684 self.fileName = self.sr._linstor.get_volume_name(self.uuid) 

1685 self.path = self.sr._linstor.build_device_path(self.fileName) 

1686 self.linstorcowutil = LinstorCowUtil(self.sr.xapi.session, self.sr._linstor, info.vdiType) 

1687 

1688 if not info: 

1689 try: 

1690 info = self.linstorcowutil.get_info(self.uuid) 

1691 except util.SMException: 

1692 Util.log( 

1693 ' [VDI {}: failed to read COW image metadata]'.format(self.uuid) 

1694 ) 

1695 return 

1696 

1697 self.parentUuid = info.parentUuid 

1698 self.sizeVirt = info.sizeVirt 

1699 self._sizePhys = -1 

1700 self._sizeAllocated = -1 

1701 self.drbd_size = -1 

1702 self._hidden = info.hidden 

1703 self.scanError = False 

1704 

1705 @override 

1706 def getSizePhys(self, fetch=False) -> int: 

1707 if self._sizePhys < 0 or fetch: 

1708 self._sizePhys = self.linstorcowutil.get_size_phys(self.uuid) 

1709 return self._sizePhys 

1710 

1711 def getDrbdSize(self, fetch=False): 

1712 if self.drbd_size < 0 or fetch: 

1713 self.drbd_size = self.linstorcowutil.get_drbd_size(self.uuid) 

1714 return self.drbd_size 

1715 

1716 @override 

1717 def getAllocatedSize(self) -> int: 

1718 if self._sizeAllocated == -1: 

1719 if VdiType.isCowImage(self.vdi_type): 

1720 self._sizeAllocated = self.linstorcowutil.get_allocated_size(self.uuid) 

1721 return self._sizeAllocated 

1722 

1723 def inflate(self, size): 

1724 if not VdiType.isCowImage(self.vdi_type): 

1725 return 

1726 self.sr.lock() 

1727 try: 

1728 # Ensure we use the real DRBD size and not the cached one. 

1729 # Why? Because this attribute can be changed if volume is resized by user. 

1730 self.drbd_size = self.getDrbdSize(fetch=True) 

1731 self.linstorcowutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size) 

1732 finally: 

1733 self.sr.unlock() 

1734 self.drbd_size = -1 

1735 self._sizePhys = -1 

1736 self._sizeAllocated = -1 

1737 

1738 def deflate(self): 

1739 if not VdiType.isCowImage(self.vdi_type): 

1740 return 

1741 self.sr.lock() 

1742 try: 

1743 # Ensure we use the real sizes and not the cached info. 

1744 self.drbd_size = self.getDrbdSize(fetch=True) 

1745 self._sizePhys = self.getSizePhys(fetch=True) 

1746 self.linstorcowutil.force_deflate(self.path, self._sizePhys, self.drbd_size, zeroize=False) 

1747 finally: 

1748 self.sr.unlock() 

1749 self.drbd_size = -1 

1750 self._sizePhys = -1 

1751 self._sizeAllocated = -1 

1752 

1753 def inflateFully(self): 

1754 if VdiType.isCowImage(self.vdi_type): 

1755 self.inflate(self.linstorcowutil.compute_volume_size(self.sizeVirt)) 

1756 

1757 @override 

1758 def rename(self, uuid) -> None: 

1759 Util.log('Renaming {} -> {} (path={})'.format( 

1760 self.uuid, uuid, self.path 

1761 )) 

1762 self.sr._linstor.update_volume_uuid(self.uuid, uuid) 

1763 VDI.rename(self, uuid) 

1764 

1765 @override 

1766 def delete(self) -> None: 

1767 if len(self.children) > 0: 

1768 raise util.SMException( 

1769 'VDI {} has children, can\'t delete'.format(self.uuid) 

1770 ) 

1771 self.sr.lock() 

1772 try: 

1773 self.sr._linstor.destroy_volume(self.uuid) 

1774 self.sr.forgetVDI(self.uuid) 

1775 finally: 

1776 self.sr.unlock() 

1777 VDI.delete(self) 

1778 

1779 @override 

1780 def validate(self, fast=False) -> None: 

1781 if VdiType.isCowImage(self.vdi_type) and self.linstorcowutil.check(self.uuid, fast=fast) != CowUtil.CheckResult.Success: 

1782 raise util.SMException('COW image {} corrupted'.format(self)) 

1783 

1784 @override 

1785 def pause(self, failfast=False) -> None: 

1786 self.sr._linstor.ensure_volume_is_not_locked( 

1787 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1788 ) 

1789 return super(LinstorVDI, self).pause(failfast) 

1790 

1791 @override 

1792 def coalesce(self) -> int: 

1793 # Note: We raise `SMException` here to skip the current coalesce in case of failure. 

1794 # Using another exception we can't execute the next coalesce calls. 

1795 return self.linstorcowutil.force_coalesce(self.path) 

1796 

1797 @override 

1798 def getParent(self) -> str: 

1799 return self.linstorcowutil.get_parent( 

1800 self.sr._linstor.get_volume_uuid_from_device_path(self.path) 

1801 ) 

1802 

1803 @override 

1804 def repair(self, parent_uuid) -> None: 

1805 self.linstorcowutil.force_repair( 

1806 self.sr._linstor.get_device_path(parent_uuid) 

1807 ) 

1808 

1809 @override 

1810 def _relinkSkip(self) -> None: 

1811 abortFlag = IPCFlag(self.sr.uuid) 

1812 for child in self.children: 

1813 if abortFlag.test(FLAG_TYPE_ABORT): 

1814 raise AbortException('Aborting due to signal') 

1815 Util.log( 

1816 ' Relinking {} from {} to {}'.format( 

1817 child, self, self.parent 

1818 ) 

1819 ) 

1820 

1821 session = child.sr.xapi.session 

1822 sr_uuid = child.sr.uuid 

1823 vdi_uuid = child.uuid 

1824 try: 

1825 self.sr._linstor.ensure_volume_is_not_locked( 

1826 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1827 ) 

1828 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) 

1829 child._setParent(self.parent) 

1830 finally: 

1831 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) 

1832 self.children = [] 

1833 

1834 @override 

1835 def _setParent(self, parent) -> None: 

1836 self.sr._linstor.get_device_path(self.uuid) 

1837 self.linstorcowutil.force_parent(self.path, parent.path) 

1838 self.parent = parent 

1839 self.parentUuid = parent.uuid 

1840 parent.children.append(self) 

1841 try: 

1842 self.setConfig(self.DB_VDI_PARENT, self.parentUuid) 

1843 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1844 (self.uuid, self.parentUuid)) 

1845 except: 

1846 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1847 (self.uuid, self.parentUuid)) 

1848 

1849 @override 

1850 def _doCoalesce(self) -> None: 

1851 try: 

1852 self._activateChain() 

1853 self.parent.validate() 

1854 self._inflateParentForCoalesce() 

1855 VDI._doCoalesce(self) 

1856 finally: 

1857 self.parent.deflate() 

1858 

1859 def _activateChain(self): 

1860 vdi = self 

1861 while vdi: 

1862 try: 

1863 p = self.sr._linstor.get_device_path(vdi.uuid) 

1864 except Exception as e: 

1865 # Use SMException to skip coalesce. 

1866 # Otherwise the GC is stopped... 

1867 raise util.SMException(str(e)) 

1868 vdi = vdi.parent 

1869 

1870 @override 

1871 def _setHidden(self, hidden=True) -> None: 

1872 HIDDEN_TAG = 'hidden' 

1873 

1874 if not VdiType.isCowImage(self.vdi_type): 

1875 self._hidden = None 

1876 self.sr._linstor.update_volume_metadata(self.uuid, { 

1877 HIDDEN_TAG: hidden 

1878 }) 

1879 self._hidden = hidden 

1880 else: 

1881 VDI._setHidden(self, hidden) 

1882 

1883 @override 

1884 def _increaseSizeVirt(self, size, atomic=True): 

1885 if self.vdi_type == VdiType.RAW: 

1886 offset = self.drbd_size 

1887 if self.sizeVirt < size: 

1888 oldSize = self.drbd_size 

1889 self.drbd_size = LinstorVolumeManager.round_up_volume_size(size) 

1890 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size)) 

1891 self.sr._linstor.resize_volume(self.uuid, self.drbd_size) 

1892 offset = oldSize 

1893 unfinishedZero = False 

1894 jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid) 

1895 if jval: 

1896 unfinishedZero = True 

1897 offset = int(jval) 

1898 length = self.drbd_size - offset 

1899 if not length: 

1900 return 

1901 

1902 if unfinishedZero: 

1903 Util.log(" ==> Redoing unfinished zeroing out") 

1904 else: 

1905 self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset)) 

1906 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1907 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1908 func = lambda: util.zeroOut(self.path, offset, length) 

1909 Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0) 

1910 self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid) 

1911 return 

1912 

1913 if self.sizeVirt >= size: 

1914 return 

1915 Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \ 

1916 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1917 

1918 msize = self.linstorcowutil.get_max_resize_size(self.uuid) * 1024 * 1024 

1919 if (size <= msize): 

1920 self.linstorcowutil.set_size_virt_fast(self.path, size) 

1921 else: 

1922 if atomic: 

1923 vdiList = self._getAllSubtree() 

1924 self.sr.lock() 

1925 try: 

1926 self.sr.pauseVDIs(vdiList) 

1927 try: 

1928 self._setSizeVirt(size) 

1929 finally: 

1930 self.sr.unpauseVDIs(vdiList) 

1931 finally: 

1932 self.sr.unlock() 

1933 else: 

1934 self._setSizeVirt(size) 

1935 

1936 self.sizeVirt = self.linstorcowutil.get_size_virt(self.uuid) 

1937 

1938 @override 

1939 def _setSizeVirt(self, size) -> None: 

1940 jfile = self.uuid + '-jvhd' 

1941 self.sr._linstor.create_volume( 

1942 jfile, self.cowutil.getResizeJournalSize(), persistent=False, volume_name=jfile 

1943 ) 

1944 try: 

1945 self.inflate(self.linstorcowutil.compute_volume_size(size)) 

1946 self.linstorcowutil.set_size_virt(self.path, size, jfile) 

1947 finally: 

1948 try: 

1949 self.sr._linstor.destroy_volume(jfile) 

1950 except Exception: 

1951 # We can ignore it, in any case this volume is not persistent. 

1952 pass 

1953 

1954 @override 

1955 def _queryCowBlocks(self) -> bytes: 

1956 return self.linstorcowutil.get_block_bitmap(self.uuid) 

1957 

1958 def _inflateParentForCoalesce(self): 

1959 if not VdiType.isCowImage(self.parent.vdi_type): 

1960 return 

1961 inc = self._calcExtraSpaceForCoalescing() 

1962 if inc > 0: 

1963 self.parent.inflate(self.parent.getDrbdSize() + inc) 

1964 

1965 @override 

1966 def _calcExtraSpaceForCoalescing(self) -> int: 

1967 if not VdiType.isCowImage(self.parent.vdi_type): 

1968 return 0 

1969 size_coalesced = self.linstorcowutil.compute_volume_size(self._getCoalescedSizeData()) 

1970 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced)) 

1971 return size_coalesced - self.parent.getDrbdSize() 

1972 

1973 @override 

1974 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1975 assert self.getDrbdSize() > 0 

1976 assert self.getSizePhys() > 0 

1977 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizePhys()) 

1978 assert deflate_diff >= 0 

1979 return self._calcExtraSpaceForCoalescing() - deflate_diff 

1980 

1981 @override 

1982 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1983 assert self.getSizePhys() > 0 

1984 return self._calcExtraSpaceForCoalescing() + \ 

1985 LinstorVolumeManager.round_up_volume_size(self.getSizePhys()) 

1986 

1987################################################################################ 

1988# 

1989# SR 

1990# 

1991class SR(object): 

1992 class LogFilter: 

1993 def __init__(self, sr): 

1994 self.sr = sr 

1995 self.stateLogged = False 

1996 self.prevState = {} 

1997 self.currState = {} 

1998 

1999 def logState(self): 

2000 changes = "" 

2001 self.currState.clear() 

2002 for vdi in self.sr.vdiTrees: 

2003 self.currState[vdi.uuid] = self._getTreeStr(vdi) 

2004 if not self.prevState.get(vdi.uuid) or \ 

2005 self.prevState[vdi.uuid] != self.currState[vdi.uuid]: 

2006 changes += self.currState[vdi.uuid] 

2007 

2008 for uuid in self.prevState: 

2009 if not self.currState.get(uuid): 

2010 changes += "Tree %s gone\n" % uuid 

2011 

2012 result = "SR %s (%d VDIs in %d COW trees): " % \ 

2013 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees)) 

2014 

2015 if len(changes) > 0: 

2016 if self.stateLogged: 

2017 result += "showing only COW trees that changed:" 

2018 result += "\n%s" % changes 

2019 else: 

2020 result += "no changes" 

2021 

2022 for line in result.split("\n"): 

2023 Util.log("%s" % line) 

2024 self.prevState.clear() 

2025 for key, val in self.currState.items(): 

2026 self.prevState[key] = val 

2027 self.stateLogged = True 

2028 

2029 def logNewVDI(self, uuid): 

2030 if self.stateLogged: 

2031 Util.log("Found new VDI when scanning: %s" % uuid) 

2032 

2033 def _getTreeStr(self, vdi, indent=8): 

2034 treeStr = "%s%s\n" % (" " * indent, vdi) 

2035 for child in vdi.children: 

2036 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT) 

2037 return treeStr 

2038 

2039 TYPE_FILE = "file" 

2040 TYPE_LVHD = "lvhd" 

2041 TYPE_LINSTOR = "linstor" 

2042 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR] 

2043 

2044 LOCK_RETRY_INTERVAL = 3 

2045 LOCK_RETRY_ATTEMPTS = 20 

2046 LOCK_RETRY_ATTEMPTS_LOCK = 100 

2047 

2048 SCAN_RETRY_ATTEMPTS = 3 

2049 

2050 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM) 

2051 TMP_RENAME_PREFIX = "OLD_" 

2052 

2053 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline" 

2054 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override" 

2055 

2056 @staticmethod 

2057 def getInstance(uuid, xapiSession, createLock=True, force=False): 

2058 xapi = XAPI(xapiSession, uuid) 

2059 type = normalizeType(xapi.srRecord["type"]) 

2060 if type == SR.TYPE_FILE: 

2061 return FileSR(uuid, xapi, createLock, force) 

2062 elif type == SR.TYPE_LVHD: 

2063 return LVMSR(uuid, xapi, createLock, force) 

2064 elif type == SR.TYPE_LINSTOR: 

2065 return LinstorSR(uuid, xapi, createLock, force) 

2066 raise util.SMException("SR type %s not recognized" % type) 

2067 

2068 def __init__(self, uuid, xapi, createLock, force): 

2069 self.logFilter = self.LogFilter(self) 

2070 self.uuid = uuid 

2071 self.path = "" 

2072 self.name = "" 

2073 self.vdis = {} 

2074 self.vdiTrees = [] 

2075 self.journaler = None 

2076 self.xapi = xapi 

2077 self._locked = 0 

2078 self._srLock = None 

2079 if createLock: 2079 ↛ 2080line 2079 didn't jump to line 2080, because the condition on line 2079 was never true

2080 self._srLock = lock.Lock(lock.LOCK_TYPE_SR, self.uuid) 

2081 else: 

2082 Util.log("Requested no SR locking") 

2083 self.name = self.xapi.srRecord["name_label"] 

2084 self._failedCoalesceTargets = [] 

2085 

2086 if not self.xapi.isPluggedHere(): 

2087 if force: 2087 ↛ 2088line 2087 didn't jump to line 2088, because the condition on line 2087 was never true

2088 Util.log("SR %s not attached on this host, ignoring" % uuid) 

2089 else: 

2090 if not self.wait_for_plug(): 

2091 raise util.SMException("SR %s not attached on this host" % uuid) 

2092 

2093 if force: 2093 ↛ 2094line 2093 didn't jump to line 2094, because the condition on line 2093 was never true

2094 Util.log("Not checking if we are Master (SR %s)" % uuid) 

2095 elif not self.xapi.isMaster(): 2095 ↛ 2096line 2095 didn't jump to line 2096, because the condition on line 2095 was never true

2096 raise util.SMException("This host is NOT master, will not run") 

2097 

2098 self.no_space_candidates = {} 

2099 

2100 def msg_cleared(self, xapi_session, msg_ref): 

2101 try: 

2102 msg = xapi_session.xenapi.message.get_record(msg_ref) 

2103 except XenAPI.Failure: 

2104 return True 

2105 

2106 return msg is None 

2107 

2108 def check_no_space_candidates(self): 

2109 xapi_session = self.xapi.getSession() 

2110 

2111 msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE) 

2112 if self.no_space_candidates: 

2113 if msg_id is None or self.msg_cleared(xapi_session, msg_id): 

2114 util.SMlog("Could not coalesce due to a lack of space " 

2115 f"in SR {self.uuid}") 

2116 msg_body = ("Unable to perform data coalesce due to a lack " 

2117 f"of space in SR {self.uuid}") 

2118 msg_id = xapi_session.xenapi.message.create( 

2119 'SM_GC_NO_SPACE', 

2120 3, 

2121 "SR", 

2122 self.uuid, 

2123 msg_body) 

2124 xapi_session.xenapi.SR.remove_from_sm_config( 

2125 self.xapi.srRef, VDI.DB_GC_NO_SPACE) 

2126 xapi_session.xenapi.SR.add_to_sm_config( 

2127 self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id) 

2128 

2129 for candidate in self.no_space_candidates.values(): 

2130 candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id) 

2131 elif msg_id is not None: 

2132 # Everything was coalescable, remove the message 

2133 xapi_session.xenapi.message.destroy(msg_id) 

2134 

2135 def clear_no_space_msg(self, vdi): 

2136 msg_id = None 

2137 try: 

2138 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE) 

2139 except XenAPI.Failure: 

2140 pass 

2141 

2142 self.no_space_candidates.pop(vdi.uuid, None) 

2143 if msg_id is not None: 2143 ↛ exitline 2143 didn't return from function 'clear_no_space_msg', because the condition on line 2143 was never false

2144 vdi.delConfig(VDI.DB_GC_NO_SPACE) 

2145 

2146 

2147 def wait_for_plug(self): 

2148 for _ in range(1, 10): 

2149 time.sleep(2) 

2150 if self.xapi.isPluggedHere(): 

2151 return True 

2152 return False 

2153 

2154 def gcEnabled(self, refresh=True): 

2155 if refresh: 

2156 self.xapi.srRecord = \ 

2157 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

2158 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

2159 Util.log("GC is disabled for this SR, abort") 

2160 return False 

2161 return True 

2162 

2163 def scan(self, force=False) -> None: 

2164 """Scan the SR and load VDI info for each VDI. If called repeatedly, 

2165 update VDI objects if they already exist""" 

2166 pass 

2167 

2168 def scanLocked(self, force=False): 

2169 self.lock() 

2170 try: 

2171 self.scan(force) 

2172 finally: 

2173 self.unlock() 

2174 

2175 def getVDI(self, uuid): 

2176 return self.vdis.get(uuid) 

2177 

2178 def hasWork(self): 

2179 if len(self.findGarbage()) > 0: 

2180 return True 

2181 if self.findCoalesceable(): 

2182 return True 

2183 if self.findLeafCoalesceable(): 

2184 return True 

2185 if self.needUpdateBlockInfo(): 

2186 return True 

2187 return False 

2188 

2189 def findCoalesceable(self): 

2190 """Find a coalesceable VDI. Return a vdi that should be coalesced 

2191 (choosing one among all coalesceable candidates according to some 

2192 criteria) or None if there is no VDI that could be coalesced""" 

2193 

2194 candidates = [] 

2195 

2196 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE) 

2197 if srSwitch == "false": 

2198 Util.log("Coalesce disabled for this SR") 

2199 return candidates 

2200 

2201 # finish any VDI for which a relink journal entry exists first 

2202 journals = self.journaler.getAll(VDI.JRN_RELINK) 

2203 for uuid in journals: 

2204 vdi = self.getVDI(uuid) 

2205 if vdi and vdi not in self._failedCoalesceTargets: 

2206 return vdi 

2207 

2208 for vdi in self.vdis.values(): 

2209 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets: 

2210 candidates.append(vdi) 

2211 Util.log("%s is coalescable" % vdi.uuid) 

2212 

2213 self.xapi.update_task_progress("coalescable", len(candidates)) 

2214 

2215 # pick one in the tallest tree 

2216 treeHeight = dict() 

2217 for c in candidates: 

2218 height = c.getTreeRoot().getTreeHeight() 

2219 if treeHeight.get(height): 

2220 treeHeight[height].append(c) 

2221 else: 

2222 treeHeight[height] = [c] 

2223 

2224 freeSpace = self.getFreeSpace() 

2225 heights = list(treeHeight.keys()) 

2226 heights.sort(reverse=True) 

2227 for h in heights: 

2228 for c in treeHeight[h]: 

2229 spaceNeeded = c._calcExtraSpaceForCoalescing() 

2230 if spaceNeeded <= freeSpace: 

2231 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h)) 

2232 self.clear_no_space_msg(c) 

2233 return c 

2234 else: 

2235 self.no_space_candidates[c.uuid] = c 

2236 Util.log("No space to coalesce %s (free space: %d)" % \ 

2237 (c, freeSpace)) 

2238 return None 

2239 

2240 def getSwitch(self, key): 

2241 return self.xapi.srRecord["other_config"].get(key) 

2242 

2243 def forbiddenBySwitch(self, switch, condition, fail_msg): 

2244 srSwitch = self.getSwitch(switch) 

2245 ret = False 

2246 if srSwitch: 

2247 ret = srSwitch == condition 

2248 

2249 if ret: 

2250 Util.log(fail_msg) 

2251 

2252 return ret 

2253 

2254 def leafCoalesceForbidden(self): 

2255 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

2256 "false", 

2257 "Coalesce disabled for this SR") or 

2258 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

2259 VDI.LEAFCLSC_DISABLED, 

2260 "Leaf-coalesce disabled for this SR")) 

2261 

2262 def findLeafCoalesceable(self): 

2263 """Find leaf-coalesceable VDIs in each COW tree""" 

2264 

2265 candidates = [] 

2266 if self.leafCoalesceForbidden(): 

2267 return candidates 

2268 

2269 self.gatherLeafCoalesceable(candidates) 

2270 

2271 self.xapi.update_task_progress("coalescable", len(candidates)) 

2272 

2273 freeSpace = self.getFreeSpace() 

2274 for candidate in candidates: 

2275 # check the space constraints to see if leaf-coalesce is actually 

2276 # feasible for this candidate 

2277 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing() 

2278 spaceNeededLive = spaceNeeded 

2279 if spaceNeeded > freeSpace: 

2280 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing() 

2281 if candidate.canLiveCoalesce(self.getStorageSpeed()): 

2282 spaceNeeded = spaceNeededLive 

2283 

2284 if spaceNeeded <= freeSpace: 

2285 Util.log("Leaf-coalesce candidate: %s" % candidate) 

2286 self.clear_no_space_msg(candidate) 

2287 return candidate 

2288 else: 

2289 Util.log("No space to leaf-coalesce %s (free space: %d)" % \ 

2290 (candidate, freeSpace)) 

2291 if spaceNeededLive <= freeSpace: 

2292 Util.log("...but enough space if skip snap-coalesce") 

2293 candidate.setConfig(VDI.DB_LEAFCLSC, 

2294 VDI.LEAFCLSC_OFFLINE) 

2295 self.no_space_candidates[candidate.uuid] = candidate 

2296 

2297 return None 

2298 

2299 def gatherLeafCoalesceable(self, candidates): 

2300 for vdi in self.vdis.values(): 

2301 if not vdi.isLeafCoalesceable(): 

2302 continue 

2303 if vdi in self._failedCoalesceTargets: 

2304 continue 

2305 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET: 

2306 Util.log("Skipping reset-on-boot %s" % vdi) 

2307 continue 

2308 if vdi.getConfig(vdi.DB_ALLOW_CACHING): 

2309 Util.log("Skipping allow_caching=true %s" % vdi) 

2310 continue 

2311 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED: 

2312 Util.log("Leaf-coalesce disabled for %s" % vdi) 

2313 continue 

2314 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or 

2315 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE): 

2316 continue 

2317 candidates.append(vdi) 

2318 

2319 def coalesce(self, vdi, dryRun=False): 

2320 """Coalesce vdi onto parent""" 

2321 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent)) 

2322 if dryRun: 2322 ↛ 2323line 2322 didn't jump to line 2323, because the condition on line 2322 was never true

2323 return 

2324 

2325 try: 

2326 self._coalesce(vdi) 

2327 except util.SMException as e: 

2328 if isinstance(e, AbortException): 2328 ↛ 2329line 2328 didn't jump to line 2329, because the condition on line 2328 was never true

2329 self.cleanup() 

2330 raise 

2331 else: 

2332 self._failedCoalesceTargets.append(vdi) 

2333 Util.logException("coalesce") 

2334 Util.log("Coalesce failed, skipping") 

2335 self.cleanup() 

2336 

2337 def coalesceLeaf(self, vdi, dryRun=False): 

2338 """Leaf-coalesce vdi onto parent""" 

2339 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent)) 

2340 if dryRun: 

2341 return 

2342 

2343 try: 

2344 uuid = vdi.uuid 

2345 try: 

2346 # "vdi" object will no longer be valid after this call 

2347 self._coalesceLeaf(vdi) 

2348 finally: 

2349 vdi = self.getVDI(uuid) 

2350 if vdi: 

2351 vdi.delConfig(vdi.DB_LEAFCLSC) 

2352 except AbortException: 

2353 self.cleanup() 

2354 raise 

2355 except (util.SMException, XenAPI.Failure) as e: 

2356 self._failedCoalesceTargets.append(vdi) 

2357 Util.logException("leaf-coalesce") 

2358 Util.log("Leaf-coalesce failed on %s, skipping" % vdi) 

2359 self.cleanup() 

2360 

2361 def garbageCollect(self, dryRun=False): 

2362 vdiList = self.findGarbage() 

2363 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

2364 for vdi in vdiList: 

2365 Util.log(" %s" % vdi) 

2366 if not dryRun: 

2367 self.deleteVDIs(vdiList) 

2368 self.cleanupJournals(dryRun) 

2369 

2370 def findGarbage(self): 

2371 vdiList = [] 

2372 for vdi in self.vdiTrees: 

2373 vdiList.extend(vdi.getAllPrunable()) 

2374 return vdiList 

2375 

2376 def deleteVDIs(self, vdiList) -> None: 

2377 for vdi in vdiList: 

2378 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): 

2379 raise AbortException("Aborting due to signal") 

2380 Util.log("Deleting unlinked VDI %s" % vdi) 

2381 self.deleteVDI(vdi) 

2382 

2383 def deleteVDI(self, vdi) -> None: 

2384 assert(len(vdi.children) == 0) 

2385 del self.vdis[vdi.uuid] 

2386 if vdi.parent: 2386 ↛ 2388line 2386 didn't jump to line 2388, because the condition on line 2386 was never false

2387 vdi.parent.children.remove(vdi) 

2388 if vdi in self.vdiTrees: 2388 ↛ 2389line 2388 didn't jump to line 2389, because the condition on line 2388 was never true

2389 self.vdiTrees.remove(vdi) 

2390 vdi.delete() 

2391 

2392 def forgetVDI(self, vdiUuid) -> None: 

2393 self.xapi.forgetVDI(self.uuid, vdiUuid) 

2394 

2395 def pauseVDIs(self, vdiList) -> None: 

2396 paused = [] 

2397 failed = False 

2398 for vdi in vdiList: 

2399 try: 

2400 vdi.pause() 

2401 paused.append(vdi) 

2402 except: 

2403 Util.logException("pauseVDIs") 

2404 failed = True 

2405 break 

2406 

2407 if failed: 

2408 self.unpauseVDIs(paused) 

2409 raise util.SMException("Failed to pause VDIs") 

2410 

2411 def unpauseVDIs(self, vdiList): 

2412 failed = False 

2413 for vdi in vdiList: 

2414 try: 

2415 vdi.unpause() 

2416 except: 

2417 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

2418 failed = True 

2419 if failed: 

2420 raise util.SMException("Failed to unpause VDIs") 

2421 

2422 def getFreeSpace(self) -> int: 

2423 return 0 

2424 

2425 def cleanup(self): 

2426 Util.log("In cleanup") 

2427 return 

2428 

2429 @override 

2430 def __str__(self) -> str: 

2431 if self.name: 

2432 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

2433 else: 

2434 ret = "%s" % self.uuid 

2435 return ret 

2436 

2437 def lock(self): 

2438 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort 

2439 signal to avoid deadlocking (trying to acquire the SR lock while the 

2440 lock is held by a process that is trying to abort us)""" 

2441 if not self._srLock: 

2442 return 

2443 

2444 if self._locked == 0: 

2445 abortFlag = IPCFlag(self.uuid) 

2446 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK): 

2447 if self._srLock.acquireNoblock(): 

2448 self._locked += 1 

2449 return 

2450 if abortFlag.test(FLAG_TYPE_ABORT): 

2451 raise AbortException("Abort requested") 

2452 time.sleep(SR.LOCK_RETRY_INTERVAL) 

2453 raise util.SMException("Unable to acquire the SR lock") 

2454 

2455 self._locked += 1 

2456 

2457 def unlock(self): 

2458 if not self._srLock: 2458 ↛ 2460line 2458 didn't jump to line 2460, because the condition on line 2458 was never false

2459 return 

2460 assert(self._locked > 0) 

2461 self._locked -= 1 

2462 if self._locked == 0: 

2463 self._srLock.release() 

2464 

2465 def needUpdateBlockInfo(self) -> bool: 

2466 for vdi in self.vdis.values(): 

2467 if vdi.scanError or len(vdi.children) == 0: 

2468 continue 

2469 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

2470 return True 

2471 return False 

2472 

2473 def updateBlockInfo(self) -> None: 

2474 for vdi in self.vdis.values(): 

2475 if vdi.scanError or len(vdi.children) == 0: 

2476 continue 

2477 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

2478 vdi.updateBlockInfo() 

2479 

2480 def cleanupCoalesceJournals(self): 

2481 """Remove stale coalesce VDI indicators""" 

2482 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2483 for uuid, jval in entries.items(): 

2484 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2485 

2486 def cleanupJournals(self, dryRun=False): 

2487 """delete journal entries for non-existing VDIs""" 

2488 for t in [LVMVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]: 

2489 entries = self.journaler.getAll(t) 

2490 for uuid, jval in entries.items(): 

2491 if self.getVDI(uuid): 

2492 continue 

2493 if t == SR.JRN_CLONE: 

2494 baseUuid, clonUuid = jval.split("_") 

2495 if self.getVDI(baseUuid): 

2496 continue 

2497 Util.log(" Deleting stale '%s' journal entry for %s " 

2498 "(%s)" % (t, uuid, jval)) 

2499 if not dryRun: 

2500 self.journaler.remove(t, uuid) 

2501 

2502 def cleanupCache(self, maxAge=-1) -> int: 

2503 return 0 

2504 

2505 def _hasLeavesAttachedOn(self, vdi: VDI): 

2506 leaves = vdi.getAllLeaves() 

2507 leaves_vdi = [leaf.uuid for leaf in leaves] 

2508 return util.get_hosts_attached_on(self.xapi.session, leaves_vdi) 

2509 

2510 def _gc_running_file(self, vdi: VDI): 

2511 run_file = "gc_running_{}".format(vdi.uuid) 

2512 return os.path.join(NON_PERSISTENT_DIR, str(self.uuid), run_file) 

2513 

2514 def _create_running_file(self, vdi: VDI): 

2515 with open(self._gc_running_file(vdi), "w") as f: 

2516 f.write("1") 

2517 

2518 def _delete_running_file(self, vdi: VDI): 

2519 os.unlink(self._gc_running_file(vdi)) 

2520 

2521 def _coalesce(self, vdi: VDI): 

2522 list_not_to_relink = None 

2523 if self.journaler.get(vdi.JRN_RELINK, vdi.uuid): 2523 ↛ 2526line 2523 didn't jump to line 2526, because the condition on line 2523 was never true

2524 # this means we had done the actual coalescing already and just 

2525 # need to finish relinking and/or refreshing the children 

2526 Util.log("==> Coalesce apparently already done: skipping") 

2527 else: 

2528 # JRN_COALESCE is used to check which VDI is being coalesced in 

2529 # order to decide whether to abort the coalesce. We remove the 

2530 # journal as soon as the COW coalesce step is done, because we 

2531 # don't expect the rest of the process to take long 

2532 

2533 if os.path.exists(self._gc_running_file(vdi)): 2533 ↛ 2534line 2533 didn't jump to line 2534, because the condition on line 2533 was never true

2534 util.SMlog("gc_running already exist for {}. Ignoring...".format(self.uuid)) 

2535 

2536 self._create_running_file(vdi) 

2537 

2538 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1") 

2539 host_refs = self._hasLeavesAttachedOn(vdi) 

2540 #TODO: this check of multiple host_refs should be done earlier in `is_coalesceable` to avoid stopping this late every time 

2541 if len(host_refs) > 1: 2541 ↛ 2542line 2541 didn't jump to line 2542, because the condition on line 2541 was never true

2542 Util.log("Not coalesceable, chain activated more than once") 

2543 raise Exception("Not coalesceable, chain activated more than once") #TODO: Use correct error 

2544 

2545 try: 

2546 if host_refs and vdi.cowutil.isCoalesceableOnRemote(): 2546 ↛ 2548line 2546 didn't jump to line 2548, because the condition on line 2546 was never true

2547 #Leaf opened on another host, we need to call online coalesce 

2548 Util.log("Remote coalesce for {}".format(vdi.path)) 

2549 vdi._doCoalesceOnHost(list(host_refs)[0]) 

2550 # If we use a host OpaqueRef to do a online coalesce, this vdi will not need to be relinked since it was done by tapdisk 

2551 # If we coalesce up the chain, we shouldn't need to do the relink at all, we only need to do the relink on the children if their direct parent was the one we were coalescing 

2552 for child in vdi.children: 

2553 real_parent_uuid = child.extractUuid(child.getParent()) 

2554 if real_parent_uuid == vdi.parent.uuid: 

2555 child._update_vhd_parent(real_parent_uuid) # We update the sm-config:vhd-parent value for this VDI since it has already been relinked 

2556 list_not_to_relink = [leaf.uuid for leaf in child.getAllLeaves()] 

2557 else: 

2558 Util.log("Offline coalesce for {}".format(vdi.path)) 

2559 vdi._doCoalesce() 

2560 except Exception as e: 

2561 Util.log("EXCEPTION while coalescing: {}".format(e)) 

2562 self._delete_running_file(vdi) 

2563 raise 

2564 

2565 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid) 

2566 self._delete_running_file(vdi) 

2567 

2568 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid) 

2569 

2570 # we now need to relink the children: lock the SR to prevent ops 

2571 # like SM.clone from manipulating the VDIs we'll be relinking and 

2572 # rescan the SR first in case the children changed since the last 

2573 # scan 

2574 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1") 

2575 

2576 self.lock() 

2577 try: 

2578 vdi.parent._tagChildrenForRelink(list_not_to_relink) 

2579 self.scan() 

2580 vdi._relinkSkip() 

2581 finally: 

2582 self.unlock() 

2583 # Reload the children to leave things consistent 

2584 vdi.parent._reloadChildren(vdi) 

2585 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid) 

2586 

2587 self.deleteVDI(vdi) 

2588 

2589 class CoalesceTracker: 

2590 GRACE_ITERATIONS = 2 

2591 MAX_ITERATIONS_NO_PROGRESS = 3 

2592 MAX_ITERATIONS = 20 

2593 MAX_INCREASE_FROM_MINIMUM = 1.2 

2594 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2595 " --> Final size {finSize}" 

2596 

2597 def __init__(self, sr): 

2598 self.itsNoProgress = 0 

2599 self.its = 0 

2600 self.minSize = float("inf") 

2601 self._history = [] 

2602 self.reason = "" 

2603 self.startSize = None 

2604 self.finishSize = None 

2605 self.sr = sr 

2606 self.grace_remaining = self.GRACE_ITERATIONS 

2607 

2608 @property 

2609 def history(self): 

2610 return [x['msg'] for x in self._history] 

2611 

2612 def moving_average(self): 

2613 """ 

2614 Calculate a three point moving average 

2615 """ 

2616 assert len(self._history) >= 3 

2617 

2618 mv_average = sum([x['finalsize'] for x in self._history]) / len(self._history) 

2619 util.SMlog(f'Calculated moving average as {mv_average}') 

2620 return mv_average 

2621 

2622 def abortCoalesce(self, prevSize, curSize): 

2623 self.its += 1 

2624 self._history.append( 

2625 { 

2626 'finalsize': curSize, 

2627 'msg': self.HISTORY_STRING.format(its=self.its, 

2628 initSize=prevSize, 

2629 finSize=curSize) 

2630 } 

2631 ) 

2632 

2633 self.finishSize = curSize 

2634 

2635 if self.startSize is None: 

2636 self.startSize = prevSize 

2637 

2638 if curSize < self.minSize: 

2639 self.minSize = curSize 

2640 

2641 if prevSize < self.minSize: 

2642 self.minSize = prevSize 

2643 

2644 if self.its < 4: 

2645 # Perform at least three iterations 

2646 return False 

2647 

2648 if prevSize >= curSize or curSize < self.moving_average(): 

2649 # We made progress 

2650 return False 

2651 else: 

2652 self.itsNoProgress += 1 

2653 Util.log("No progress, attempt:" 

2654 " {attempt}".format(attempt=self.itsNoProgress)) 

2655 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2656 

2657 if self.its > self.MAX_ITERATIONS: 

2658 max = self.MAX_ITERATIONS 

2659 self.reason = \ 

2660 "Max iterations ({max}) exceeded".format(max=max) 

2661 return True 

2662 

2663 if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS: 

2664 max = self.MAX_ITERATIONS_NO_PROGRESS 

2665 self.reason = \ 

2666 "No progress made for {max} iterations".format(max=max) 

2667 return True 

2668 

2669 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2670 if curSize > maxSizeFromMin: 

2671 self.grace_remaining -= 1 

2672 if self.grace_remaining == 0: 

2673 self.reason = "Unexpected bump in size," \ 

2674 " compared to minimum achieved" 

2675 

2676 return True 

2677 

2678 return False 

2679 

2680 def printSizes(self): 

2681 Util.log("Starting size was {size}" 

2682 .format(size=self.startSize)) 

2683 Util.log("Final size was {size}" 

2684 .format(size=self.finishSize)) 

2685 Util.log("Minimum size achieved was {size}" 

2686 .format(size=self.minSize)) 

2687 

2688 def printReasoning(self): 

2689 Util.log("Aborted coalesce") 

2690 for hist in self.history: 

2691 Util.log(hist) 

2692 Util.log(self.reason) 

2693 self.printSizes() 

2694 

2695 def printSummary(self): 

2696 if self.its == 0: 

2697 return 

2698 

2699 if self.reason: 2699 ↛ 2700line 2699 didn't jump to line 2700, because the condition on line 2699 was never true

2700 Util.log("Aborted coalesce") 

2701 Util.log(self.reason) 

2702 else: 

2703 Util.log("Coalesce summary") 

2704 

2705 Util.log(f"Performed {self.its} iterations") 

2706 self.printSizes() 

2707 

2708 

2709 def _coalesceLeaf(self, vdi): 

2710 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot 

2711 complete due to external changes, namely vdi_delete and vdi_snapshot 

2712 that alter leaf-coalescibility of vdi""" 

2713 tracker = self.CoalesceTracker(self) 

2714 while not vdi.canLiveCoalesce(self.getStorageSpeed()): 

2715 prevSizePhys = vdi.getSizePhys() 

2716 if not self._snapshotCoalesce(vdi): 2716 ↛ 2717line 2716 didn't jump to line 2717, because the condition on line 2716 was never true

2717 return False 

2718 if tracker.abortCoalesce(prevSizePhys, vdi.getSizePhys()): 

2719 tracker.printReasoning() 

2720 raise util.SMException("VDI {uuid} could not be coalesced" 

2721 .format(uuid=vdi.uuid)) 

2722 tracker.printSummary() 

2723 return self._liveLeafCoalesce(vdi) 

2724 

2725 def calcStorageSpeed(self, startTime, endTime, coalescedSize): 

2726 speed = None 

2727 total_time = endTime - startTime 

2728 if total_time > 0: 

2729 speed = float(coalescedSize) / float(total_time) 

2730 return speed 

2731 

2732 def writeSpeedToFile(self, speed): 

2733 content = [] 

2734 speedFile = None 

2735 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2736 self.lock() 

2737 try: 

2738 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2739 lines = "" 

2740 if not os.path.isfile(path): 

2741 lines = str(speed) + "\n" 

2742 else: 

2743 speedFile = open(path, "r+") 

2744 content = speedFile.readlines() 

2745 content.append(str(speed) + "\n") 

2746 if len(content) > N_RUNNING_AVERAGE: 

2747 del content[0] 

2748 lines = "".join(content) 

2749 

2750 util.atomicFileWrite(path, VAR_RUN, lines) 

2751 finally: 

2752 if speedFile is not None: 

2753 speedFile.close() 

2754 Util.log("Closing file: {myfile}".format(myfile=path)) 

2755 self.unlock() 

2756 

2757 def recordStorageSpeed(self, startTime, endTime, coalescedSize): 

2758 speed = self.calcStorageSpeed(startTime, endTime, coalescedSize) 

2759 if speed is None: 

2760 return 

2761 

2762 self.writeSpeedToFile(speed) 

2763 

2764 def getStorageSpeed(self): 

2765 speedFile = None 

2766 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2767 self.lock() 

2768 try: 

2769 speed = None 

2770 if os.path.isfile(path): 

2771 speedFile = open(path) 

2772 content = speedFile.readlines() 

2773 try: 

2774 content = [float(i) for i in content] 

2775 except ValueError: 

2776 Util.log("Something bad in the speed log:{log}". 

2777 format(log=speedFile.readlines())) 

2778 return speed 

2779 

2780 if len(content): 

2781 speed = sum(content) / float(len(content)) 

2782 if speed <= 0: 2782 ↛ 2784line 2782 didn't jump to line 2784, because the condition on line 2782 was never true

2783 # Defensive, should be impossible. 

2784 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2785 format(speed=speed, uuid=self.uuid)) 

2786 speed = None 

2787 else: 

2788 Util.log("Speed file empty for SR: {uuid}". 

2789 format(uuid=self.uuid)) 

2790 else: 

2791 Util.log("Speed log missing for SR: {uuid}". 

2792 format(uuid=self.uuid)) 

2793 return speed 

2794 finally: 

2795 if not (speedFile is None): 

2796 speedFile.close() 

2797 self.unlock() 

2798 

2799 def _snapshotCoalesce(self, vdi): 

2800 # Note that because we are not holding any locks here, concurrent SM 

2801 # operations may change this tree under our feet. In particular, vdi 

2802 # can be deleted, or it can be snapshotted. 

2803 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2804 Util.log("Single-snapshotting %s" % vdi) 

2805 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2806 try: 

2807 ret = self.xapi.singleSnapshotVDI(vdi) 

2808 Util.log("Single-snapshot returned: %s" % ret) 

2809 except XenAPI.Failure as e: 

2810 if util.isInvalidVDI(e): 

2811 Util.log("The VDI appears to have been concurrently deleted") 

2812 return False 

2813 raise 

2814 self.scanLocked() 

2815 tempSnap = vdi.parent 

2816 if not tempSnap.isCoalesceable(): 

2817 Util.log("The VDI appears to have been concurrently snapshotted") 

2818 return False 

2819 Util.log("Coalescing parent %s" % tempSnap) 

2820 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2821 sizePhys = vdi.getSizePhys() 

2822 self._coalesce(tempSnap) 

2823 if not vdi.isLeafCoalesceable(): 

2824 Util.log("The VDI tree appears to have been altered since") 

2825 return False 

2826 return True 

2827 

2828 def _liveLeafCoalesce(self, vdi: VDI) -> bool: 

2829 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) 

2830 self.lock() 

2831 try: 

2832 self.scan() 

2833 if not self.getVDI(vdi.uuid): 

2834 Util.log("The VDI appears to have been deleted meanwhile") 

2835 return False 

2836 if not vdi.isLeafCoalesceable(): 

2837 Util.log("The VDI is no longer leaf-coalesceable") 

2838 return False 

2839 

2840 uuid = vdi.uuid 

2841 vdi.pause(failfast=True) 

2842 try: 

2843 try: 

2844 # "vdi" object will no longer be valid after this call 

2845 self._create_running_file(vdi) 

2846 self._doCoalesceLeaf(vdi) 

2847 except: 

2848 Util.logException("_doCoalesceLeaf") 

2849 self._handleInterruptedCoalesceLeaf() 

2850 raise 

2851 finally: 

2852 vdi = self.getVDI(uuid) 

2853 if vdi: 

2854 vdi.ensureUnpaused() 

2855 self._delete_running_file(vdi) 

2856 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid) 

2857 if vdiOld: 

2858 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2859 self.deleteVDI(vdiOld) 

2860 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2861 finally: 

2862 self.cleanup() 

2863 self.unlock() 

2864 self.logFilter.logState() 

2865 return True 

2866 

2867 def _doCoalesceLeaf(self, vdi: VDI): 

2868 """Actual coalescing of a leaf VDI onto parent. Must be called in an 

2869 offline/atomic context""" 

2870 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid) 

2871 self._prepareCoalesceLeaf(vdi) 

2872 vdi.parent._setHidden(False) 

2873 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False) 

2874 vdi.validate(True) 

2875 vdi.parent.validate(True) 

2876 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid) 

2877 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT 

2878 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE: 2878 ↛ 2879line 2878 didn't jump to line 2879, because the condition on line 2878 was never true

2879 Util.log("Leaf-coalesce forced, will not use timeout") 

2880 timeout = 0 

2881 vdi._coalesceCowImage(timeout) 

2882 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid) 

2883 vdi.parent.validate(True) 

2884 #vdi._verifyContents(timeout / 2) 

2885 

2886 # rename 

2887 vdiUuid = vdi.uuid 

2888 oldName = vdi.fileName 

2889 origParentUuid = vdi.parent.uuid 

2890 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid) 

2891 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid) 

2892 vdi.parent.rename(vdiUuid) 

2893 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid) 

2894 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid) 

2895 

2896 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is 

2897 # garbage 

2898 

2899 # update the VDI record 

2900 if vdi.parent.vdi_type == VdiType.RAW: 2900 ↛ 2901line 2900 didn't jump to line 2901, because the condition on line 2900 was never true

2901 vdi.parent.setConfig(VDI.DB_VDI_TYPE, VdiType.RAW) 

2902 vdi.parent.delConfig(VDI.DB_VDI_BLOCKS) 

2903 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid) 

2904 

2905 self._updateNode(vdi) 

2906 

2907 # delete the obsolete leaf & inflate the parent (in that order, to 

2908 # minimize free space requirements) 

2909 parent = vdi.parent 

2910 vdi._setHidden(True) 

2911 vdi.parent.children = [] 

2912 vdi.parent = None 

2913 

2914 if parent.parent is None: 

2915 parent.delConfig(VDI.DB_VDI_PARENT) 

2916 

2917 extraSpace = self._calcExtraSpaceNeeded(vdi, parent) 

2918 freeSpace = self.getFreeSpace() 

2919 if freeSpace < extraSpace: 2919 ↛ 2922line 2919 didn't jump to line 2922, because the condition on line 2919 was never true

2920 # don't delete unless we need the space: deletion is time-consuming 

2921 # because it requires contacting the slaves, and we're paused here 

2922 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2923 self.deleteVDI(vdi) 

2924 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2925 

2926 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid) 

2927 self.journaler.remove(VDI.JRN_LEAF, vdiUuid) 

2928 

2929 self.forgetVDI(origParentUuid) 

2930 self._finishCoalesceLeaf(parent) 

2931 self._updateSlavesOnResize(parent) 

2932 

2933 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

2934 assert(VdiType.isCowImage(parent.vdi_type)) 

2935 extra = child.getSizePhys() - parent.getSizePhys() 

2936 if extra < 0: 2936 ↛ 2937line 2936 didn't jump to line 2937, because the condition on line 2936 was never true

2937 extra = 0 

2938 return extra 

2939 

2940 def _prepareCoalesceLeaf(self, vdi) -> None: 

2941 pass 

2942 

2943 def _updateNode(self, vdi) -> None: 

2944 pass 

2945 

2946 def _finishCoalesceLeaf(self, parent) -> None: 

2947 pass 

2948 

2949 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

2950 pass 

2951 

2952 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None: 

2953 pass 

2954 

2955 def _updateSlavesOnResize(self, vdi) -> None: 

2956 pass 

2957 

2958 def _removeStaleVDIs(self, uuidsPresent) -> None: 

2959 for uuid in list(self.vdis.keys()): 

2960 if not uuid in uuidsPresent: 

2961 Util.log("VDI %s disappeared since last scan" % \ 

2962 self.vdis[uuid]) 

2963 del self.vdis[uuid] 

2964 

2965 def _handleInterruptedCoalesceLeaf(self) -> None: 

2966 """An interrupted leaf-coalesce operation may leave the COW tree in an 

2967 inconsistent state. If the old-leaf VDI is still present, we revert the 

2968 operation (in case the original error is persistent); otherwise we must 

2969 finish the operation""" 

2970 pass 

2971 

2972 def _buildTree(self, force): 

2973 self.vdiTrees = [] 

2974 for vdi in self.vdis.values(): 

2975 if vdi.parentUuid: 

2976 parent = self.getVDI(vdi.parentUuid) 

2977 if not parent: 

2978 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2979 self.vdiTrees.append(vdi) 

2980 continue 

2981 if force: 

2982 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2983 (vdi.parentUuid, vdi.uuid)) 

2984 self.vdiTrees.append(vdi) 

2985 continue 

2986 else: 

2987 raise util.SMException("Parent VDI %s of %s not " \ 

2988 "found" % (vdi.parentUuid, vdi.uuid)) 

2989 vdi.parent = parent 

2990 parent.children.append(vdi) 

2991 else: 

2992 self.vdiTrees.append(vdi) 

2993 

2994 

2995class FileSR(SR): 

2996 TYPE = SR.TYPE_FILE 

2997 CACHE_FILE_EXT = ".vhdcache" 

2998 # cache cleanup actions 

2999 CACHE_ACTION_KEEP = 0 

3000 CACHE_ACTION_REMOVE = 1 

3001 CACHE_ACTION_REMOVE_IF_INACTIVE = 2 

3002 

3003 def __init__(self, uuid, xapi, createLock, force): 

3004 SR.__init__(self, uuid, xapi, createLock, force) 

3005 self.path = "/var/run/sr-mount/%s" % self.uuid 

3006 self.journaler = fjournaler.Journaler(self.path) 

3007 

3008 @override 

3009 def scan(self, force=False) -> None: 

3010 if not util.pathexists(self.path): 

3011 raise util.SMException("directory %s not found!" % self.uuid) 

3012 

3013 uuidsPresent: List[str] = [] 

3014 

3015 for vdi_type in VDI_COW_TYPES: 

3016 scan_result = self._scan(vdi_type, force) 

3017 for uuid, image_info in scan_result.items(): 

3018 vdi = self.getVDI(uuid) 

3019 if not vdi: 

3020 self.logFilter.logNewVDI(uuid) 

3021 vdi = FileVDI(self, uuid, vdi_type) 

3022 self.vdis[uuid] = vdi 

3023 vdi.load(image_info) 

3024 uuidsPresent.extend(scan_result.keys()) 

3025 

3026 rawList = [x for x in os.listdir(self.path) if x.endswith(VdiTypeExtension.RAW)] 

3027 for rawName in rawList: 

3028 uuid = FileVDI.extractUuid(rawName) 

3029 uuidsPresent.append(uuid) 

3030 vdi = self.getVDI(uuid) 

3031 if not vdi: 

3032 self.logFilter.logNewVDI(uuid) 

3033 vdi = FileVDI(self, uuid, VdiType.RAW) 

3034 self.vdis[uuid] = vdi 

3035 self._removeStaleVDIs(uuidsPresent) 

3036 self._buildTree(force) 

3037 self.logFilter.logState() 

3038 self._handleInterruptedCoalesceLeaf() 

3039 

3040 @override 

3041 def getFreeSpace(self) -> int: 

3042 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) 

3043 

3044 @override 

3045 def deleteVDIs(self, vdiList) -> None: 

3046 rootDeleted = False 

3047 for vdi in vdiList: 

3048 if not vdi.parent: 

3049 rootDeleted = True 

3050 break 

3051 SR.deleteVDIs(self, vdiList) 

3052 if self.xapi.srRecord["type"] == "nfs" and rootDeleted: 

3053 self.xapi.markCacheSRsDirty() 

3054 

3055 @override 

3056 def cleanupCache(self, maxAge=-1) -> int: 

3057 """Clean up IntelliCache cache files. Caches for leaf nodes are 

3058 removed when the leaf node no longer exists or its allow-caching 

3059 attribute is not set. Caches for parent nodes are removed when the 

3060 parent node no longer exists or it hasn't been used in more than 

3061 <maxAge> hours. 

3062 Return number of caches removed. 

3063 """ 

3064 numRemoved = 0 

3065 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)] 

3066 Util.log("Found %d cache files" % len(cacheFiles)) 

3067 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge) 

3068 for cacheFile in cacheFiles: 

3069 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)] 

3070 action = self.CACHE_ACTION_KEEP 

3071 rec = self.xapi.getRecordVDI(uuid) 

3072 if not rec: 

3073 Util.log("Cache %s: VDI doesn't exist" % uuid) 

3074 action = self.CACHE_ACTION_REMOVE 

3075 elif rec["managed"] and not rec["allow_caching"]: 

3076 Util.log("Cache %s: caching disabled" % uuid) 

3077 action = self.CACHE_ACTION_REMOVE 

3078 elif not rec["managed"] and maxAge >= 0: 

3079 lastAccess = datetime.datetime.fromtimestamp( \ 

3080 os.path.getatime(os.path.join(self.path, cacheFile))) 

3081 if lastAccess < cutoff: 

3082 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge)) 

3083 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE 

3084 

3085 if action == self.CACHE_ACTION_KEEP: 

3086 Util.log("Keeping cache %s" % uuid) 

3087 continue 

3088 

3089 lockId = uuid 

3090 parentUuid = None 

3091 if rec and rec["managed"]: 

3092 parentUuid = rec["sm_config"].get("vhd-parent") 

3093 if parentUuid: 

3094 lockId = parentUuid 

3095 

3096 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId) 

3097 cacheLock.acquire() 

3098 try: 

3099 if self._cleanupCache(uuid, action): 

3100 numRemoved += 1 

3101 finally: 

3102 cacheLock.release() 

3103 return numRemoved 

3104 

3105 def _cleanupCache(self, uuid, action): 

3106 assert(action != self.CACHE_ACTION_KEEP) 

3107 rec = self.xapi.getRecordVDI(uuid) 

3108 if rec and rec["allow_caching"]: 

3109 Util.log("Cache %s appears to have become valid" % uuid) 

3110 return False 

3111 

3112 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT) 

3113 tapdisk = blktap2.Tapdisk.find_by_path(fullPath) 

3114 if tapdisk: 

3115 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE: 

3116 Util.log("Cache %s still in use" % uuid) 

3117 return False 

3118 Util.log("Shutting down tapdisk for %s" % fullPath) 

3119 tapdisk.shutdown() 

3120 

3121 Util.log("Deleting file %s" % fullPath) 

3122 os.unlink(fullPath) 

3123 return True 

3124 

3125 def _isCacheFileName(self, name): 

3126 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \ 

3127 name.endswith(self.CACHE_FILE_EXT) 

3128 

3129 def _scan(self, vdi_type, force): 

3130 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3131 error = False 

3132 pattern = os.path.join(self.path, "*%s" % VDI_TYPE_TO_EXTENSION[vdi_type]) 

3133 scan_result = getCowUtil(vdi_type).getAllInfoFromVG(pattern, FileVDI.extractUuid) 

3134 for uuid, vdiInfo in scan_result.items(): 

3135 if vdiInfo.error: 

3136 error = True 

3137 break 

3138 if not error: 

3139 return scan_result 

3140 Util.log("Scan error on attempt %d" % i) 

3141 if force: 

3142 return scan_result 

3143 raise util.SMException("Scan error") 

3144 

3145 @override 

3146 def deleteVDI(self, vdi) -> None: 

3147 self._checkSlaves(vdi) 

3148 SR.deleteVDI(self, vdi) 

3149 

3150 def _checkSlaves(self, vdi): 

3151 onlineHosts = self.xapi.getOnlineHosts() 

3152 abortFlag = IPCFlag(self.uuid) 

3153 for pbdRecord in self.xapi.getAttachedPBDs(): 

3154 hostRef = pbdRecord["host"] 

3155 if hostRef == self.xapi._hostRef: 

3156 continue 

3157 if abortFlag.test(FLAG_TYPE_ABORT): 

3158 raise AbortException("Aborting due to signal") 

3159 try: 

3160 self._checkSlave(hostRef, vdi) 

3161 except util.CommandException: 

3162 if hostRef in onlineHosts: 

3163 raise 

3164 

3165 def _checkSlave(self, hostRef, vdi): 

3166 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path}) 

3167 Util.log("Checking with slave: %s" % repr(call)) 

3168 _host = self.xapi.session.xenapi.host 

3169 text = _host.call_plugin( * call) 

3170 

3171 @override 

3172 def _handleInterruptedCoalesceLeaf(self) -> None: 

3173 entries = self.journaler.getAll(VDI.JRN_LEAF) 

3174 for uuid, parentUuid in entries.items(): 

3175 fileList = os.listdir(self.path) 

3176 childName = uuid + VdiTypeExtension.VHD 

3177 tmpChildName = self.TMP_RENAME_PREFIX + uuid + VdiTypeExtension.VHD 

3178 parentName1 = parentUuid + VdiTypeExtension.VHD 

3179 parentName2 = parentUuid + VdiTypeExtension.RAW 

3180 parentPresent = (parentName1 in fileList or parentName2 in fileList) 

3181 if parentPresent or tmpChildName in fileList: 

3182 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3183 else: 

3184 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3185 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3186 vdi = self.getVDI(uuid) 

3187 if vdi: 

3188 vdi.ensureUnpaused() 

3189 

3190 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3191 Util.log("*** UNDO LEAF-COALESCE") 

3192 parent = self.getVDI(parentUuid) 

3193 if not parent: 

3194 parent = self.getVDI(childUuid) 

3195 if not parent: 

3196 raise util.SMException("Neither %s nor %s found" % \ 

3197 (parentUuid, childUuid)) 

3198 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

3199 parent.rename(parentUuid) 

3200 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

3201 

3202 child = self.getVDI(childUuid) 

3203 if not child: 

3204 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3205 if not child: 

3206 raise util.SMException("Neither %s nor %s found" % \ 

3207 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3208 Util.log("Renaming child back to %s" % childUuid) 

3209 child.rename(childUuid) 

3210 Util.log("Updating the VDI record") 

3211 child.setConfig(VDI.DB_VDI_PARENT, parentUuid) 

3212 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type) 

3213 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3214 

3215 if child.isHidden(): 

3216 child._setHidden(False) 

3217 if not parent.isHidden(): 

3218 parent._setHidden(True) 

3219 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3220 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3221 Util.log("*** leaf-coalesce undo successful") 

3222 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3223 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3224 

3225 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3226 Util.log("*** FINISH LEAF-COALESCE") 

3227 vdi = self.getVDI(childUuid) 

3228 if not vdi: 

3229 Util.log(f"_finishInterruptedCoalesceLeaf, vdi {childUuid} not found, aborting") 

3230 raise util.SMException("VDI %s not found" % childUuid) 

3231 try: 

3232 self.forgetVDI(parentUuid) 

3233 except XenAPI.Failure: 

3234 Util.logException('_finishInterruptedCoalesceLeaf') 

3235 pass 

3236 self._updateSlavesOnResize(vdi) 

3237 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3238 Util.log("*** finished leaf-coalesce successfully") 

3239 

3240 

3241class LVMSR(SR): 

3242 TYPE = SR.TYPE_LVHD 

3243 SUBTYPES = ["lvhdoiscsi", "lvhdohba"] 

3244 

3245 def __init__(self, uuid, xapi, createLock, force): 

3246 SR.__init__(self, uuid, xapi, createLock, force) 

3247 self.vgName = "%s%s" % (VG_PREFIX, self.uuid) 

3248 self.path = os.path.join(VG_LOCATION, self.vgName) 

3249 

3250 sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid) 

3251 other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref) 

3252 lvm_conf = other_conf.get('lvm-conf') if other_conf else None 

3253 self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf) 

3254 

3255 self.lvActivator = LVActivator(self.uuid, self.lvmCache) 

3256 self.journaler = journaler.Journaler(self.lvmCache) 

3257 

3258 @override 

3259 def deleteVDI(self, vdi) -> None: 

3260 if self.lvActivator.get(vdi.uuid, False): 

3261 self.lvActivator.deactivate(vdi.uuid, False) 

3262 self._checkSlaves(vdi) 

3263 SR.deleteVDI(self, vdi) 

3264 

3265 @override 

3266 def forgetVDI(self, vdiUuid) -> None: 

3267 SR.forgetVDI(self, vdiUuid) 

3268 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) 

3269 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid) 

3270 

3271 @override 

3272 def getFreeSpace(self) -> int: 

3273 stats = lvutil._getVGstats(self.vgName) 

3274 return stats['physical_size'] - stats['physical_utilisation'] 

3275 

3276 @override 

3277 def cleanup(self): 

3278 if not self.lvActivator.deactivateAll(): 

3279 Util.log("ERROR deactivating LVs while cleaning up") 

3280 

3281 @override 

3282 def needUpdateBlockInfo(self) -> bool: 

3283 for vdi in self.vdis.values(): 

3284 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0: 

3285 continue 

3286 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

3287 return True 

3288 return False 

3289 

3290 @override 

3291 def updateBlockInfo(self) -> None: 

3292 numUpdated = 0 

3293 for vdi in self.vdis.values(): 

3294 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0: 

3295 continue 

3296 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

3297 vdi.updateBlockInfo() 

3298 numUpdated += 1 

3299 if numUpdated: 

3300 # deactivate the LVs back sooner rather than later. If we don't 

3301 # now, by the time this thread gets to deactivations, another one 

3302 # might have leaf-coalesced a node and deleted it, making the child 

3303 # inherit the refcount value and preventing the correct decrement 

3304 self.cleanup() 

3305 

3306 @override 

3307 def scan(self, force=False) -> None: 

3308 vdis = self._scan(force) 

3309 for uuid, vdiInfo in vdis.items(): 

3310 vdi = self.getVDI(uuid) 

3311 if not vdi: 

3312 self.logFilter.logNewVDI(uuid) 

3313 vdi = LVMVDI(self, uuid, vdiInfo.vdiType) 

3314 self.vdis[uuid] = vdi 

3315 vdi.load(vdiInfo) 

3316 self._removeStaleVDIs(vdis.keys()) 

3317 self._buildTree(force) 

3318 self.logFilter.logState() 

3319 self._handleInterruptedCoalesceLeaf() 

3320 

3321 def _scan(self, force): 

3322 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3323 error = False 

3324 self.lvmCache.refresh() 

3325 vdis = LvmCowUtil.getVDIInfo(self.lvmCache) 

3326 for uuid, vdiInfo in vdis.items(): 

3327 if vdiInfo.scanError: 

3328 error = True 

3329 break 

3330 if not error: 

3331 return vdis 

3332 Util.log("Scan error, retrying (%d)" % i) 

3333 if force: 

3334 return vdis 

3335 raise util.SMException("Scan error") 

3336 

3337 @override 

3338 def _removeStaleVDIs(self, uuidsPresent) -> None: 

3339 for uuid in list(self.vdis.keys()): 

3340 if not uuid in uuidsPresent: 

3341 Util.log("VDI %s disappeared since last scan" % \ 

3342 self.vdis[uuid]) 

3343 del self.vdis[uuid] 

3344 if self.lvActivator.get(uuid, False): 

3345 self.lvActivator.remove(uuid, False) 

3346 

3347 @override 

3348 def _liveLeafCoalesce(self, vdi) -> bool: 

3349 """If the parent is raw and the child was resized (virt. size), then 

3350 we'll need to resize the parent, which can take a while due to zeroing 

3351 out of the extended portion of the LV. Do it before pausing the child 

3352 to avoid a protracted downtime""" 

3353 if not VdiType.isCowImage(vdi.parent.vdi_type) and vdi.sizeVirt > vdi.parent.sizeVirt: 

3354 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

3355 vdi.parent._increaseSizeVirt(vdi.sizeVirt) 

3356 

3357 return SR._liveLeafCoalesce(self, vdi) 

3358 

3359 @override 

3360 def _prepareCoalesceLeaf(self, vdi) -> None: 

3361 vdi._activateChain() 

3362 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

3363 vdi.deflate() 

3364 vdi.inflateParentForCoalesce() 

3365 

3366 @override 

3367 def _updateNode(self, vdi) -> None: 

3368 # fix the refcounts: the remaining node should inherit the binary 

3369 # refcount from the leaf (because if it was online, it should remain 

3370 # refcounted as such), but the normal refcount from the parent (because 

3371 # this node is really the parent node) - minus 1 if it is online (since 

3372 # non-leaf nodes increment their normal counts when they are online and 

3373 # we are now a leaf, storing that 1 in the binary refcount). 

3374 ns = NS_PREFIX_LVM + self.uuid 

3375 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns) 

3376 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns) 

3377 pCnt = pCnt - cBcnt 

3378 assert(pCnt >= 0) 

3379 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns) 

3380 

3381 @override 

3382 def _finishCoalesceLeaf(self, parent) -> None: 

3383 if not parent.isSnapshot() or parent.isAttachedRW(): 

3384 parent.inflateFully() 

3385 else: 

3386 parent.deflate() 

3387 

3388 @override 

3389 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3390 return parent.lvmcowutil.calcVolumeSize(parent.sizeVirt) - parent.sizeLV 

3391 

3392 @override 

3393 def _handleInterruptedCoalesceLeaf(self) -> None: 

3394 entries = self.journaler.getAll(VDI.JRN_LEAF) 

3395 for uuid, parentUuid in entries.items(): 

3396 undo = False 

3397 for prefix in LV_PREFIX.values(): 

3398 parentLV = prefix + parentUuid 

3399 undo = self.lvmCache.checkLV(parentLV) 

3400 if undo: 

3401 break 

3402 

3403 if not undo: 

3404 for prefix in LV_PREFIX.values(): 

3405 tmpChildLV = prefix + uuid 

3406 undo = self.lvmCache.checkLV(tmpChildLV) 

3407 if undo: 

3408 break 

3409 

3410 if undo: 

3411 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3412 else: 

3413 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3414 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3415 vdi = self.getVDI(uuid) 

3416 if vdi: 

3417 vdi.ensureUnpaused() 

3418 

3419 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3420 Util.log("*** UNDO LEAF-COALESCE") 

3421 parent = self.getVDI(parentUuid) 

3422 if not parent: 

3423 parent = self.getVDI(childUuid) 

3424 if not parent: 

3425 raise util.SMException("Neither %s nor %s found" % \ 

3426 (parentUuid, childUuid)) 

3427 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

3428 parent.rename(parentUuid) 

3429 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

3430 

3431 child = self.getVDI(childUuid) 

3432 if not child: 

3433 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3434 if not child: 

3435 raise util.SMException("Neither %s nor %s found" % \ 

3436 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3437 Util.log("Renaming child back to %s" % childUuid) 

3438 child.rename(childUuid) 

3439 Util.log("Updating the VDI record") 

3440 child.setConfig(VDI.DB_VDI_PARENT, parentUuid) 

3441 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type) 

3442 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3443 

3444 # refcount (best effort - assume that it had succeeded if the 

3445 # second rename succeeded; if not, this adjustment will be wrong, 

3446 # leading to a non-deactivation of the LV) 

3447 ns = NS_PREFIX_LVM + self.uuid 

3448 cCnt, cBcnt = RefCounter.check(child.uuid, ns) 

3449 pCnt, pBcnt = RefCounter.check(parent.uuid, ns) 

3450 pCnt = pCnt + cBcnt 

3451 RefCounter.set(parent.uuid, pCnt, 0, ns) 

3452 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid) 

3453 

3454 parent.deflate() 

3455 child.inflateFully() 

3456 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid) 

3457 if child.isHidden(): 

3458 child._setHidden(False) 

3459 if not parent.isHidden(): 

3460 parent._setHidden(True) 

3461 if not parent.lvReadonly: 

3462 self.lvmCache.setReadonly(parent.fileName, True) 

3463 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3464 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3465 Util.log("*** leaf-coalesce undo successful") 

3466 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3467 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3468 

3469 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3470 Util.log("*** FINISH LEAF-COALESCE") 

3471 vdi = self.getVDI(childUuid) 

3472 if not vdi: 

3473 raise util.SMException("VDI %s not found" % childUuid) 

3474 vdi.inflateFully() 

3475 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid) 

3476 try: 

3477 self.forgetVDI(parentUuid) 

3478 except XenAPI.Failure: 

3479 pass 

3480 self._updateSlavesOnResize(vdi) 

3481 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3482 Util.log("*** finished leaf-coalesce successfully") 

3483 

3484 def _checkSlaves(self, vdi): 

3485 """Confirm with all slaves in the pool that 'vdi' is not in use. We 

3486 try to check all slaves, including those that the Agent believes are 

3487 offline, but ignore failures for offline hosts. This is to avoid cases 

3488 where the Agent thinks a host is offline but the host is up.""" 

3489 args = {"vgName": self.vgName, 

3490 "action1": "deactivateNoRefcount", 

3491 "lvName1": vdi.fileName, 

3492 "action2": "cleanupLockAndRefcount", 

3493 "uuid2": vdi.uuid, 

3494 "ns2": NS_PREFIX_LVM + self.uuid} 

3495 onlineHosts = self.xapi.getOnlineHosts() 

3496 abortFlag = IPCFlag(self.uuid) 

3497 for pbdRecord in self.xapi.getAttachedPBDs(): 

3498 hostRef = pbdRecord["host"] 

3499 if hostRef == self.xapi._hostRef: 

3500 continue 

3501 if abortFlag.test(FLAG_TYPE_ABORT): 

3502 raise AbortException("Aborting due to signal") 

3503 Util.log("Checking with slave %s (path %s)" % ( 

3504 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path)) 

3505 try: 

3506 self.xapi.ensureInactive(hostRef, args) 

3507 except XenAPI.Failure: 

3508 if hostRef in onlineHosts: 

3509 raise 

3510 

3511 @override 

3512 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

3513 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) 

3514 if not slaves: 

3515 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ 

3516 child) 

3517 return 

3518 

3519 tmpName = child.vdi_type + self.TMP_RENAME_PREFIX + child.uuid 

3520 args = {"vgName": self.vgName, 

3521 "action1": "deactivateNoRefcount", 

3522 "lvName1": tmpName, 

3523 "action2": "deactivateNoRefcount", 

3524 "lvName2": child.fileName, 

3525 "action3": "refresh", 

3526 "lvName3": child.fileName, 

3527 "action4": "refresh", 

3528 "lvName4": parent.fileName} 

3529 for slave in slaves: 

3530 Util.log("Updating %s, %s, %s on slave %s" % \ 

3531 (tmpName, child.fileName, parent.fileName, 

3532 self.xapi.getRecordHost(slave)['hostname'])) 

3533 text = self.xapi.session.xenapi.host.call_plugin( \ 

3534 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3535 Util.log("call-plugin returned: '%s'" % text) 

3536 

3537 @override 

3538 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None: 

3539 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) 

3540 if not slaves: 

3541 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) 

3542 return 

3543 

3544 args = {"vgName": self.vgName, 

3545 "action1": "deactivateNoRefcount", 

3546 "lvName1": oldNameLV, 

3547 "action2": "refresh", 

3548 "lvName2": vdi.fileName, 

3549 "action3": "cleanupLockAndRefcount", 

3550 "uuid3": origParentUuid, 

3551 "ns3": NS_PREFIX_LVM + self.uuid} 

3552 for slave in slaves: 

3553 Util.log("Updating %s to %s on slave %s" % \ 

3554 (oldNameLV, vdi.fileName, 

3555 self.xapi.getRecordHost(slave)['hostname'])) 

3556 text = self.xapi.session.xenapi.host.call_plugin( \ 

3557 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3558 Util.log("call-plugin returned: '%s'" % text) 

3559 

3560 @override 

3561 def _updateSlavesOnResize(self, vdi) -> None: 

3562 uuids = [x.uuid for x in vdi.getAllLeaves()] 

3563 slaves = util.get_slaves_attached_on(self.xapi.session, uuids) 

3564 if not slaves: 

3565 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi) 

3566 return 

3567 LvmCowUtil.refreshVolumeOnSlaves(self.xapi.session, self.uuid, self.vgName, 

3568 vdi.fileName, vdi.uuid, slaves) 

3569 

3570 

3571class LinstorSR(SR): 

3572 TYPE = SR.TYPE_LINSTOR 

3573 

3574 def __init__(self, uuid, xapi, createLock, force): 

3575 if not LINSTOR_AVAILABLE: 

3576 raise util.SMException( 

3577 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing' 

3578 ) 

3579 

3580 SR.__init__(self, uuid, xapi, createLock, force) 

3581 self.path = LinstorVolumeManager.DEV_ROOT_PATH 

3582 

3583 class LinstorProxy: 

3584 def __init__(self, sr: LinstorSR) -> None: 

3585 self.sr = sr 

3586 

3587 def __getattr__(self, attr: str) -> Any: 

3588 assert self.sr, "Cannot use `LinstorProxy` without valid `LinstorVolumeManager` instance" 

3589 return getattr(self.sr._linstor, attr) 

3590 

3591 self._linstor_proxy = LinstorProxy(self) 

3592 self._reloadLinstor(journaler_only=True) 

3593 

3594 @override 

3595 def deleteVDI(self, vdi) -> None: 

3596 self._checkSlaves(vdi) 

3597 SR.deleteVDI(self, vdi) 

3598 

3599 @override 

3600 def getFreeSpace(self) -> int: 

3601 return self._linstor.max_volume_size_allowed 

3602 

3603 @override 

3604 def scan(self, force=False) -> None: 

3605 all_vdi_info = self._scan(force) 

3606 for uuid, vdiInfo in all_vdi_info.items(): 

3607 # When vdiInfo is None, the VDI is RAW. 

3608 vdi = self.getVDI(uuid) 

3609 if not vdi: 

3610 self.logFilter.logNewVDI(uuid) 

3611 vdi = LinstorVDI(self, uuid, vdiInfo.vdiType if vdiInfo else VdiType.RAW) 

3612 self.vdis[uuid] = vdi 

3613 if vdiInfo: 

3614 vdi.load(vdiInfo) 

3615 self._removeStaleVDIs(all_vdi_info.keys()) 

3616 self._buildTree(force) 

3617 self.logFilter.logState() 

3618 self._handleInterruptedCoalesceLeaf() 

3619 

3620 @override 

3621 def pauseVDIs(self, vdiList) -> None: 

3622 self._linstor.ensure_volume_list_is_not_locked( 

3623 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3624 ) 

3625 return super(LinstorSR, self).pauseVDIs(vdiList) 

3626 

3627 def _reloadLinstor(self, journaler_only=False): 

3628 session = self.xapi.session 

3629 host_ref = util.get_this_host_ref(session) 

3630 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) 

3631 

3632 pbd = util.find_my_pbd(session, host_ref, sr_ref) 

3633 if pbd is None: 

3634 raise util.SMException('Failed to find PBD') 

3635 

3636 dconf = session.xenapi.PBD.get_device_config(pbd) 

3637 group_name = dconf['group-name'] 

3638 

3639 controller_uri = get_controller_uri() 

3640 self.journaler = LinstorJournaler( 

3641 controller_uri, group_name, logger=util.SMlog 

3642 ) 

3643 

3644 if journaler_only: 

3645 return 

3646 

3647 self._linstor = LinstorVolumeManager( 

3648 controller_uri, 

3649 group_name, 

3650 repair=True, 

3651 logger=util.SMlog 

3652 ) 

3653 

3654 def _scan(self, force): 

3655 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3656 self._reloadLinstor() 

3657 error = False 

3658 try: 

3659 all_vdi_info = self._load_vdi_info() 

3660 for uuid, vdiInfo in all_vdi_info.items(): 

3661 if vdiInfo and vdiInfo.error: 

3662 error = True 

3663 break 

3664 if not error: 

3665 return all_vdi_info 

3666 Util.log('Scan error, retrying ({})'.format(i)) 

3667 except Exception as e: 

3668 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3669 Util.log(traceback.format_exc()) 

3670 

3671 if force: 

3672 return all_vdi_info 

3673 raise util.SMException('Scan error') 

3674 

3675 def _load_vdi_info(self): 

3676 all_vdi_info = {} 

3677 

3678 # TODO: Ensure metadata contains the right info. 

3679 

3680 all_volume_info = self._linstor.get_volumes_with_info() 

3681 volumes_metadata = self._linstor.get_volumes_with_metadata() 

3682 for vdi_uuid, volume_info in all_volume_info.items(): 

3683 vdi_type = VdiType.RAW 

3684 try: 

3685 volume_metadata = volumes_metadata[vdi_uuid] 

3686 if not volume_info.name and not list(volume_metadata.items()): 

3687 continue # Ignore it, probably deleted. 

3688 

3689 if vdi_uuid.startswith('DELETED_'): 

3690 # Assume it's really a RAW volume of a failed snap without COW header/footer. 

3691 # We must remove this VDI now without adding it in the VDI list. 

3692 # Otherwise `Relinking` calls and other actions can be launched on it. 

3693 # We don't want that... 

3694 Util.log('Deleting bad VDI {}'.format(vdi_uuid)) 

3695 

3696 self.lock() 

3697 try: 

3698 self._linstor.destroy_volume(vdi_uuid) 

3699 try: 

3700 self.forgetVDI(vdi_uuid) 

3701 except: 

3702 pass 

3703 except Exception as e: 

3704 Util.log('Cannot delete bad VDI: {}'.format(e)) 

3705 finally: 

3706 self.unlock() 

3707 continue 

3708 

3709 vdi_type = volume_metadata.get(VDI_TYPE_TAG) 

3710 volume_name = self._linstor.get_volume_name(vdi_uuid) 

3711 if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX): 

3712 # Always RAW! 

3713 info = None 

3714 elif VdiType.isCowImage(vdi_type): 

3715 info = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type).get_info(vdi_uuid) 

3716 else: 

3717 # Ensure it's not a COW image... 

3718 linstorcowutil = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type) 

3719 try: 

3720 info = linstorcowutil.get_info(vdi_uuid) 

3721 except: 

3722 try: 

3723 linstorcowutil.force_repair( 

3724 self._linstor.get_device_path(vdi_uuid) 

3725 ) 

3726 info = linstorcowutil.get_info(vdi_uuid) 

3727 except: 

3728 info = None 

3729 

3730 except Exception as e: 

3731 Util.log( 

3732 ' [VDI {}: failed to load VDI info]: {}' 

3733 .format(vdi_uuid, e) 

3734 ) 

3735 info = CowImageInfo(vdi_uuid) 

3736 info.error = 1 

3737 

3738 if info: 

3739 info.vdiType = vdi_type 

3740 

3741 all_vdi_info[vdi_uuid] = info 

3742 

3743 return all_vdi_info 

3744 

3745 @override 

3746 def _prepareCoalesceLeaf(self, vdi) -> None: 

3747 vdi._activateChain() 

3748 vdi.deflate() 

3749 vdi._inflateParentForCoalesce() 

3750 

3751 @override 

3752 def _finishCoalesceLeaf(self, parent) -> None: 

3753 if not parent.isSnapshot() or parent.isAttachedRW(): 

3754 parent.inflateFully() 

3755 else: 

3756 parent.deflate() 

3757 

3758 @override 

3759 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3760 return LinstorCowUtil( 

3761 self.xapi.session, self._linstor, parent.vdi_type 

3762 ).compute_volume_size(parent.sizeVirt) - parent.getDrbdSize() 

3763 

3764 def _hasValidDevicePath(self, uuid): 

3765 try: 

3766 self._linstor.get_device_path(uuid) 

3767 except Exception: 

3768 # TODO: Maybe log exception. 

3769 return False 

3770 return True 

3771 

3772 @override 

3773 def _liveLeafCoalesce(self, vdi) -> bool: 

3774 self.lock() 

3775 try: 

3776 self._linstor.ensure_volume_is_not_locked( 

3777 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3778 ) 

3779 return super(LinstorSR, self)._liveLeafCoalesce(vdi) 

3780 finally: 

3781 self.unlock() 

3782 

3783 @override 

3784 def _handleInterruptedCoalesceLeaf(self) -> None: 

3785 entries = self.journaler.get_all(VDI.JRN_LEAF) 

3786 for uuid, parentUuid in entries.items(): 

3787 if self._hasValidDevicePath(parentUuid) or \ 

3788 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): 

3789 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3790 else: 

3791 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3792 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3793 vdi = self.getVDI(uuid) 

3794 if vdi: 

3795 vdi.ensureUnpaused() 

3796 

3797 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3798 Util.log('*** UNDO LEAF-COALESCE') 

3799 parent = self.getVDI(parentUuid) 

3800 if not parent: 

3801 parent = self.getVDI(childUuid) 

3802 if not parent: 

3803 raise util.SMException( 

3804 'Neither {} nor {} found'.format(parentUuid, childUuid) 

3805 ) 

3806 Util.log( 

3807 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) 

3808 ) 

3809 parent.rename(parentUuid) 

3810 

3811 child = self.getVDI(childUuid) 

3812 if not child: 

3813 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3814 if not child: 

3815 raise util.SMException( 

3816 'Neither {} nor {} found'.format( 

3817 childUuid, self.TMP_RENAME_PREFIX + childUuid 

3818 ) 

3819 ) 

3820 Util.log('Renaming child back to {}'.format(childUuid)) 

3821 child.rename(childUuid) 

3822 Util.log('Updating the VDI record') 

3823 child.setConfig(VDI.DB_VDI_PARENT, parentUuid) 

3824 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type) 

3825 

3826 # TODO: Maybe deflate here. 

3827 

3828 if child.isHidden(): 

3829 child._setHidden(False) 

3830 if not parent.isHidden(): 

3831 parent._setHidden(True) 

3832 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3833 Util.log('*** leaf-coalesce undo successful') 

3834 

3835 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3836 Util.log('*** FINISH LEAF-COALESCE') 

3837 vdi = self.getVDI(childUuid) 

3838 if not vdi: 

3839 raise util.SMException('VDI {} not found'.format(childUuid)) 

3840 # TODO: Maybe inflate. 

3841 try: 

3842 self.forgetVDI(parentUuid) 

3843 except XenAPI.Failure: 

3844 pass 

3845 self._updateSlavesOnResize(vdi) 

3846 Util.log('*** finished leaf-coalesce successfully') 

3847 

3848 def _checkSlaves(self, vdi): 

3849 try: 

3850 all_openers = self._linstor.get_volume_openers(vdi.uuid) 

3851 for openers in all_openers.values(): 

3852 for opener in openers.values(): 

3853 if opener['process-name'] != 'tapdisk': 

3854 raise util.SMException( 

3855 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) 

3856 ) 

3857 except LinstorVolumeManagerError as e: 

3858 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

3859 raise 

3860 

3861 

3862################################################################################ 

3863# 

3864# Helpers 

3865# 

3866def daemonize(): 

3867 pid = os.fork() 

3868 if pid: 

3869 os.waitpid(pid, 0) 

3870 Util.log("New PID [%d]" % pid) 

3871 return False 

3872 os.chdir("/") 

3873 os.setsid() 

3874 pid = os.fork() 

3875 if pid: 

3876 Util.log("Will finish as PID [%d]" % pid) 

3877 os._exit(0) 

3878 for fd in [0, 1, 2]: 

3879 try: 

3880 os.close(fd) 

3881 except OSError: 

3882 pass 

3883 # we need to fill those special fd numbers or pread won't work 

3884 sys.stdin = open("/dev/null", 'r') 

3885 sys.stderr = open("/dev/null", 'w') 

3886 sys.stdout = open("/dev/null", 'w') 

3887 # As we're a new process we need to clear the lock objects 

3888 lock.Lock.clearAll() 

3889 return True 

3890 

3891 

3892def normalizeType(type): 

3893 if type in LVMSR.SUBTYPES: 

3894 type = SR.TYPE_LVHD 

3895 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: 

3896 # temporary while LVHD is symlinked as LVM 

3897 type = SR.TYPE_LVHD 

3898 if type in [ 

3899 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", 

3900 "moosefs", "xfs", "zfs", "largeblock" 

3901 ]: 

3902 type = SR.TYPE_FILE 

3903 if type in ["linstor"]: 

3904 type = SR.TYPE_LINSTOR 

3905 if type not in SR.TYPES: 

3906 raise util.SMException("Unsupported SR type: %s" % type) 

3907 return type 

3908 

3909GCPAUSE_DEFAULT_SLEEP = 5 * 60 

3910 

3911 

3912def _gc_init_file(sr_uuid): 

3913 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init') 

3914 

3915 

3916def _create_init_file(sr_uuid): 

3917 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))) 

3918 with open(os.path.join(_gc_init_file(sr_uuid)), 'w+') as f: 

3919 f.write('1') 

3920 

3921 

3922def _gcLoopPause(sr, dryRun=False, immediate=False): 

3923 if immediate: 

3924 return 

3925 

3926 # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist 

3927 # point will just return. Otherwise, fall back on an abortable sleep. 

3928 

3929 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT): 

3930 

3931 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT, 3931 ↛ exitline 3931 didn't jump to the function exit

3932 lambda *args: None) 

3933 elif os.path.exists(_gc_init_file(sr.uuid)): 

3934 def abortTest(): 

3935 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT) 

3936 

3937 # If time.sleep hangs we are in deep trouble, however for 

3938 # completeness we set the timeout of the abort thread to 

3939 # 110% of GCPAUSE_DEFAULT_SLEEP. 

3940 Util.log("GC active, about to go quiet") 

3941 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP), 3941 ↛ exitline 3941 didn't run the lambda on line 3941

3942 None, sr.uuid, abortTest, VDI.POLL_INTERVAL, 

3943 GCPAUSE_DEFAULT_SLEEP * 1.1) 

3944 Util.log("GC active, quiet period ended") 

3945 

3946 

3947def _gcLoop(sr, dryRun=False, immediate=False): 

3948 if not lockGCActive.acquireNoblock(): 3948 ↛ 3949line 3948 didn't jump to line 3949, because the condition on line 3948 was never true

3949 Util.log("Another GC instance already active, exiting") 

3950 return 

3951 

3952 # Check we're still attached after acquiring locks 

3953 if not sr.xapi.isPluggedHere(): 

3954 Util.log("SR no longer attached, exiting") 

3955 return 

3956 

3957 # Clean up Intellicache files 

3958 sr.cleanupCache() 

3959 

3960 # Track how many we do 

3961 coalesced = 0 

3962 task_status = "success" 

3963 try: 

3964 # Check if any work needs to be done 

3965 if not sr.xapi.isPluggedHere(): 3965 ↛ 3966line 3965 didn't jump to line 3966, because the condition on line 3965 was never true

3966 Util.log("SR no longer attached, exiting") 

3967 return 

3968 sr.scanLocked() 

3969 if not sr.hasWork(): 

3970 Util.log("No work, exiting") 

3971 return 

3972 sr.xapi.create_task( 

3973 "Garbage Collection", 

3974 "Garbage collection for SR %s" % sr.uuid) 

3975 _gcLoopPause(sr, dryRun, immediate=immediate) 

3976 while True: 

3977 if SIGTERM: 

3978 Util.log("Term requested") 

3979 return 

3980 

3981 if not sr.xapi.isPluggedHere(): 3981 ↛ 3982line 3981 didn't jump to line 3982, because the condition on line 3981 was never true

3982 Util.log("SR no longer attached, exiting") 

3983 break 

3984 sr.scanLocked() 

3985 if not sr.hasWork(): 

3986 Util.log("No work, exiting") 

3987 break 

3988 

3989 if not lockGCRunning.acquireNoblock(): 3989 ↛ 3990line 3989 didn't jump to line 3990, because the condition on line 3989 was never true

3990 Util.log("Unable to acquire GC running lock.") 

3991 return 

3992 try: 

3993 if not sr.gcEnabled(): 3993 ↛ 3994line 3993 didn't jump to line 3994, because the condition on line 3993 was never true

3994 break 

3995 

3996 sr.xapi.update_task_progress("done", coalesced) 

3997 

3998 sr.cleanupCoalesceJournals() 

3999 # Create the init file here in case startup is waiting on it 

4000 _create_init_file(sr.uuid) 

4001 sr.scanLocked() 

4002 sr.updateBlockInfo() 

4003 

4004 howmany = len(sr.findGarbage()) 

4005 if howmany > 0: 

4006 Util.log("Found %d orphaned vdis" % howmany) 

4007 sr.lock() 

4008 try: 

4009 sr.garbageCollect(dryRun) 

4010 finally: 

4011 sr.unlock() 

4012 sr.xapi.srUpdate() 

4013 

4014 candidate = sr.findCoalesceable() 

4015 if candidate: 

4016 util.fistpoint.activate( 

4017 "LVHDRT_finding_a_suitable_pair", sr.uuid) 

4018 sr.coalesce(candidate, dryRun) 

4019 sr.xapi.srUpdate() 

4020 coalesced += 1 

4021 continue 

4022 

4023 candidate = sr.findLeafCoalesceable() 

4024 if candidate: 4024 ↛ 4031line 4024 didn't jump to line 4031, because the condition on line 4024 was never false

4025 sr.coalesceLeaf(candidate, dryRun) 

4026 sr.xapi.srUpdate() 

4027 coalesced += 1 

4028 continue 

4029 

4030 finally: 

4031 lockGCRunning.release() 4031 ↛ 4036line 4031 didn't jump to line 4036, because the break on line 3994 wasn't executed

4032 except: 

4033 task_status = "failure" 

4034 raise 

4035 finally: 

4036 sr.xapi.set_task_status(task_status) 

4037 Util.log("GC process exiting, no work left") 

4038 _create_init_file(sr.uuid) 

4039 lockGCActive.release() 

4040 

4041 

4042def _gc(session, srUuid, dryRun=False, immediate=False): 

4043 init(srUuid) 

4044 sr = SR.getInstance(srUuid, session) 

4045 if not sr.gcEnabled(False): 4045 ↛ 4046line 4045 didn't jump to line 4046, because the condition on line 4045 was never true

4046 return 

4047 

4048 try: 

4049 _gcLoop(sr, dryRun, immediate=immediate) 

4050 finally: 

4051 sr.check_no_space_candidates() 

4052 sr.cleanup() 

4053 sr.logFilter.logState() 

4054 del sr.xapi 

4055 

4056 

4057def _abort(srUuid, soft=False): 

4058 """Aborts an GC/coalesce. 

4059 

4060 srUuid: the UUID of the SR whose GC/coalesce must be aborted 

4061 soft: If set to True and there is a pending abort signal, the function 

4062 doesn't do anything. If set to False, a new abort signal is issued. 

4063 

4064 returns: If soft is set to False, we return True holding lockGCActive. If 

4065 soft is set to False and an abort signal is pending, we return False 

4066 without holding lockGCActive. An exception is raised in case of error.""" 

4067 Util.log("=== SR %s: abort ===" % (srUuid)) 

4068 init(srUuid) 

4069 if not lockGCActive.acquireNoblock(): 

4070 gotLock = False 

4071 Util.log("Aborting currently-running instance (SR %s)" % srUuid) 

4072 abortFlag = IPCFlag(srUuid) 

4073 if not abortFlag.set(FLAG_TYPE_ABORT, soft): 

4074 return False 

4075 for i in range(SR.LOCK_RETRY_ATTEMPTS): 

4076 gotLock = lockGCActive.acquireNoblock() 

4077 if gotLock: 

4078 break 

4079 time.sleep(SR.LOCK_RETRY_INTERVAL) 

4080 abortFlag.clear(FLAG_TYPE_ABORT) 

4081 if not gotLock: 

4082 raise util.CommandException(code=errno.ETIMEDOUT, 

4083 reason="SR %s: error aborting existing process" % srUuid) 

4084 return True 

4085 

4086 

4087def init(srUuid): 

4088 global lockGCRunning 

4089 if not lockGCRunning: 4089 ↛ 4090line 4089 didn't jump to line 4090, because the condition on line 4089 was never true

4090 lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid) 

4091 global lockGCActive 

4092 if not lockGCActive: 4092 ↛ 4093line 4092 didn't jump to line 4093, because the condition on line 4092 was never true

4093 lockGCActive = LockActive(srUuid) 

4094 

4095 

4096class LockActive: 

4097 """ 

4098 Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired 

4099 if another process holds the SR lock. 

4100 """ 

4101 def __init__(self, srUuid): 

4102 self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid) 

4103 self._srLock = lock.Lock(lock.LOCK_TYPE_SR, srUuid) 

4104 

4105 def acquireNoblock(self): 

4106 self._srLock.acquire() 

4107 

4108 try: 

4109 return self._lock.acquireNoblock() 

4110 finally: 

4111 self._srLock.release() 

4112 

4113 def release(self): 

4114 self._lock.release() 

4115 

4116 

4117def usage(): 

4118 output = """Garbage collect and/or coalesce COW images in a COW-based SR 

4119 

4120Parameters: 

4121 -u --uuid UUID SR UUID 

4122 and one of: 

4123 -g --gc garbage collect, coalesce, and repeat while there is work 

4124 -G --gc_force garbage collect once, aborting any current operations 

4125 -c --cache-clean <max_age> clean up IntelliCache cache files older than 

4126 max_age hours 

4127 -a --abort abort any currently running operation (GC or coalesce) 

4128 -q --query query the current state (GC'ing, coalescing or not running) 

4129 -x --disable disable GC/coalesce (will be in effect until you exit) 

4130 -t --debug see Debug below 

4131 

4132Options: 

4133 -b --background run in background (return immediately) (valid for -g only) 

4134 -f --force continue in the presence of COW images with errors (when doing 

4135 GC, this might cause removal of any such images) (only valid 

4136 for -G) (DANGEROUS) 

4137 

4138Debug: 

4139 The --debug parameter enables manipulation of LVHD VDIs for debugging 

4140 purposes. ** NEVER USE IT ON A LIVE VM ** 

4141 The following parameters are required: 

4142 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate", 

4143 "deflate". 

4144 -v --vdi_uuid VDI UUID 

4145 """ 

4146 #-d --dry-run don't actually perform any SR-modifying operations 

4147 print(output) 

4148 Util.log("(Invalid usage)") 

4149 sys.exit(1) 

4150 

4151 

4152############################################################################## 

4153# 

4154# API 

4155# 

4156def abort(srUuid, soft=False): 

4157 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair. 

4158 """ 

4159 if _abort(srUuid, soft): 

4160 stop_gc_service(srUuid) 

4161 Util.log("abort: releasing the process lock") 

4162 lockGCActive.release() 

4163 return True 

4164 else: 

4165 return False 

4166 

4167 

4168def run_gc(session, srUuid, dryRun, immediate=False): 

4169 try: 

4170 _gc(session, srUuid, dryRun, immediate=immediate) 

4171 return 0 

4172 except AbortException: 

4173 Util.log("Aborted") 

4174 return 2 

4175 except Exception: 

4176 Util.logException("gc") 

4177 Util.log("* * * * * SR %s: ERROR\n" % srUuid) 

4178 return 1 

4179 

4180 

4181def gc(session, srUuid, inBackground, dryRun=False): 

4182 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return 

4183 immediately if inBackground=True. 

4184 

4185 The following algorithm is used: 

4186 1. If we are already GC'ing in this SR, return 

4187 2. If we are already coalescing a VDI pair: 

4188 a. Scan the SR and determine if the VDI pair is GC'able 

4189 b. If the pair is not GC'able, return 

4190 c. If the pair is GC'able, abort coalesce 

4191 3. Scan the SR 

4192 4. If there is nothing to collect, nor to coalesce, return 

4193 5. If there is something to collect, GC all, then goto 3 

4194 6. If there is something to coalesce, coalesce one pair, then goto 3 

4195 """ 

4196 Util.log("=== SR %s: gc ===" % srUuid) 

4197 

4198 signal.signal(signal.SIGTERM, receiveSignal) 

4199 

4200 if inBackground: 

4201 if daemonize(): 4201 ↛ exitline 4201 didn't return from function 'gc', because the condition on line 4201 was never false

4202 # we are now running in the background. Catch & log any errors 

4203 # because there is no other way to propagate them back at this 

4204 # point 

4205 

4206 run_gc(None, srUuid, dryRun) 

4207 os._exit(0) 

4208 else: 

4209 os._exit(run_gc(session, srUuid, dryRun, immediate=True)) 

4210 

4211 

4212def start_gc(session, sr_uuid): 

4213 """ 

4214 This function is used to try to start a backgrounded GC session by forking 

4215 the current process. If using the systemd version, call start_gc_service() instead. 

4216 """ 

4217 # don't bother if an instance already running (this is just an 

4218 # optimization to reduce the overhead of forking a new process if we 

4219 # don't have to, but the process will check the lock anyways) 

4220 lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid) 

4221 if not lockRunning.acquireNoblock(): 

4222 if should_preempt(session, sr_uuid): 

4223 util.SMlog("Aborting currently-running coalesce of garbage VDI") 

4224 try: 

4225 if not abort(sr_uuid, soft=True): 

4226 util.SMlog("The GC has already been scheduled to re-start") 

4227 except util.CommandException as e: 

4228 if e.code != errno.ETIMEDOUT: 

4229 raise 

4230 util.SMlog('failed to abort the GC') 

4231 else: 

4232 util.SMlog("A GC instance already running, not kicking") 

4233 return 

4234 else: 

4235 lockRunning.release() 

4236 

4237 util.SMlog(f"Starting GC file is {__file__}") 

4238 subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'], 

4239 stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4240 

4241def _gc_service_cmd(sr_uuid, action, extra_args=None): 

4242 """ 

4243 Build and run the systemctl command for the GC service using util.doexec. 

4244 """ 

4245 sr_uuid_esc = sr_uuid.replace("-", "\\x2d") 

4246 cmd=["/usr/bin/systemctl", "--quiet"] 

4247 if extra_args: 

4248 cmd.extend(extra_args) 

4249 cmd += [action, f"SMGC@{sr_uuid_esc}"] 

4250 return util.doexec(cmd) 

4251 

4252 

4253def start_gc_service(sr_uuid, wait=False): 

4254 """ 

4255 This starts the templated systemd service which runs GC on the given SR UUID. 

4256 If the service was already started, this is a no-op. 

4257 

4258 Because the service is a one-shot with RemainAfterExit=no, when called with 

4259 wait=True this will run the service synchronously and will not return until the 

4260 run has finished. This is used to force a run of the GC instead of just kicking it 

4261 in the background. 

4262 """ 

4263 util.SMlog(f"Kicking SMGC@{sr_uuid}...") 

4264 _gc_service_cmd(sr_uuid, "start", extra_args=None if wait else ["--no-block"]) 

4265 

4266 

4267def stop_gc_service(sr_uuid): 

4268 """ 

4269 Stops the templated systemd service which runs GC on the given SR UUID. 

4270 """ 

4271 util.SMlog(f"Stopping SMGC@{sr_uuid}...") 

4272 (rc, _stdout, stderr) = _gc_service_cmd(sr_uuid, "stop") 

4273 if rc != 0: 4273 ↛ exitline 4273 didn't return from function 'stop_gc_service', because the condition on line 4273 was never false

4274 util.SMlog(f"Failed to stop gc service `SMGC@{sr_uuid}`: `{stderr}`") 

4275 

4276 

4277def wait_for_completion(sr_uuid): 

4278 while get_state(sr_uuid): 

4279 time.sleep(5) 

4280 

4281 

4282def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False): 

4283 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure 

4284 the SR lock is held. 

4285 The following algorithm is used: 

4286 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce 

4287 2. Scan the SR 

4288 3. GC 

4289 4. return 

4290 """ 

4291 Util.log("=== SR %s: gc_force ===" % srUuid) 

4292 init(srUuid) 

4293 sr = SR.getInstance(srUuid, session, lockSR, True) 

4294 if not lockGCActive.acquireNoblock(): 

4295 abort(srUuid) 

4296 else: 

4297 Util.log("Nothing was running, clear to proceed") 

4298 

4299 if force: 

4300 Util.log("FORCED: will continue even if there are COW image errors") 

4301 sr.scanLocked(force) 

4302 sr.cleanupCoalesceJournals() 

4303 

4304 try: 

4305 sr.cleanupCache() 

4306 sr.garbageCollect(dryRun) 

4307 finally: 

4308 sr.cleanup() 

4309 sr.logFilter.logState() 

4310 lockGCActive.release() 

4311 

4312 

4313def get_state(srUuid): 

4314 """Return whether GC/coalesce is currently running or not. This asks systemd for 

4315 the state of the templated SMGC service and will return True if it is "activating" 

4316 or "running" (for completeness, as in practice it will never achieve the latter state) 

4317 """ 

4318 sr_uuid_esc = srUuid.replace("-", "\\x2d") 

4319 cmd=[ "/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"] 

4320 result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4321 state = result.stdout.decode('utf-8').rstrip() 

4322 if state == "activating" or state == "running": 

4323 return True 

4324 return False 

4325 

4326 

4327def should_preempt(session, srUuid): 

4328 sr = SR.getInstance(srUuid, session) 

4329 entries = sr.journaler.getAll(VDI.JRN_COALESCE) 

4330 if len(entries) == 0: 

4331 return False 

4332 elif len(entries) > 1: 

4333 raise util.SMException("More than one coalesce entry: " + str(entries)) 

4334 sr.scanLocked() 

4335 coalescedUuid = entries.popitem()[0] 

4336 garbage = sr.findGarbage() 

4337 for vdi in garbage: 

4338 if vdi.uuid == coalescedUuid: 

4339 return True 

4340 return False 

4341 

4342 

4343def get_coalesceable_leaves(session, srUuid, vdiUuids): 

4344 coalesceable = [] 

4345 sr = SR.getInstance(srUuid, session) 

4346 sr.scanLocked() 

4347 for uuid in vdiUuids: 

4348 vdi = sr.getVDI(uuid) 

4349 if not vdi: 

4350 raise util.SMException("VDI %s not found" % uuid) 

4351 if vdi.isLeafCoalesceable(): 

4352 coalesceable.append(uuid) 

4353 return coalesceable 

4354 

4355 

4356def cache_cleanup(session, srUuid, maxAge): 

4357 sr = SR.getInstance(srUuid, session) 

4358 return sr.cleanupCache(maxAge) 

4359 

4360 

4361def debug(sr_uuid, cmd, vdi_uuid): 

4362 Util.log("Debug command: %s" % cmd) 

4363 sr = SR.getInstance(sr_uuid, None) 

4364 if not isinstance(sr, LVMSR): 

4365 print("Error: not an LVHD SR") 

4366 return 

4367 sr.scanLocked() 

4368 vdi = sr.getVDI(vdi_uuid) 

4369 if not vdi: 

4370 print("Error: VDI %s not found") 

4371 return 

4372 print("Running %s on SR %s" % (cmd, sr)) 

4373 print("VDI before: %s" % vdi) 

4374 if cmd == "activate": 

4375 vdi._activate() 

4376 print("VDI file: %s" % vdi.path) 

4377 if cmd == "deactivate": 

4378 ns = NS_PREFIX_LVM + sr.uuid 

4379 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False) 

4380 if cmd == "inflate": 

4381 vdi.inflateFully() 

4382 sr.cleanup() 

4383 if cmd == "deflate": 

4384 vdi.deflate() 

4385 sr.cleanup() 

4386 sr.scanLocked() 

4387 print("VDI after: %s" % vdi) 

4388 

4389 

4390def abort_optional_reenable(uuid): 

4391 print("Disabling GC/coalesce for %s" % uuid) 

4392 ret = _abort(uuid) 

4393 input("Press enter to re-enable...") 

4394 print("GC/coalesce re-enabled") 

4395 lockGCRunning.release() 

4396 if ret: 

4397 lockGCActive.release() 

4398 

4399 

4400############################################################################## 

4401# 

4402# CLI 

4403# 

4404def main(): 

4405 action = "" 

4406 maxAge = 0 

4407 uuid = "" 

4408 background = False 

4409 force = False 

4410 dryRun = False 

4411 debug_cmd = "" 

4412 vdi_uuid = "" 

4413 shortArgs = "gGc:aqxu:bfdt:v:" 

4414 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable", 

4415 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="] 

4416 

4417 try: 

4418 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs) 

4419 except getopt.GetoptError: 

4420 usage() 

4421 for o, a in opts: 

4422 if o in ("-g", "--gc"): 

4423 action = "gc" 

4424 if o in ("-G", "--gc_force"): 

4425 action = "gc_force" 

4426 if o in ("-c", "--clean_cache"): 

4427 action = "clean_cache" 

4428 maxAge = int(a) 

4429 if o in ("-a", "--abort"): 

4430 action = "abort" 

4431 if o in ("-q", "--query"): 

4432 action = "query" 

4433 if o in ("-x", "--disable"): 

4434 action = "disable" 

4435 if o in ("-u", "--uuid"): 

4436 uuid = a 

4437 if o in ("-b", "--background"): 

4438 background = True 

4439 if o in ("-f", "--force"): 

4440 force = True 

4441 if o in ("-d", "--dry-run"): 

4442 Util.log("Dry run mode") 

4443 dryRun = True 

4444 if o in ("-t", "--debug"): 

4445 action = "debug" 

4446 debug_cmd = a 

4447 if o in ("-v", "--vdi_uuid"): 

4448 vdi_uuid = a 

4449 

4450 if not action or not uuid: 

4451 usage() 

4452 if action == "debug" and not (debug_cmd and vdi_uuid) or \ 

4453 action != "debug" and (debug_cmd or vdi_uuid): 

4454 usage() 

4455 

4456 if action != "query" and action != "debug": 

4457 print("All output goes to log") 

4458 

4459 if action == "gc": 

4460 gc(None, uuid, background, dryRun) 

4461 elif action == "gc_force": 

4462 gc_force(None, uuid, force, dryRun, True) 

4463 elif action == "clean_cache": 

4464 cache_cleanup(None, uuid, maxAge) 

4465 elif action == "abort": 

4466 abort(uuid) 

4467 elif action == "query": 

4468 print("Currently running: %s" % get_state(uuid)) 

4469 elif action == "disable": 

4470 abort_optional_reenable(uuid) 

4471 elif action == "debug": 

4472 debug(uuid, debug_cmd, vdi_uuid) 

4473 

4474 

4475if __name__ == '__main__': 4475 ↛ 4476line 4475 didn't jump to line 4476, because the condition on line 4475 was never true

4476 main()