Coverage for drivers/cleanup.py: 34%
1#!/usr/bin/python3
2#
3# Copyright (C) Citrix Systems Inc.
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU Lesser General Public License as published
7# by the Free Software Foundation; version 2.1 only.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public License
15# along with this program; if not, write to the Free Software Foundation, Inc.,
16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17#
18# Script to coalesce and garbage collect COW-based SRs in the background
19#
21from sm_typing import Any, Optional, List, override
23import os
24import os.path
25import sys
26import time
27import signal
28import subprocess
29import getopt
30import datetime
31import traceback
32import base64
33import zlib
34import errno
35import stat
37import XenAPI # pylint: disable=import-error
38import util
39import lvutil
40import lvmcache
41import journaler
42import fjournaler
43import lock
44import blktap2
45import xs_errors
46from refcounter import RefCounter
47from ipc import IPCFlag
48from lvmanager import LVActivator
49from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG
50from functools import reduce
51from time import monotonic as _time
53from constants import NS_PREFIX_LVM, VG_LOCATION, VG_PREFIX
54from cowutil import CowImageInfo, CowUtil, getCowUtil
55from lvmcowutil import LV_PREFIX, LvmCowUtil
56from vditype import VdiType, VdiTypeExtension, VDI_COW_TYPES, VDI_TYPE_TO_EXTENSION
58try:
59 from linstorcowutil import LinstorCowUtil
60 from linstorjournaler import LinstorJournaler
61 from linstorvolumemanager import get_controller_uri
62 from linstorvolumemanager import LinstorVolumeManager
63 from linstorvolumemanager import LinstorVolumeManagerError
64 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX
66 LINSTOR_AVAILABLE = True
67except ImportError:
68 LINSTOR_AVAILABLE = False
70# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not
71# possible due to lvhd_stop_using_() not working correctly. However, we leave
72# this option available through the explicit LEAFCLSC_FORCE flag in the VDI
73# record for use by the offline tool (which makes the operation safe by pausing
74# the VM first)
75AUTO_ONLINE_LEAF_COALESCE_ENABLED = True
77FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce
79# process "lock", used simply as an indicator that a process already exists
80# that is doing GC/coalesce on this SR (such a process holds the lock, and we
81# check for the fact by trying the lock).
82lockGCRunning = None
84# process "lock" to indicate that the GC process has been activated but may not
85 # yet be running; it stops a second process from being started.
86LOCK_TYPE_GC_ACTIVE = "gc_active"
87lockGCActive = None
89# Default coalesce error rate limit, in messages per minute. A zero value
90# disables throttling, and a negative value disables error reporting.
91DEFAULT_COALESCE_ERR_RATE = 1.0 / 60
93COALESCE_LAST_ERR_TAG = 'last-coalesce-error'
94COALESCE_ERR_RATE_TAG = 'coalesce-error-rate'
95VAR_RUN = "/var/run/"
96SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log"
98N_RUNNING_AVERAGE = 10
100NON_PERSISTENT_DIR = '/run/nonpersistent/sm'
102# Signal Handler
103SIGTERM = False
106class AbortException(util.SMException):
107 pass
109class CancelException(util.SMException):
110 pass
112def receiveSignal(signalNumber, frame):
113 global SIGTERM
115 util.SMlog("GC: recieved SIGTERM")
116 SIGTERM = True
117 return
120################################################################################
121#
122# Util
123#
124class Util:
125 RET_RC = 1
126 RET_STDOUT = 2
127 RET_STDERR = 4
129 UUID_LEN = 36
131 PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024}
133 @staticmethod
134 def log(text) -> None:
135 util.SMlog(text, ident="SMGC")
137 @staticmethod
138 def logException(tag):
139 info = sys.exc_info()
140 if info[0] == SystemExit:  # 140 ↛ 142: line 140 didn't jump to line 142, because the condition on line 140 was never true
141 # this should not be happening when catching "Exception", but it is
142 sys.exit(0)
143 tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2]))
144 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
145 Util.log(" ***********************")
146 Util.log(" * E X C E P T I O N *")
147 Util.log(" ***********************")
148 Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1]))
149 Util.log(tb)
150 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
152 @staticmethod
153 def doexec(args, expectedRC, inputtext=None, ret=None, log=True):
154 "Execute a subprocess, then return its return code, stdout, stderr"
155 proc = subprocess.Popen(args,
156 stdin=subprocess.PIPE, \
157 stdout=subprocess.PIPE, \
158 stderr=subprocess.PIPE, \
159 shell=True, \
160 close_fds=True)
161 (stdout, stderr) = proc.communicate(inputtext)
162 stdout = str(stdout)
163 stderr = str(stderr)
164 rc = proc.returncode
165 if log:
166 Util.log("`%s`: %s" % (args, rc))
167 if type(expectedRC) != type([]):
168 expectedRC = [expectedRC]
169 if not rc in expectedRC:
170 reason = stderr.strip()
171 if stdout.strip():
172 reason = "%s (stdout: %s)" % (reason, stdout.strip())
173 Util.log("Failed: %s" % reason)
174 raise util.CommandException(rc, args, reason)
176 if ret == Util.RET_RC:
177 return rc
178 if ret == Util.RET_STDERR:
179 return stderr
180 return stdout
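# A minimal usage sketch (illustrative, not part of the module): the `ret` flag
# selects what doexec() hands back, and any return code outside `expectedRC`
# raises util.CommandException.
#
#   out = Util.doexec("ls /var/run", 0)                            # stdout (default)
#   rc = Util.doexec("ls /nonexistent", [0, 2], ret=Util.RET_RC)   # return code only
#   err = Util.doexec("ls /nonexistent", [0, 2], ret=Util.RET_STDERR)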
182 @staticmethod
183 def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut, prefSig=signal.SIGKILL):
184 """execute func in a separate thread and kill it if abortTest signals
185 so"""
186 abortSignaled = abortTest() # check now before we clear resultFlag
187 resultFlag = IPCFlag(ns)
188 resultFlag.clearAll()
189 pid = os.fork()
190 if pid:
191 startTime = _time()
192 try:
193 while True:
194 if resultFlag.test("success"):
195 Util.log(" Child process completed successfully")
196 resultFlag.clear("success")
197 return
198 if resultFlag.test("failure"):
199 resultFlag.clear("failure")
200 raise util.SMException("Child process exited with error")
201 if abortTest() or abortSignaled or SIGTERM:
202 os.killpg(pid, prefSig)
203 raise AbortException("Aborting due to signal")
204 if timeOut and _time() - startTime > timeOut:
205 os.killpg(pid, prefSig)
206 resultFlag.clearAll()
207 raise util.SMException("Timed out")
208 time.sleep(pollInterval)
209 finally:
210 wait_pid = 0
211 rc = -1
212 count = 0
213 while wait_pid == 0 and count < 10:
214 wait_pid, rc = os.waitpid(pid, os.WNOHANG)
215 if wait_pid == 0:
216 time.sleep(2)
217 count += 1
219 if wait_pid == 0:
220 Util.log("runAbortable: wait for process completion timed out")
221 else:
222 os.setpgrp()
223 try:
224 if func() == ret:
225 resultFlag.set("success")
226 else:
227 resultFlag.set("failure")
228 except Exception as e:
229 Util.log("Child process failed with : (%s)" % e)
230 resultFlag.set("failure")
231 Util.logException("This exception has occured")
232 os._exit(0)
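# A usage sketch mirroring the calls made later in this file: run `func` in a
# forked child, poll `abortTest` every `pollInterval` seconds, and kill the
# child's process group on abort, SIGTERM or timeout.
#
#   abortTest = lambda: IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT)
#   Util.runAbortable(lambda: util.zeroOut(path, offset, length), True,
#                     sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)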
234 @staticmethod
235 def num2str(number):
236 for prefix in ("G", "M", "K"):  # 236 ↛ 239: line 236 didn't jump to line 239, because the loop on line 236 didn't complete
237 if number >= Util.PREFIX[prefix]:
238 return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix)
239 return "%s" % number
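# Illustrative values, computed from the PREFIX table above:
#   Util.num2str(1536)             -> "1.500K"
#   Util.num2str(20 * 1024 * 1024) -> "20.000M"
#   Util.num2str(512)              -> "512"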
241 @staticmethod
242 def numBits(val):
243 count = 0
244 while val:
245 count += val & 1
246 val = val >> 1
247 return count
249 @staticmethod
250 def countBits(bitmap1, bitmap2):
251 """return bit count in the bitmap produced by ORing the two bitmaps"""
252 len1 = len(bitmap1)
253 len2 = len(bitmap2)
254 lenLong = len1
255 lenShort = len2
256 bitmapLong = bitmap1
257 if len2 > len1:
258 lenLong = len2
259 lenShort = len1
260 bitmapLong = bitmap2
262 count = 0
263 for i in range(lenShort):
264 val = bitmap1[i] | bitmap2[i]
265 count += Util.numBits(val)
267 for i in range(i + 1, lenLong):
268 val = bitmapLong[i]
269 count += Util.numBits(val)
270 return count
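# Example: the shorter bitmap is OR'd with the prefix of the longer one, then the
# remainder of the longer bitmap is counted on its own.
#   Util.countBits(b'\x0f', b'\xf0\x01') -> 9   # 0x0f | 0xf0 has 8 bits set, 0x01 adds 1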
272 @staticmethod
273 def getThisScript():
274 thisScript = util.get_real_path(__file__)
275 if thisScript.endswith(".pyc"):
276 thisScript = thisScript[:-1]
277 return thisScript
280################################################################################
281#
282# XAPI
283#
284class XAPI:
285 USER = "root"
286 PLUGIN_ON_SLAVE = "on-slave"
288 CONFIG_SM = 0
289 CONFIG_OTHER = 1
290 CONFIG_ON_BOOT = 2
291 CONFIG_ALLOW_CACHING = 3
293 CONFIG_NAME = {
294 CONFIG_SM: "sm-config",
295 CONFIG_OTHER: "other-config",
296 CONFIG_ON_BOOT: "on-boot",
297 CONFIG_ALLOW_CACHING: "allow_caching"
298 }
300 class LookupError(util.SMException):
301 pass
303 @staticmethod
304 def getSession():
305 session = XenAPI.xapi_local()
306 session.xenapi.login_with_password(XAPI.USER, '', '', 'SM')
307 return session
309 def __init__(self, session, srUuid):
310 self.sessionPrivate = False
311 self.session = session
312 if self.session is None:
313 self.session = self.getSession()
314 self.sessionPrivate = True
315 self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid)
316 self.srRecord = self.session.xenapi.SR.get_record(self._srRef)
317 self.hostUuid = util.get_this_host()
318 self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid)
319 self.task = None
320 self.task_progress = {"coalescable": 0, "done": 0}
322 def __del__(self):
323 if self.sessionPrivate:
324 self.session.xenapi.session.logout()
326 @property
327 def srRef(self):
328 return self._srRef
330 def isPluggedHere(self):
331 pbds = self.getAttachedPBDs()
332 for pbdRec in pbds:
333 if pbdRec["host"] == self._hostRef:
334 return True
335 return False
337 def poolOK(self):
338 host_recs = self.session.xenapi.host.get_all_records()
339 for host_ref, host_rec in host_recs.items():
340 if not host_rec["enabled"]:
341 Util.log("Host %s not enabled" % host_rec["uuid"])
342 return False
343 return True
345 def isMaster(self):
346 if self.srRecord["shared"]:
347 pool = list(self.session.xenapi.pool.get_all_records().values())[0]
348 return pool["master"] == self._hostRef
349 else:
350 pbds = self.getAttachedPBDs()
351 if len(pbds) < 1:
352 raise util.SMException("Local SR not attached")
353 elif len(pbds) > 1:
354 raise util.SMException("Local SR multiply attached")
355 return pbds[0]["host"] == self._hostRef
357 def getAttachedPBDs(self):
358 """Return PBD records for all PBDs of this SR that are currently
359 attached"""
360 attachedPBDs = []
361 pbds = self.session.xenapi.PBD.get_all_records()
362 for pbdRec in pbds.values():
363 if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]:
364 attachedPBDs.append(pbdRec)
365 return attachedPBDs
367 def getOnlineHosts(self):
368 return util.get_online_hosts(self.session)
370 def ensureInactive(self, hostRef, args):
371 text = self.session.xenapi.host.call_plugin( \
372 hostRef, self.PLUGIN_ON_SLAVE, "multi", args)
373 Util.log("call-plugin returned: '%s'" % text)
375 def getRecordHost(self, hostRef):
376 return self.session.xenapi.host.get_record(hostRef)
378 def _getRefVDI(self, uuid):
379 return self.session.xenapi.VDI.get_by_uuid(uuid)
381 def getRefVDI(self, vdi):
382 return self._getRefVDI(vdi.uuid)
384 def getRecordVDI(self, uuid):
385 try:
386 ref = self._getRefVDI(uuid)
387 return self.session.xenapi.VDI.get_record(ref)
388 except XenAPI.Failure:
389 return None
391 def singleSnapshotVDI(self, vdi):
392 return self.session.xenapi.VDI.snapshot(vdi.getRef(),
393 {"type": "internal"})
395 def forgetVDI(self, srUuid, vdiUuid):
396 """Forget the VDI, but handle the case where the VDI has already been
397 forgotten (i.e. ignore errors)"""
398 try:
399 vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid)
400 self.session.xenapi.VDI.forget(vdiRef)
401 except XenAPI.Failure:
402 pass
404 def getConfigVDI(self, vdi, key):
405 kind = vdi.CONFIG_TYPE[key]
406 if kind == self.CONFIG_SM:
407 cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef())
408 elif kind == self.CONFIG_OTHER:
409 cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef())
410 elif kind == self.CONFIG_ON_BOOT:
411 cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef())
412 elif kind == self.CONFIG_ALLOW_CACHING:
413 cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef())
414 else:
415 assert(False)
416 Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg)))
417 return cfg
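# Dispatch sketch: the key's kind comes from VDI.CONFIG_TYPE (defined on the VDI
# class below), so for example reading VDI.DB_LEAFCLSC (a CONFIG_OTHER key)
# effectively does
#   cfg = session.xenapi.VDI.get_other_config(vdi.getRef())
# and the caller then looks up cfg.get(key), whereas DB_ONBOOT and
# DB_ALLOW_CACHING return the value directly (see VDI.getConfig below).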
419 def removeFromConfigVDI(self, vdi, key):
420 kind = vdi.CONFIG_TYPE[key]
421 if kind == self.CONFIG_SM:
422 self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key)
423 elif kind == self.CONFIG_OTHER:
424 self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key)
425 else:
426 assert(False)
428 def addToConfigVDI(self, vdi, key, val):
429 kind = vdi.CONFIG_TYPE[key]
430 if kind == self.CONFIG_SM:
431 self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val)
432 elif kind == self.CONFIG_OTHER:
433 self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val)
434 else:
435 assert(False)
437 def isSnapshot(self, vdi):
438 return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef())
440 def markCacheSRsDirty(self):
441 sr_refs = self.session.xenapi.SR.get_all_records_where( \
442 'field "local_cache_enabled" = "true"')
443 for sr_ref in sr_refs:
444 Util.log("Marking SR %s dirty" % sr_ref)
445 util.set_dirty(self.session, sr_ref)
447 def srUpdate(self):
448 Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"])
449 abortFlag = IPCFlag(self.srRecord["uuid"])
450 task = self.session.xenapi.Async.SR.update(self._srRef)
451 cancelTask = True
452 try:
453 for i in range(60):
454 status = self.session.xenapi.task.get_status(task)
455 if not status == "pending":
456 Util.log("SR.update_asynch status changed to [%s]" % status)
457 cancelTask = False
458 return
459 if abortFlag.test(FLAG_TYPE_ABORT):
460 Util.log("Abort signalled during srUpdate, cancelling task...")
461 try:
462 self.session.xenapi.task.cancel(task)
463 cancelTask = False
464 Util.log("Task cancelled")
465 except:
466 pass
467 return
468 time.sleep(1)
469 finally:
470 if cancelTask:
471 self.session.xenapi.task.cancel(task)
472 self.session.xenapi.task.destroy(task)
473 Util.log("Asynch srUpdate still running, but timeout exceeded.")
475 def update_task(self):
476 self.session.xenapi.task.set_other_config(
477 self.task,
478 {
479 "applies_to": self._srRef
480 })
481 total = self.task_progress['coalescable'] + self.task_progress['done']
482 if (total > 0):
483 self.session.xenapi.task.set_progress(
484 self.task, float(self.task_progress['done']) / total)
486 def create_task(self, label, description):
487 self.task = self.session.xenapi.task.create(label, description)
488 self.update_task()
490 def update_task_progress(self, key, value):
491 self.task_progress[key] = value
492 if self.task:
493 self.update_task()
495 def set_task_status(self, status):
496 if self.task:
497 self.session.xenapi.task.set_status(self.task, status)
500################################################################################
501#
502# VDI
503#
504class VDI(object):
505 """Object representing a VDI of a COW-based SR"""
507 POLL_INTERVAL = 1
508 POLL_TIMEOUT = 30
509 DEVICE_MAJOR = 202
511 # config keys & values
512 DB_VDI_PARENT = "vhd-parent"
513 DB_VDI_TYPE = "vdi_type"
514 DB_VDI_BLOCKS = "vhd-blocks"
515 DB_VDI_PAUSED = "paused"
516 DB_VDI_RELINKING = "relinking"
517 DB_VDI_ACTIVATING = "activating"
518 DB_GC = "gc"
519 DB_COALESCE = "coalesce"
520 DB_LEAFCLSC = "leaf-coalesce" # config key
521 DB_GC_NO_SPACE = "gc_no_space"
522 LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce
523 LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce
524 LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means
525 # no space to snap-coalesce or unable to keep
526 # up with VDI. This is not used by the SM, it
527 # might be used by external components.
528 DB_ONBOOT = "on-boot"
529 ONBOOT_RESET = "reset"
530 DB_ALLOW_CACHING = "allow_caching"
532 CONFIG_TYPE = {
533 DB_VDI_PARENT: XAPI.CONFIG_SM,
534 DB_VDI_TYPE: XAPI.CONFIG_SM,
535 DB_VDI_BLOCKS: XAPI.CONFIG_SM,
536 DB_VDI_PAUSED: XAPI.CONFIG_SM,
537 DB_VDI_RELINKING: XAPI.CONFIG_SM,
538 DB_VDI_ACTIVATING: XAPI.CONFIG_SM,
539 DB_GC: XAPI.CONFIG_OTHER,
540 DB_COALESCE: XAPI.CONFIG_OTHER,
541 DB_LEAFCLSC: XAPI.CONFIG_OTHER,
542 DB_ONBOOT: XAPI.CONFIG_ON_BOOT,
543 DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING,
544 DB_GC_NO_SPACE: XAPI.CONFIG_SM
545 }
547 LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes
548 LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds
549 TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating
550 # feasibility of leaf coalesce
552 JRN_RELINK = "relink" # journal entry type for relinking children
553 JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced
554 JRN_LEAF = "leaf" # used in coalesce-leaf
556 STR_TREE_INDENT = 4
558 def __init__(self, sr, uuid, vdi_type):
559 self.sr = sr
560 self.scanError = True
561 self.uuid = uuid
562 self.vdi_type = vdi_type
563 self.fileName = ""
564 self.parentUuid = ""
565 self.sizeVirt = -1
566 self._sizePhys = -1
567 self._sizeAllocated = -1
568 self._hidden = False
569 self.parent = None
570 self.children = []
571 self._vdiRef = None
572 self.cowutil = getCowUtil(vdi_type)
573 self._clearRef()
575 @staticmethod
576 def extractUuid(path):
577 raise NotImplementedError("Implement in sub class")
579 def load(self, info=None) -> None:
580 """Load VDI info"""
581 pass
583 def getDriverName(self) -> str:
584 return self.vdi_type
586 def getRef(self):
587 if self._vdiRef is None:
588 self._vdiRef = self.sr.xapi.getRefVDI(self)
589 return self._vdiRef
591 def getConfig(self, key, default=None):
592 config = self.sr.xapi.getConfigVDI(self, key)
593 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING:  # 593 ↛ 594: line 593 didn't jump to line 594, because the condition on line 593 was never true
594 val = config
595 else:
596 val = config.get(key)
597 if val:
598 return val
599 return default
601 def setConfig(self, key, val):
602 self.sr.xapi.removeFromConfigVDI(self, key)
603 self.sr.xapi.addToConfigVDI(self, key, val)
604 Util.log("Set %s = %s for %s" % (key, val, self))
606 def delConfig(self, key):
607 self.sr.xapi.removeFromConfigVDI(self, key)
608 Util.log("Removed %s from %s" % (key, self))
610 def ensureUnpaused(self):
611 if self.getConfig(self.DB_VDI_PAUSED) == "true":
612 Util.log("Unpausing VDI %s" % self)
613 self.unpause()
615 def pause(self, failfast=False) -> None:
616 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid,
617 self.uuid, failfast):
618 raise util.SMException("Failed to pause VDI %s" % self)
620 def _report_tapdisk_unpause_error(self):
621 try:
622 xapi = self.sr.xapi.session.xenapi
623 sr_ref = xapi.SR.get_by_uuid(self.sr.uuid)
624 msg_name = "failed to unpause tapdisk"
625 msg_body = "Failed to unpause tapdisk for VDI %s, " \
626 "VMs using this tapdisk have lost access " \
627 "to the corresponding disk(s)" % self.uuid
628 xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body)
629 except Exception as e:
630 util.SMlog("failed to generate message: %s" % e)
632 def unpause(self):
633 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid,
634 self.uuid):
635 self._report_tapdisk_unpause_error()
636 raise util.SMException("Failed to unpause VDI %s" % self)
638 def refresh(self, ignoreNonexistent=True):
639 """Pause-unpause in one step"""
640 self.sr.lock()
641 try:
642 try:
643 if not blktap2.VDI.tap_refresh(self.sr.xapi.session,  # 643 ↛ 645: line 643 didn't jump to line 645, because the condition on line 643 was never true
644 self.sr.uuid, self.uuid):
645 self._report_tapdisk_unpause_error()
646 raise util.SMException("Failed to refresh %s" % self)
647 except XenAPI.Failure as e:
648 if util.isInvalidVDI(e) and ignoreNonexistent:
649 Util.log("VDI %s not found, ignoring" % self)
650 return
651 raise
652 finally:
653 self.sr.unlock()
655 def isSnapshot(self):
656 return self.sr.xapi.isSnapshot(self)
658 def isAttachedRW(self):
659 return util.is_attached_rw(
660 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef()))
662 def getVDIBlocks(self):
663 val = self.updateBlockInfo()
664 bitmap = zlib.decompress(base64.b64decode(val))
665 return bitmap
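# The DB_VDI_BLOCKS value written by updateBlockInfo() is assumed to be the
# zlib-compressed block bitmap, base64-encoded so it fits in sm-config; this
# method reverses that encoding. Round-trip sketch with a made-up bitmap:
#   raw = b'\xff\x00\x0f'
#   stored = base64.b64encode(zlib.compress(raw)).decode()
#   zlib.decompress(base64.b64decode(stored)) == raw   # True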
667 def isCoalesceable(self):
668 """A VDI is coalesceable if it has no siblings and is not a leaf"""
669 return not self.scanError and \
670 self.parent and \
671 len(self.parent.children) == 1 and \
672 self.isHidden() and \
673 len(self.children) > 0
675 def isLeafCoalesceable(self):
676 """A VDI is leaf-coalesceable if it has no siblings and is a leaf"""
677 return not self.scanError and \
678 self.parent and \
679 len(self.parent.children) == 1 and \
680 not self.isHidden() and \
681 len(self.children) == 0
683 def canLiveCoalesce(self, speed):
684 """Can we stop-and-leaf-coalesce this VDI? The VDI must be
685 isLeafCoalesceable() already"""
686 feasibleSize = False
687 allowedDownTime = \
688 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT
689 allocated_size = self.getAllocatedSize()
690 if speed:
691 feasibleSize = \
692 allocated_size // speed < allowedDownTime
693 else:
694 feasibleSize = \
695 allocated_size < self.LIVE_LEAF_COALESCE_MAX_SIZE
697 return (feasibleSize or
698 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE)
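# Worked example of the feasibility check (numbers are illustrative):
#   allowedDownTime = TIMEOUT_SAFETY_MARGIN * LIVE_LEAF_COALESCE_TIMEOUT = 0.5 * 10 = 5s
#   with speed = 100 MiB/s and allocated_size = 256 MiB: 256 // 100 = 2s < 5s -> feasible
#   with no speed estimate, only VDIs under LIVE_LEAF_COALESCE_MAX_SIZE (20 MiB)
#   qualify, unless the LEAFCLSC_FORCE flag is set on the VDI.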
700 def getAllPrunable(self):
701 if len(self.children) == 0: # base case
702 # it is possible to have a hidden leaf that was recently coalesced
703 # onto its parent, its children already relinked but not yet
704 # reloaded - in which case it may not be garbage collected yet:
705 # some tapdisks could still be using the file.
706 if self.sr.journaler.get(self.JRN_RELINK, self.uuid):
707 return []
708 if not self.scanError and self.isHidden():
709 return [self]
710 return []
712 thisPrunable = True
713 vdiList = []
714 for child in self.children:
715 childList = child.getAllPrunable()
716 vdiList.extend(childList)
717 if child not in childList:
718 thisPrunable = False
720 # We can destroy the current VDI if all children are hidden BUT the
721 # current VDI must be hidden too to do that!
722 # Example in this case (after a failed live leaf coalesce):
723 #
724 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees):
725 # SMGC: [32436] b5458d61(1.000G/4.127M)
726 # SMGC: [32436] *OLD_b545(1.000G/4.129M)
727 #
728 # OLD_b545 is hidden and must be removed, but b5458d61 must not be.
729 # Normally the delete action is not executed from this function but from
730 # `_liveLeafCoalesce`.
732 if not self.scanError and not self.isHidden() and thisPrunable:
733 vdiList.append(self)
734 return vdiList
736 def getSizePhys(self) -> int:
737 return self._sizePhys
739 def getAllocatedSize(self) -> int:
740 return self._sizeAllocated
742 def getTreeRoot(self):
743 "Get the root of the tree that self belongs to"
744 root = self
745 while root.parent:
746 root = root.parent
747 return root
749 def getTreeHeight(self):
750 "Get the height of the subtree rooted at self"
751 if len(self.children) == 0:
752 return 1
754 maxChildHeight = 0
755 for child in self.children:
756 childHeight = child.getTreeHeight()
757 if childHeight > maxChildHeight:
758 maxChildHeight = childHeight
760 return maxChildHeight + 1
762 def getAllLeaves(self) -> List["VDI"]:
763 "Get all leaf nodes in the subtree rooted at self"
764 if len(self.children) == 0:
765 return [self]
767 leaves = []
768 for child in self.children:
769 leaves.extend(child.getAllLeaves())
770 return leaves
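# Tree-helper sketch: for a chain  root <- parent <- leaf  (each node's `children`
# list holding the next one), getTreeRoot() on any node returns root,
# getTreeHeight() on root is 3, and getAllLeaves() on root returns [leaf].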
772 def updateBlockInfo(self) -> Optional[str]:
773 val = base64.b64encode(self._queryCowBlocks()).decode()
774 try:
775 self.setConfig(VDI.DB_VDI_BLOCKS, val)
776 except Exception:
777 if self.vdi_type != VdiType.QCOW2:
778 raise
779 # Sometimes with QCOW2 the allocation table is too big to be stored in XAPI; in this case we do not store it
780 # and write `skipped` instead so that hasWork is happy (and the GC doesn't loop indefinitely).
781 self.setConfig(VDI.DB_VDI_BLOCKS, "skipped")
783 return val
785 def rename(self, uuid) -> None:
786 "Rename the VDI file"
787 assert(not self.sr.vdis.get(uuid))
788 self._clearRef()
789 oldUuid = self.uuid
790 self.uuid = uuid
791 self.children = []
792 # updating the children themselves is the responsibility of the caller
793 del self.sr.vdis[oldUuid]
794 self.sr.vdis[self.uuid] = self
796 def delete(self) -> None:
797 "Physically delete the VDI"
798 lock.Lock.cleanup(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
799 lock.Lock.cleanupAll(self.uuid)
800 self._clear()
802 def getParent(self) -> str:
803 return self.cowutil.getParent(self.path, lambda x: x.strip())  # 803 ↛ exit: line 803 didn't run the lambda on line 803
805 def repair(self, parent) -> None:
806 self.cowutil.repair(parent)
808 @override
809 def __str__(self) -> str:
810 strHidden = ""
811 if self.isHidden():  # 811 ↛ 812: line 811 didn't jump to line 812, because the condition on line 811 was never true
812 strHidden = "*"
813 strSizeVirt = "?"
814 if self.sizeVirt > 0:  # 814 ↛ 815: line 814 didn't jump to line 815, because the condition on line 814 was never true
815 strSizeVirt = Util.num2str(self.sizeVirt)
816 strSizePhys = "?"
817 if self._sizePhys > 0:  # 817 ↛ 818: line 817 didn't jump to line 818, because the condition on line 817 was never true
818 strSizePhys = "/%s" % Util.num2str(self._sizePhys)
819 strSizeAllocated = "?"
820 if self._sizeAllocated >= 0:
821 strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated)
822 strType = "[{}]".format(self.vdi_type)
824 return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt,
825 strSizePhys, strSizeAllocated, strType)
827 def validate(self, fast=False) -> None:
828 if self.cowutil.check(self.path, fast=fast) != CowUtil.CheckResult.Success:  # 828 ↛ 829: line 828 didn't jump to line 829, because the condition on line 828 was never true
829 raise util.SMException("COW image %s corrupted" % self)
831 def _clear(self):
832 self.uuid = ""
833 self.path = ""
834 self.parentUuid = ""
835 self.parent = None
836 self._clearRef()
838 def _clearRef(self):
839 self._vdiRef = None
841 @staticmethod
842 def _cancel_exception(sig, frame):
843 raise CancelException()
845 def _call_plugin_coalesce(self, hostRef):
846 signal.signal(signal.SIGTERM, self._cancel_exception)
847 args = {"path": self.path, "vdi_type": self.vdi_type}
848 util.SMlog("DAMS: Calling remote coalesce with: {}".format(args))
849 try:
850 ret = self.sr.xapi.session.xenapi.host.call_plugin( \
851 hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_tapdisk", args)
852 util.SMlog("DAMS: Remote coalesce returned {}".format(ret))
853 except CancelException:
854 util.SMlog(f"DAMS: Cancelling online coalesce following signal {args}")
855 self.sr.xapi.session.xenapi.host.call_plugin( \
856 hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_cancel", args)
857 except Exception:
858 raise
860 def _doCoalesceOnHost(self, hostRef):
861 self.validate()
862 self.parent.validate(True)
863 self.parent._increaseSizeVirt(self.sizeVirt)
864 self.sr._updateSlavesOnResize(self.parent)
865 #TODO: We might need to make the LV RW on the slave directly for coalesce?
866 # Children and parent need to be RW for QCOW2 coalesce, otherwise tapdisk(libqcow) will crash trying to access them
868 def abortTest():
869 file = self.sr._gc_running_file(self)
870 try:
871 with open(file, "r") as f:
872 if not f.read():
873 util.SMlog("DAMS: abortTest: Cancelling coalesce")
874 return True
875 except OSError as e:
876 if e.errno == errno.ENOENT:
877 util.SMlog("File {} does not exist".format(file))
878 else:
879 util.SMlog("IOError: {}".format(e))
880 return True
881 return False
883 Util.runAbortable(lambda: self._call_plugin_coalesce(hostRef), \
884 None, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0, prefSig=signal.SIGTERM)
886 self.parent.validate(True)
887 #self._verifyContents(0)
888 self.parent.updateBlockInfo()
890 def _isOpenOnHosts(self) -> Optional[str]:
891 for pbdRecord in self.sr.xapi.getAttachedPBDs():
892 hostRef = pbdRecord["host"]
893 args = {"path": self.path}
894 is_openers = util.strtobool(self.sr.xapi.session.xenapi.host.call_plugin( \
895 hostRef, XAPI.PLUGIN_ON_SLAVE, "is_openers", args))
896 if is_openers:
897 return hostRef
898 return None
900 def _doCoalesce(self) -> None:
901 """Coalesce self onto parent. Only perform the actual coalescing of
902 an image, but not the subsequent relinking. We'll do that as the next step,
903 after reloading the entire SR in case things have changed while we
904 were coalescing"""
905 self.validate()
906 self.parent.validate(True)
907 self.parent._increaseSizeVirt(self.sizeVirt)
908 self.sr._updateSlavesOnResize(self.parent)
909 self._coalesceCowImage(0)
910 self.parent.validate(True)
911 #self._verifyContents(0)
912 self.parent.updateBlockInfo()
914 def _verifyContents(self, timeOut):
915 Util.log(" Coalesce verification on %s" % self)
916 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
917 Util.runAbortable(lambda: self._runTapdiskDiff(), True,
918 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
919 Util.log(" Coalesce verification succeeded")
921 def _runTapdiskDiff(self):
922 cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \
923 (self.getDriverName(), self.path, \
924 self.parent.getDriverName(), self.parent.path)
925 Util.doexec(cmd, 0)
926 return True
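# For a VHD child/parent pair the command built above would look roughly like
#   tapdisk-diff -n vhd:/<sr-path>/<child>.vhd -m vhd:/<sr-path>/<parent>.vhd
# (driver names come from getDriverName(), i.e. the vdi_type; the paths and the
# "vhd" driver name are illustrative assumptions).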
928 @staticmethod
929 def _reportCoalesceError(vdi, ce):
930 """Reports a coalesce error to XenCenter.
932 vdi: the VDI object on which the coalesce error occurred
933 ce: the CommandException that was raised"""
935 msg_name = os.strerror(ce.code)
936 if ce.code == errno.ENOSPC:
937 # TODO We could add more information here, e.g. exactly how much
938 # space is required for the particular coalesce, as well as actions
939 # to be taken by the user and consequences of not taking these
940 # actions.
941 msg_body = 'Ran out of space while coalescing.'
942 elif ce.code == errno.EIO:
943 msg_body = 'I/O error while coalescing.'
944 else:
945 msg_body = ''
946 util.SMlog('Coalesce failed on SR %s: %s (%s)'
947 % (vdi.sr.uuid, msg_name, msg_body))
949 # Create a XenCenter message, but don't spam.
950 xapi = vdi.sr.xapi.session.xenapi
951 sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid)
952 oth_cfg = xapi.SR.get_other_config(sr_ref)
953 if COALESCE_ERR_RATE_TAG in oth_cfg:
954 coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG])
955 else:
956 coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE
958 xcmsg = False
959 if coalesce_err_rate == 0:
960 xcmsg = True
961 elif coalesce_err_rate > 0:
962 now = datetime.datetime.now()
963 sm_cfg = xapi.SR.get_sm_config(sr_ref)
964 if COALESCE_LAST_ERR_TAG in sm_cfg:
965 # seconds per message (minimum distance in time between two
966 # messages in seconds)
967 spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60)
968 last = datetime.datetime.fromtimestamp(
969 float(sm_cfg[COALESCE_LAST_ERR_TAG]))
970 if now - last >= spm:
971 xapi.SR.remove_from_sm_config(sr_ref,
972 COALESCE_LAST_ERR_TAG)
973 xcmsg = True
974 else:
975 xcmsg = True
976 if xcmsg:
977 xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG,
978 str(now.strftime('%s')))
979 if xcmsg:
980 xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body)
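# Throttling arithmetic with the default COALESCE_ERR_RATE of 1.0/60 messages per
# minute: spm = (1 / (1.0/60)) * 60 = 3600 seconds, i.e. at most one XenCenter
# message per hour. A rate of 0 reports every failure; a negative rate reports none.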
982 def coalesce(self) -> int:
983 return self.cowutil.coalesce(self.path)
985 @staticmethod
986 def _doCoalesceCowImage(vdi: "VDI"):
987 try:
988 startTime = time.time()
989 allocated_size = vdi.getAllocatedSize()
990 coalesced_size = vdi.coalesce()
991 endTime = time.time()
992 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size)
993 except util.CommandException as ce:
994 # We use try/except for the following piece of code because it runs
995 # in a separate process context and errors will not be caught and
996 # reported by anyone.
997 try:
998 # Report coalesce errors back to user via XC
999 VDI._reportCoalesceError(vdi, ce)
1000 except Exception as e:
1001 util.SMlog('failed to create XenCenter message: %s' % e)
1002 raise ce
1003 except:
1004 raise
1006 def _vdi_is_raw(self, vdi_path):
1007 """
1008 Given the path to a VDI, determine whether it is raw.
1009 """
1010 uuid = self.extractUuid(vdi_path)
1011 return self.sr.vdis[uuid].vdi_type == VdiType.RAW
1013 def _coalesceCowImage(self, timeOut):
1014 Util.log(" Running COW coalesce on %s" % self)
1015 def abortTest():
1016 if self.cowutil.isCoalesceableOnRemote():
1017 file = self.sr._gc_running_file(self)
1018 try:
1019 with open(file, "r") as f:
1020 if not f.read():
1021 return True
1022 except OSError as e:
1023 if e.errno == errno.ENOENT:
1024 util.SMlog("File {} does not exist".format(file))
1025 else:
1026 util.SMlog("IOError: {}".format(e))
1027 return True
1028 return IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
1030 try:
1031 util.fistpoint.activate_custom_fn(
1032 "cleanup_coalesceVHD_inject_failure",
1033 util.inject_failure)
1034 Util.runAbortable(lambda: VDI._doCoalesceCowImage(self), None,
1035 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
1036 except:
1037 # Exception at this phase could indicate a failure in COW coalesce
1038 # or a kill of COW coalesce by runAbortable due to timeOut
1039 # Try a repair and reraise the exception
1040 parent = ""
1041 try:
1042 parent = self.getParent()
1043 if not self._vdi_is_raw(parent):
1044 # Repair error is logged and ignored. Error reraised later
1045 util.SMlog('Coalesce failed on %s, attempting repair on ' \
1046 'parent %s' % (self.uuid, parent))
1047 self.repair(parent)
1048 except Exception as e:
1049 util.SMlog('(error ignored) Failed to repair parent %s ' \
1050 'after failed coalesce on %s, err: %s' %
1051 (parent, self.path, e))
1052 raise
1054 util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid)
1056 def _relinkSkip(self) -> None:
1057 """Relink children of this VDI to point to the parent of this VDI"""
1058 abortFlag = IPCFlag(self.sr.uuid)
1059 for child in self.children:
1060 if abortFlag.test(FLAG_TYPE_ABORT):  # 1060 ↛ 1061: line 1060 didn't jump to line 1061, because the condition on line 1060 was never true
1061 raise AbortException("Aborting due to signal")
1062 Util.log(" Relinking %s from %s to %s" % \
1063 (child, self, self.parent))
1064 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid)
1065 child._setParent(self.parent)
1066 self.children = []
1068 def _reloadChildren(self, vdiSkip):
1069 """Pause & unpause all VDIs in the subtree to cause blktap to reload
1070 the COW image metadata for this file in any online VDI"""
1071 abortFlag = IPCFlag(self.sr.uuid)
1072 for child in self.children:
1073 if child == vdiSkip:
1074 continue
1075 if abortFlag.test(FLAG_TYPE_ABORT):  # 1075 ↛ 1076: line 1075 didn't jump to line 1076, because the condition on line 1075 was never true
1076 raise AbortException("Aborting due to signal")
1077 Util.log(" Reloading VDI %s" % child)
1078 child._reload()
1080 def _reload(self):
1081 """Pause & unpause to cause blktap to reload the image metadata"""
1082 for child in self.children:  # 1082 ↛ 1083: line 1082 didn't jump to line 1083, because the loop on line 1082 never started
1083 child._reload()
1085 # only leaves can be attached
1086 if len(self.children) == 0:  # 1086 ↛ exit: line 1086 didn't return from function '_reload', because the condition on line 1086 was never false
1087 try:
1088 self.delConfig(VDI.DB_VDI_RELINKING)
1089 except XenAPI.Failure as e:
1090 if not util.isInvalidVDI(e):
1091 raise
1092 self.refresh()
1094 def _tagChildrenForRelink(self):
1095 if len(self.children) == 0:
1096 retries = 0
1097 try:
1098 while retries < 15:
1099 retries += 1
1100 if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None:
1101 Util.log("VDI %s is activating, wait to relink" %
1102 self.uuid)
1103 else:
1104 self.setConfig(VDI.DB_VDI_RELINKING, "True")
1106 if self.getConfig(VDI.DB_VDI_ACTIVATING):
1107 self.delConfig(VDI.DB_VDI_RELINKING)
1108 Util.log("VDI %s started activating while tagging" %
1109 self.uuid)
1110 else:
1111 return
1112 time.sleep(2)
1114 raise util.SMException("Failed to tag vdi %s for relink" % self)
1115 except XenAPI.Failure as e:
1116 if not util.isInvalidVDI(e):
1117 raise
1119 for child in self.children:
1120 child._tagChildrenForRelink()
1122 def _loadInfoParent(self):
1123 ret = self.cowutil.getParent(self.path, LvmCowUtil.extractUuid)
1124 if ret:
1125 self.parentUuid = ret
1127 def _setParent(self, parent) -> None:
1128 self.cowutil.setParent(self.path, parent.path, False)
1129 self.parent = parent
1130 self.parentUuid = parent.uuid
1131 parent.children.append(self)
1132 try:
1133 self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
1134 Util.log("Updated the vhd-parent field for child %s with %s" % \
1135 (self.uuid, self.parentUuid))
1136 except:
1137 Util.log("Failed to update %s with vhd-parent field %s" % \
1138 (self.uuid, self.parentUuid))
1140 def isHidden(self) -> bool:
1141 if self._hidden is None:  # 1141 ↛ 1142: line 1141 didn't jump to line 1142, because the condition on line 1141 was never true
1142 self._loadInfoHidden()
1143 return self._hidden
1145 def _loadInfoHidden(self) -> None:
1146 hidden = self.cowutil.getHidden(self.path)
1147 self._hidden = (hidden != 0)
1149 def _setHidden(self, hidden=True) -> None:
1150 self._hidden = None
1151 self.cowutil.setHidden(self.path, hidden)
1152 self._hidden = hidden
1154 def _increaseSizeVirt(self, size, atomic=True) -> None:
1155 """ensure the virtual size of 'self' is at least 'size'. Note that
1156 resizing a COW image must always be done offline and atomically: the file must
1157 not be open by anyone and no concurrent operations may take place.
1158 Thus we use the Agent API call for performing paused atomic
1159 operations. If the caller is already in the atomic context, it must
1160 call with atomic = False"""
1161 if self.sizeVirt >= size:  # 1161 ↛ 1163: line 1161 didn't jump to line 1163, because the condition on line 1161 was never false
1162 return
1163 Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \
1164 (self, Util.num2str(self.sizeVirt), Util.num2str(size)))
1166 msize = self.cowutil.getMaxResizeSize(self.path)
1167 if (size <= msize):
1168 self.cowutil.setSizeVirtFast(self.path, size)
1169 else:
1170 if atomic:
1171 vdiList = self._getAllSubtree()
1172 self.sr.lock()
1173 try:
1174 self.sr.pauseVDIs(vdiList)
1175 try:
1176 self._setSizeVirt(size)
1177 finally:
1178 self.sr.unpauseVDIs(vdiList)
1179 finally:
1180 self.sr.unlock()
1181 else:
1182 self._setSizeVirt(size)
1184 self.sizeVirt = self.cowutil.getSizeVirt(self.path)
1186 def _setSizeVirt(self, size) -> None:
1187 """WARNING: do not call this method directly unless all VDIs in the
1188 subtree are guaranteed to be unplugged (and remain so for the duration
1189 of the operation): this operation is only safe for offline COW images"""
1190 jFile = os.path.join(self.sr.path, self.uuid)
1191 self.cowutil.setSizeVirt(self.path, size, jFile)
1193 def _queryCowBlocks(self) -> bytes:
1194 return self.cowutil.getBlockBitmap(self.path)
1196 def _getCoalescedSizeData(self):
1197 """Get the data size of the resulting image if we coalesce self onto
1198 parent. We calculate the actual size by using the image block allocation
1199 information (as opposed to just adding up the two image sizes to get an
1200 upper bound)"""
1201 # make sure we don't use stale BAT info from vdi_rec since the child
1202 # was writable all this time
1203 self.delConfig(VDI.DB_VDI_BLOCKS)
1204 blocksChild = self.getVDIBlocks()
1205 blocksParent = self.parent.getVDIBlocks()
1206 numBlocks = Util.countBits(blocksChild, blocksParent)
1207 Util.log("Num combined blocks = %d" % numBlocks)
1208 sizeData = numBlocks * self.cowutil.getBlockSize(self.path)
1209 assert(sizeData <= self.sizeVirt)
1210 return sizeData
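# Illustrative calculation: if the OR of the two block bitmaps covers 1000 blocks
# and the image block size is 2 MiB, the coalesced data size is 1000 * 2 MiB
# = 2000 MiB; the assertion then checks it never exceeds the virtual size.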
1212 def _calcExtraSpaceForCoalescing(self) -> int:
1213 sizeData = self._getCoalescedSizeData()
1214 sizeCoalesced = sizeData + self.cowutil.calcOverheadBitmap(sizeData) + \
1215 self.cowutil.calcOverheadEmpty(self.sizeVirt)
1216 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
1217 return sizeCoalesced - self.parent.getSizePhys()
1219 def _calcExtraSpaceForLeafCoalescing(self) -> int:
1220 """How much extra space in the SR will be required to
1221 [live-]leaf-coalesce this VDI"""
1222 # the space requirements are the same as for inline coalesce
1223 return self._calcExtraSpaceForCoalescing()
1225 def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
1226 """How much extra space in the SR will be required to
1227 snapshot-coalesce this VDI"""
1228 return self._calcExtraSpaceForCoalescing() + \
1229 self.cowutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf
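# Illustrative space math: if the coalesced size estimate is 10 GiB and the parent
# currently occupies 4 GiB physically, a plain coalesce needs about 6 GiB of extra
# SR space; snapshot-coalesce additionally reserves the overhead of one empty leaf
# image of the same virtual size (cowutil.calcOverheadEmpty(self.sizeVirt)).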
1231 def _getAllSubtree(self):
1232 """Get self and all VDIs in the subtree of self as a flat list"""
1233 vdiList = [self]
1234 for child in self.children:
1235 vdiList.extend(child._getAllSubtree())
1236 return vdiList
1239class FileVDI(VDI):
1240 """Object representing a VDI in a file-based SR (EXT or NFS)"""
1242 @override
1243 @staticmethod
1244 def extractUuid(path):
1245 fileName = os.path.basename(path)
1246 return os.path.splitext(fileName)[0]
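# e.g. extractUuid("/run/sr-mount/<sr-uuid>/<vdi-uuid>.vhd") -> "<vdi-uuid>"
# (the basename minus its extension is the VDI UUID; the path is illustrative).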
1248 def __init__(self, sr, uuid, vdi_type):
1249 VDI.__init__(self, sr, uuid, vdi_type)
1250 self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])
1252 @override
1253 def load(self, info=None) -> None:
1254 if not info:
1255 if not util.pathexists(self.path):
1256 raise util.SMException("%s not found" % self.path)
1257 try:
1258 info = self.cowutil.getInfo(self.path, self.extractUuid)
1259 except util.SMException:
1260 Util.log(" [VDI %s: failed to read COW image metadata]" % self.uuid)
1261 return
1262 self.parent = None
1263 self.children = []
1264 self.parentUuid = info.parentUuid
1265 self.sizeVirt = info.sizeVirt
1266 self._sizePhys = info.sizePhys
1267 self._sizeAllocated = info.sizeAllocated
1268 self._hidden = info.hidden
1269 self.scanError = False
1270 self.path = os.path.join(self.sr.path, "%s%s" % \
1271 (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type]))
1273 @override
1274 def rename(self, uuid) -> None:
1275 oldPath = self.path
1276 VDI.rename(self, uuid)
1277 self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])
1278 self.path = os.path.join(self.sr.path, self.fileName)
1279 assert(not util.pathexists(self.path))
1280 Util.log("Renaming %s -> %s" % (oldPath, self.path))
1281 os.rename(oldPath, self.path)
1283 @override
1284 def delete(self) -> None:
1285 if len(self.children) > 0:  # 1285 ↛ 1286: line 1285 didn't jump to line 1286, because the condition on line 1285 was never true
1286 raise util.SMException("VDI %s has children, can't delete" % \
1287 self.uuid)
1288 try:
1289 self.sr.lock()
1290 try:
1291 os.unlink(self.path)
1292 self.sr.forgetVDI(self.uuid)
1293 finally:
1294 self.sr.unlock()
1295 except OSError:
1296 raise util.SMException("os.unlink(%s) failed" % self.path)
1297 VDI.delete(self)
1299 @override
1300 def getAllocatedSize(self) -> int:
1301 if self._sizeAllocated == -1:  # 1301 ↛ 1302: line 1301 didn't jump to line 1302, because the condition on line 1301 was never true
1302 self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)
1303 return self._sizeAllocated
1306class LVMVDI(VDI):
1307 """Object representing a VDI in an LVM SR"""
1309 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent
1311 @override
1312 def load(self, info=None) -> None:
1313 # `info` is always set. `None` default value is only here to match parent method.
1314 assert info, "No info given to LVMVDI.load"
1315 self.parent = None
1316 self.children = []
1317 self._sizePhys = -1
1318 self._sizeAllocated = -1
1319 self.scanError = info.scanError
1320 self.sizeLV = info.sizeLV
1321 self.sizeVirt = info.sizeVirt
1322 self.fileName = info.lvName
1323 self.lvActive = info.lvActive
1324 self.lvOpen = info.lvOpen
1325 self.lvReadonly = info.lvReadonly
1326 self._hidden = info.hidden
1327 self.parentUuid = info.parentUuid
1328 self.path = os.path.join(self.sr.path, self.fileName)
1329 self.lvmcowutil = LvmCowUtil(self.cowutil)
1331 @override
1332 @staticmethod
1333 def extractUuid(path):
1334 return LvmCowUtil.extractUuid(path)
1336 def inflate(self, size):
1337 """inflate the LV containing the COW image to 'size'"""
1338 if not VdiType.isCowImage(self.vdi_type):
1339 return
1340 self._activate()
1341 self.sr.lock()
1342 try:
1343 self.lvmcowutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size)
1344 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid)
1345 finally:
1346 self.sr.unlock()
1347 self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
1348 self._sizePhys = -1
1349 self._sizeAllocated = -1
1351 def deflate(self):
1352 """deflate the LV containing the image to minimum"""
1353 if not VdiType.isCowImage(self.vdi_type):
1354 return
1355 self._activate()
1356 self.sr.lock()
1357 try:
1358 self.lvmcowutil.deflate(self.sr.lvmCache, self.fileName, self.getSizePhys())
1359 finally:
1360 self.sr.unlock()
1361 self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
1362 self._sizePhys = -1
1363 self._sizeAllocated = -1
1365 def inflateFully(self):
1366 self.inflate(self.lvmcowutil.calcVolumeSize(self.sizeVirt))
1368 def inflateParentForCoalesce(self):
1369 """Inflate the parent only as much as needed for the purposes of
1370 coalescing"""
1371 if not VdiType.isCowImage(self.parent.vdi_type):
1372 return
1373 inc = self._calcExtraSpaceForCoalescing()
1374 if inc > 0:
1375 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid)
1376 self.parent.inflate(self.parent.sizeLV + inc)
1378 @override
1379 def updateBlockInfo(self) -> Optional[str]:
1380 if VdiType.isCowImage(self.vdi_type):
1381 return VDI.updateBlockInfo(self)
1382 return None
1384 @override
1385 def rename(self, uuid) -> None:
1386 oldUuid = self.uuid
1387 oldLVName = self.fileName
1388 VDI.rename(self, uuid)
1389 self.fileName = LV_PREFIX[self.vdi_type] + self.uuid
1390 self.path = os.path.join(self.sr.path, self.fileName)
1391 assert(not self.sr.lvmCache.checkLV(self.fileName))
1393 self.sr.lvmCache.rename(oldLVName, self.fileName)
1394 if self.sr.lvActivator.get(oldUuid, False):
1395 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False)
1397 ns = NS_PREFIX_LVM + self.sr.uuid
1398 (cnt, bcnt) = RefCounter.check(oldUuid, ns)
1399 RefCounter.set(self.uuid, cnt, bcnt, ns)
1400 RefCounter.reset(oldUuid, ns)
1402 @override
1403 def delete(self) -> None:
1404 if len(self.children) > 0:
1405 raise util.SMException("VDI %s has children, can't delete" % \
1406 self.uuid)
1407 self.sr.lock()
1408 try:
1409 self.sr.lvmCache.remove(self.fileName)
1410 self.sr.forgetVDI(self.uuid)
1411 finally:
1412 self.sr.unlock()
1413 RefCounter.reset(self.uuid, NS_PREFIX_LVM + self.sr.uuid)
1414 VDI.delete(self)
1416 @override
1417 def getSizePhys(self) -> int:
1418 if self._sizePhys == -1:
1419 self._loadInfoSizePhys()
1420 return self._sizePhys
1422 def _loadInfoSizePhys(self):
1423 """Get the physical utilization of the COW image file. We do it individually
1424 (and not using the COW batch scanner) as an optimization: this info is
1425 relatively expensive and we need it only for VDI's involved in
1426 coalescing."""
1427 if not VdiType.isCowImage(self.vdi_type):
1428 return
1429 self._activate()
1430 self._sizePhys = self.cowutil.getSizePhys(self.path)
1431 if self._sizePhys <= 0:
1432 raise util.SMException("phys size of %s = %d" % \
1433 (self, self._sizePhys))
1435 @override
1436 def getAllocatedSize(self) -> int:
1437 if self._sizeAllocated == -1:
1438 self._loadInfoSizeAllocated()
1439 return self._sizeAllocated
1441 def _loadInfoSizeAllocated(self):
1442 """
1443 Get the allocated size of the COW volume.
1444 """
1445 if not VdiType.isCowImage(self.vdi_type):
1446 return
1447 self._activate()
1448 self._sizeAllocated = self.cowutil.getAllocatedSize(self.path)
1450 @override
1451 def _loadInfoHidden(self) -> None:
1452 if not VdiType.isCowImage(self.vdi_type):
1453 self._hidden = self.sr.lvmCache.getHidden(self.fileName)
1454 else:
1455 VDI._loadInfoHidden(self)
1457 @override
1458 def _setHidden(self, hidden=True) -> None:
1459 if not VdiType.isCowImage(self.vdi_type):
1460 self._hidden = None
1461 self.sr.lvmCache.setHidden(self.fileName, hidden)
1462 self._hidden = hidden
1463 else:
1464 VDI._setHidden(self, hidden)
1466 @override
1467 def __str__(self) -> str:
1468 strType = self.vdi_type
1469 if self.vdi_type == VdiType.RAW:
1470 strType = "RAW"
1471 strHidden = ""
1472 if self.isHidden():
1473 strHidden = "*"
1474 strSizePhys = ""
1475 if self._sizePhys > 0:
1476 strSizePhys = Util.num2str(self._sizePhys)
1477 strSizeAllocated = ""
1478 if self._sizeAllocated >= 0:
1479 strSizeAllocated = Util.num2str(self._sizeAllocated)
1480 strActive = "n"
1481 if self.lvActive:
1482 strActive = "a"
1483 if self.lvOpen:
1484 strActive += "o"
1485 return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType,
1486 Util.num2str(self.sizeVirt), strSizePhys, strSizeAllocated,
1487 Util.num2str(self.sizeLV), strActive)
1489 @override
1490 def validate(self, fast=False) -> None:
1491 if VdiType.isCowImage(self.vdi_type):
1492 VDI.validate(self, fast)
1494 def _setChainRw(self) -> List[str]:
1495 """
1496 Set the read-only LV and its children writable.
1497 This is needed because the coalesce can be done by tapdisk directly,
1498 which will need to write parent information into the children.
1499 The VDI we want to coalesce into its parent needs to be writable for the libqcow coalesce step.
1500 Return the list of LVs that were previously read-only so they can be made RO again after the coalesce.
1501 """
1502 was_ro = []
1503 if self.lvReadonly:
1504 self.sr.lvmCache.setReadonly(self.fileName, False)
1505 was_ro.append(self.fileName)
1507 for child in self.children:
1508 if child.lvReadonly:
1509 self.sr.lvmCache.setReadonly(child.fileName, False)
1510 was_ro.append(child.fileName)
1512 return was_ro
1514 def _setChainRo(self, was_ro: List[str]) -> None:
1515 """Set the list of LV in parameters to readonly"""
1516 for lvName in was_ro:
1517 self.sr.lvmCache.setReadonly(lvName, True)
1519 @override
1520 def _doCoalesce(self) -> None:
1521 """LVMVDI parents must first be activated, inflated, and made writable"""
1522 was_ro = []
1523 try:
1524 self._activateChain()
1525 self.sr.lvmCache.setReadonly(self.parent.fileName, False)
1526 self.parent.validate()
1527 self.inflateParentForCoalesce()
1528 was_ro = self._setChainRw()
1529 VDI._doCoalesce(self)
1530 finally:
1531 self.parent._loadInfoSizePhys()
1532 self.parent.deflate()
1533 self.sr.lvmCache.setReadonly(self.parent.fileName, True)
1534 self._setChainRo(was_ro)
1536 @override
1537 def _setParent(self, parent) -> None:
1538 self._activate()
1539 if self.lvReadonly:
1540 self.sr.lvmCache.setReadonly(self.fileName, False)
1542 try:
1543 self.cowutil.setParent(self.path, parent.path, parent.vdi_type == VdiType.RAW)
1544 finally:
1545 if self.lvReadonly:
1546 self.sr.lvmCache.setReadonly(self.fileName, True)
1547 self._deactivate()
1548 self.parent = parent
1549 self.parentUuid = parent.uuid
1550 parent.children.append(self)
1551 try:
1552 self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
1553 Util.log("Updated the VDI-parent field for child %s with %s" % \
1554 (self.uuid, self.parentUuid))
1555 except:
1556 Util.log("Failed to update the VDI-parent with %s for child %s" % \
1557 (self.parentUuid, self.uuid))
1559 def _activate(self):
1560 self.sr.lvActivator.activate(self.uuid, self.fileName, False)
1562 def _activateChain(self):
1563 vdi = self
1564 while vdi:
1565 vdi._activate()
1566 vdi = vdi.parent
1568 def _deactivate(self):
1569 self.sr.lvActivator.deactivate(self.uuid, False)
1571 @override
1572 def _increaseSizeVirt(self, size, atomic=True) -> None:
1573 "ensure the virtual size of 'self' is at least 'size'"
1574 self._activate()
1575 if VdiType.isCowImage(self.vdi_type):
1576 VDI._increaseSizeVirt(self, size, atomic)
1577 return
1579 # raw VDI case
1580 offset = self.sizeLV
1581 if self.sizeVirt < size:
1582 oldSize = self.sizeLV
1583 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size)
1584 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV))
1585 self.sr.lvmCache.setSize(self.fileName, self.sizeLV)
1586 offset = oldSize
1587 unfinishedZero = False
1588 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid)
1589 if jval:
1590 unfinishedZero = True
1591 offset = int(jval)
1592 length = self.sizeLV - offset
1593 if not length:
1594 return
1596 if unfinishedZero:
1597 Util.log(" ==> Redoing unfinished zeroing out")
1598 else:
1599 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \
1600 str(offset))
1601 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
1602 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
1603 func = lambda: util.zeroOut(self.path, offset, length)
1604 Util.runAbortable(func, True, self.sr.uuid, abortTest,
1605 VDI.POLL_INTERVAL, 0)
1606 self.sr.journaler.remove(self.JRN_ZERO, self.uuid)
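# Illustrative raw-VDI case: growing a 4 GiB raw LV to 6 GiB rounds the new size
# up to the LVM extent increment, then zeroes the newly added range starting at the
# old 4 GiB offset so stale volume-group data is never exposed; the JRN_ZERO
# journal records the offset so interrupted zeroing can be redone on the next run.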
1608 @override
1609 def _setSizeVirt(self, size) -> None:
1610 """WARNING: do not call this method directly unless all VDIs in the
1611 subtree are guaranteed to be unplugged (and remain so for the duration
1612 of the operation): this operation is only safe for offline COW images."""
1613 self._activate()
1614 jFile = self.lvmcowutil.createResizeJournal(self.sr.lvmCache, self.uuid)
1615 try:
1616 self.lvmcowutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size, jFile)
1617 finally:
1618 self.lvmcowutil.destroyResizeJournal(self.sr.lvmCache, self.uuid)
1620 @override
1621 def _queryCowBlocks(self) -> bytes:
1622 self._activate()
1623 return VDI._queryCowBlocks(self)
1625 @override
1626 def _calcExtraSpaceForCoalescing(self) -> int:
1627 if not VdiType.isCowImage(self.parent.vdi_type):
1628 return 0 # raw parents are never deflated in the first place
1629 sizeCoalesced = self.lvmcowutil.calcVolumeSize(self._getCoalescedSizeData())
1630 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
1631 return sizeCoalesced - self.parent.sizeLV
1633 @override
1634 def _calcExtraSpaceForLeafCoalescing(self) -> int:
1635 """How much extra space in the SR will be required to
1636 [live-]leaf-coalesce this VDI"""
1637 # we can deflate the leaf to minimize the space requirements
1638 deflateDiff = self.sizeLV - lvutil.calcSizeLV(self.getSizePhys())
1639 return self._calcExtraSpaceForCoalescing() - deflateDiff
1641 @override
1642 def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
1643 return self._calcExtraSpaceForCoalescing() + \
1644 lvutil.calcSizeLV(self.getSizePhys())
1647class LinstorVDI(VDI):
1648 """Object representing a VDI in a LINSTOR SR"""
1650 VOLUME_LOCK_TIMEOUT = 30
1652 @override
1653 def load(self, info=None) -> None:
1654 self.parentUuid = info.parentUuid
1655 self.scanError = True
1656 self.parent = None
1657 self.children = []
1659 self.fileName = self.sr._linstor.get_volume_name(self.uuid)
1660 self.path = self.sr._linstor.build_device_path(self.fileName)
1661 self.linstorcowutil = LinstorCowUtil(self.sr.xapi.session, self.sr._linstor, info.vdiType)
1663 if not info:
1664 try:
1665 info = self.linstorcowutil.get_info(self.uuid)
1666 except util.SMException:
1667 Util.log(
1668 ' [VDI {}: failed to read COW image metadata]'.format(self.uuid)
1669 )
1670 return
1672 self.parentUuid = info.parentUuid
1673 self.sizeVirt = info.sizeVirt
1674 self._sizePhys = -1
1675 self._sizeAllocated = -1
1676 self.drbd_size = -1
1677 self._hidden = info.hidden
1678 self.scanError = False
1680 @override
1681 def getSizePhys(self, fetch=False) -> int:
1682 if self._sizePhys < 0 or fetch:
1683 self._sizePhys = self.linstorcowutil.get_size_phys(self.uuid)
1684 return self._sizePhys
1686 def getDrbdSize(self, fetch=False):
1687 if self.drbd_size < 0 or fetch:
1688 self.drbd_size = self.linstorcowutil.get_drbd_size(self.uuid)
1689 return self.drbd_size
1691 @override
1692 def getAllocatedSize(self) -> int:
1693 if self._sizeAllocated == -1:
1694 if VdiType.isCowImage(self.vdi_type):
1695 self._sizeAllocated = self.linstorcowutil.get_allocated_size(self.uuid)
1696 return self._sizeAllocated
1698 def inflate(self, size):
1699 if not VdiType.isCowImage(self.vdi_type):
1700 return
1701 self.sr.lock()
1702 try:
1703 # Ensure we use the real DRBD size and not the cached one.
1704 # Why? Because this attribute can change if the volume is resized by the user.
1705 self.drbd_size = self.getDrbdSize(fetch=True)
1706 self.linstorcowutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size)
1707 finally:
1708 self.sr.unlock()
1709 self.drbd_size = -1
1710 self._sizePhys = -1
1711 self._sizeAllocated = -1
1713 def deflate(self):
1714 if not VdiType.isCowImage(self.vdi_type):
1715 return
1716 self.sr.lock()
1717 try:
1718 # Ensure we use the real sizes and not the cached info.
1719 self.drbd_size = self.getDrbdSize(fetch=True)
1720 self._sizePhys = self.getSizePhys(fetch=True)
1721 self.linstorcowutil.force_deflate(self.path, self._sizePhys, self.drbd_size, zeroize=False)
1722 finally:
1723 self.sr.unlock()
1724 self.drbd_size = -1
1725 self._sizePhys = -1
1726 self._sizeAllocated = -1
1728 def inflateFully(self):
1729 if VdiType.isCowImage(self.vdi_type):
1730 self.inflate(self.linstorcowutil.compute_volume_size(self.sizeVirt))
1732 @override
1733 def rename(self, uuid) -> None:
1734 Util.log('Renaming {} -> {} (path={})'.format(
1735 self.uuid, uuid, self.path
1736 ))
1737 self.sr._linstor.update_volume_uuid(self.uuid, uuid)
1738 VDI.rename(self, uuid)
1740 @override
1741 def delete(self) -> None:
1742 if len(self.children) > 0:
1743 raise util.SMException(
1744 'VDI {} has children, can\'t delete'.format(self.uuid)
1745 )
1746 self.sr.lock()
1747 try:
1748 self.sr._linstor.destroy_volume(self.uuid)
1749 self.sr.forgetVDI(self.uuid)
1750 finally:
1751 self.sr.unlock()
1752 VDI.delete(self)
1754 @override
1755 def validate(self, fast=False) -> None:
1756 if VdiType.isCowImage(self.vdi_type) and self.linstorcowutil.check(self.uuid, fast=fast) != CowUtil.CheckResult.Success:
1757 raise util.SMException('COW image {} corrupted'.format(self))
1759 @override
1760 def pause(self, failfast=False) -> None:
1761 self.sr._linstor.ensure_volume_is_not_locked(
1762 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT
1763 )
1764 return super(LinstorVDI, self).pause(failfast)
1766 @override
1767 def coalesce(self) -> int:
1768         # Note: on failure we raise `SMException` so that only the current coalesce is skipped;
1769         # any other exception type would prevent the remaining coalesce calls from being executed.
1770 return self.linstorcowutil.force_coalesce(self.path)
1772 @override
1773 def getParent(self) -> str:
1774 return self.linstorcowutil.get_parent(
1775 self.sr._linstor.get_volume_uuid_from_device_path(self.path)
1776 )
1778 @override
1779 def repair(self, parent_uuid) -> None:
1780 self.linstorcowutil.force_repair(
1781 self.sr._linstor.get_device_path(parent_uuid)
1782 )
1784 @override
1785 def _relinkSkip(self) -> None:
1786 abortFlag = IPCFlag(self.sr.uuid)
1787 for child in self.children:
1788 if abortFlag.test(FLAG_TYPE_ABORT):
1789 raise AbortException('Aborting due to signal')
1790 Util.log(
1791 ' Relinking {} from {} to {}'.format(
1792 child, self, self.parent
1793 )
1794 )
1796 session = child.sr.xapi.session
1797 sr_uuid = child.sr.uuid
1798 vdi_uuid = child.uuid
1799 try:
1800 self.sr._linstor.ensure_volume_is_not_locked(
1801 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT
1802 )
1803 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid)
1804 child._setParent(self.parent)
1805 finally:
1806 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid)
1807 self.children = []
1809 @override
1810 def _setParent(self, parent) -> None:
1811 self.sr._linstor.get_device_path(self.uuid)
1812 self.linstorcowutil.force_parent(self.path, parent.path)
1813 self.parent = parent
1814 self.parentUuid = parent.uuid
1815 parent.children.append(self)
1816 try:
1817 self.setConfig(self.DB_VDI_PARENT, self.parentUuid)
1818 Util.log("Updated the vhd-parent field for child %s with %s" % \
1819 (self.uuid, self.parentUuid))
1820 except:
1821 Util.log("Failed to update %s with vhd-parent field %s" % \
1822 (self.uuid, self.parentUuid))
1824 @override
1825 def _doCoalesce(self) -> None:
1826 try:
1827 self._activateChain()
1828 self.parent.validate()
1829 self._inflateParentForCoalesce()
1830 VDI._doCoalesce(self)
1831 finally:
1832 self.parent.deflate()
1834 def _activateChain(self):
1835 vdi = self
1836 while vdi:
1837 try:
1838 p = self.sr._linstor.get_device_path(vdi.uuid)
1839 except Exception as e:
1840                 # Raise SMException so that only this coalesce is skipped;
1841                 # any other exception would stop the GC entirely.
1842 raise util.SMException(str(e))
1843 vdi = vdi.parent
1845 @override
1846 def _setHidden(self, hidden=True) -> None:
1847 HIDDEN_TAG = 'hidden'
1849 if not VdiType.isCowImage(self.vdi_type):
1850 self._hidden = None
1851 self.sr._linstor.update_volume_metadata(self.uuid, {
1852 HIDDEN_TAG: hidden
1853 })
1854 self._hidden = hidden
1855 else:
1856 VDI._setHidden(self, hidden)
1858 @override
1859 def _increaseSizeVirt(self, size, atomic=True):
1860 if self.vdi_type == VdiType.RAW:
1861 offset = self.drbd_size
1862 if self.sizeVirt < size:
1863 oldSize = self.drbd_size
1864 self.drbd_size = LinstorVolumeManager.round_up_volume_size(size)
1865 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size))
1866 self.sr._linstor.resize_volume(self.uuid, self.drbd_size)
1867 offset = oldSize
1868 unfinishedZero = False
1869 jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid)
1870 if jval:
1871 unfinishedZero = True
1872 offset = int(jval)
1873 length = self.drbd_size - offset
1874 if not length:
1875 return
1877 if unfinishedZero:
1878 Util.log(" ==> Redoing unfinished zeroing out")
1879 else:
1880 self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset))
1881 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
1882 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
1883 func = lambda: util.zeroOut(self.path, offset, length)
1884 Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)
1885 self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid)
1886 return
1888 if self.sizeVirt >= size:
1889 return
1890 Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \
1891 (self, Util.num2str(self.sizeVirt), Util.num2str(size)))
1893 msize = self.linstorcowutil.get_max_resize_size(self.uuid) * 1024 * 1024
1894 if (size <= msize):
1895 self.linstorcowutil.set_size_virt_fast(self.path, size)
1896 else:
1897 if atomic:
1898 vdiList = self._getAllSubtree()
1899 self.sr.lock()
1900 try:
1901 self.sr.pauseVDIs(vdiList)
1902 try:
1903 self._setSizeVirt(size)
1904 finally:
1905 self.sr.unpauseVDIs(vdiList)
1906 finally:
1907 self.sr.unlock()
1908 else:
1909 self._setSizeVirt(size)
1911 self.sizeVirt = self.linstorcowutil.get_size_virt(self.uuid)
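# Note on the RAW branch above (sizes below are purely illustrative): growing a
# RAW volume from 10 GiB to 12 GiB first records offset = 10 GiB in a ZERO
# journal entry, then zeroes the 2 GiB extension and removes the entry. If the
# GC is interrupted in between, the next call finds the entry, logs "Redoing
# unfinished zeroing out" and zeroes the extension again from the recorded
# offset, so the grown region is guaranteed to be fully zeroed.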
1913 @override
1914 def _setSizeVirt(self, size) -> None:
1915 jfile = self.uuid + '-jvhd'
1916 self.sr._linstor.create_volume(
1917 jfile, self.cowutil.getResizeJournalSize(), persistent=False, volume_name=jfile
1918 )
1919 try:
1920 self.inflate(self.linstorcowutil.compute_volume_size(size))
1921 self.linstorcowutil.set_size_virt(self.path, size, jfile)
1922 finally:
1923 try:
1924 self.sr._linstor.destroy_volume(jfile)
1925 except Exception:
1926 # We can ignore it, in any case this volume is not persistent.
1927 pass
1929 @override
1930 def _queryCowBlocks(self) -> bytes:
1931 return self.linstorcowutil.get_block_bitmap(self.uuid)
1933 def _inflateParentForCoalesce(self):
1934 if not VdiType.isCowImage(self.parent.vdi_type):
1935 return
1936 inc = self._calcExtraSpaceForCoalescing()
1937 if inc > 0:
1938 self.parent.inflate(self.parent.getDrbdSize() + inc)
1940 @override
1941 def _calcExtraSpaceForCoalescing(self) -> int:
1942 if not VdiType.isCowImage(self.parent.vdi_type):
1943 return 0
1944 size_coalesced = self.linstorcowutil.compute_volume_size(self._getCoalescedSizeData())
1945 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced))
1946 return size_coalesced - self.parent.getDrbdSize()
1948 @override
1949 def _calcExtraSpaceForLeafCoalescing(self) -> int:
1950 assert self.getDrbdSize() > 0
1951 assert self.getSizePhys() > 0
1952 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizePhys())
1953 assert deflate_diff >= 0
1954 return self._calcExtraSpaceForCoalescing() - deflate_diff
1956 @override
1957 def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
1958 assert self.getSizePhys() > 0
1959 return self._calcExtraSpaceForCoalescing() + \
1960 LinstorVolumeManager.round_up_volume_size(self.getSizePhys())
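# Worked example for the three space estimates above (illustrative numbers
# only, not taken from a real SR): if the coalesced data would need a 10 GiB
# volume after compute_volume_size() rounding and the parent currently
# occupies 8 GiB of DRBD space, a plain coalesce needs ~2 GiB of extra space.
# A snapshot-coalesce additionally reserves the leaf's rounded-up physical
# size on top of that, whereas a leaf-coalesce first deflates the leaf and so
# deducts the space reclaimed (current DRBD size minus rounded-up physical
# size) from the same estimate.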
1962################################################################################
1963#
1964# SR
1965#
1966class SR(object):
1967 class LogFilter:
1968 def __init__(self, sr):
1969 self.sr = sr
1970 self.stateLogged = False
1971 self.prevState = {}
1972 self.currState = {}
1974 def logState(self):
1975 changes = ""
1976 self.currState.clear()
1977 for vdi in self.sr.vdiTrees:
1978 self.currState[vdi.uuid] = self._getTreeStr(vdi)
1979 if not self.prevState.get(vdi.uuid) or \
1980 self.prevState[vdi.uuid] != self.currState[vdi.uuid]:
1981 changes += self.currState[vdi.uuid]
1983 for uuid in self.prevState:
1984 if not self.currState.get(uuid):
1985 changes += "Tree %s gone\n" % uuid
1987 result = "SR %s (%d VDIs in %d COW trees): " % \
1988 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees))
1990 if len(changes) > 0:
1991 if self.stateLogged:
1992 result += "showing only COW trees that changed:"
1993 result += "\n%s" % changes
1994 else:
1995 result += "no changes"
1997 for line in result.split("\n"):
1998 Util.log("%s" % line)
1999 self.prevState.clear()
2000 for key, val in self.currState.items():
2001 self.prevState[key] = val
2002 self.stateLogged = True
2004 def logNewVDI(self, uuid):
2005 if self.stateLogged:
2006 Util.log("Found new VDI when scanning: %s" % uuid)
2008 def _getTreeStr(self, vdi, indent=8):
2009 treeStr = "%s%s\n" % (" " * indent, vdi)
2010 for child in vdi.children:
2011 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT)
2012 return treeStr
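# Illustrative output of logState() (uuids shortened; the per-VDI rendering
# comes from VDI.__str__ defined earlier in this module):
#
#   SR 1234 ('my-sr') (3 VDIs in 1 COW trees): showing only COW trees that changed:
#           base-uuid...
#               snap-uuid...
#                   leaf-uuid...
#
# i.e. one indented line per VDI, with children nested under their parent.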
2014 TYPE_FILE = "file"
2015 TYPE_LVHD = "lvhd"
2016 TYPE_LINSTOR = "linstor"
2017 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR]
2019 LOCK_RETRY_INTERVAL = 3
2020 LOCK_RETRY_ATTEMPTS = 20
2021 LOCK_RETRY_ATTEMPTS_LOCK = 100
2023 SCAN_RETRY_ATTEMPTS = 3
2025 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM)
2026 TMP_RENAME_PREFIX = "OLD_"
2028 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline"
2029 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override"
2031 @staticmethod
2032 def getInstance(uuid, xapiSession, createLock=True, force=False):
2033 xapi = XAPI(xapiSession, uuid)
2034 type = normalizeType(xapi.srRecord["type"])
2035 if type == SR.TYPE_FILE:
2036 return FileSR(uuid, xapi, createLock, force)
2037 elif type == SR.TYPE_LVHD:
2038 return LVMSR(uuid, xapi, createLock, force)
2039 elif type == SR.TYPE_LINSTOR:
2040 return LinstorSR(uuid, xapi, createLock, force)
2041 raise util.SMException("SR type %s not recognized" % type)
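    # Illustrative usage of getInstance (a sketch only; the session helper below
    # is an assumption, not defined in this file):
    #
    #   session = util.get_localAPI_session()
    #   sr = SR.getInstance(sr_uuid, session)      # FileSR, LVMSR or LinstorSR
    #   if sr.gcEnabled():
    #       sr.scanLocked()
    #       sr.garbageCollect(dryRun=True)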
2043 def __init__(self, uuid, xapi, createLock, force):
2044 self.logFilter = self.LogFilter(self)
2045 self.uuid = uuid
2046 self.path = ""
2047 self.name = ""
2048 self.vdis = {}
2049 self.vdiTrees = []
2050 self.journaler = None
2051 self.xapi = xapi
2052 self._locked = 0
2053 self._srLock = None
2054         if createLock:  2054 ↛ 2055: line 2054 didn't jump to line 2055, because the condition on line 2054 was never true
2055 self._srLock = lock.Lock(lock.LOCK_TYPE_SR, self.uuid)
2056 else:
2057 Util.log("Requested no SR locking")
2058 self.name = self.xapi.srRecord["name_label"]
2059 self._failedCoalesceTargets = []
2061 if not self.xapi.isPluggedHere():
2062             if force:  2062 ↛ 2063: line 2062 didn't jump to line 2063, because the condition on line 2062 was never true
2063 Util.log("SR %s not attached on this host, ignoring" % uuid)
2064 else:
2065 if not self.wait_for_plug():
2066 raise util.SMException("SR %s not attached on this host" % uuid)
2068         if force:  2068 ↛ 2069: line 2068 didn't jump to line 2069, because the condition on line 2068 was never true
2069 Util.log("Not checking if we are Master (SR %s)" % uuid)
2070         elif not self.xapi.isMaster():  2070 ↛ 2071: line 2070 didn't jump to line 2071, because the condition on line 2070 was never true
2071 raise util.SMException("This host is NOT master, will not run")
2073 self.no_space_candidates = {}
2075 def msg_cleared(self, xapi_session, msg_ref):
2076 try:
2077 msg = xapi_session.xenapi.message.get_record(msg_ref)
2078 except XenAPI.Failure:
2079 return True
2081 return msg is None
2083 def check_no_space_candidates(self):
2084 xapi_session = self.xapi.getSession()
2086 msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE)
2087 if self.no_space_candidates:
2088 if msg_id is None or self.msg_cleared(xapi_session, msg_id):
2089 util.SMlog("Could not coalesce due to a lack of space "
2090 f"in SR {self.uuid}")
2091 msg_body = ("Unable to perform data coalesce due to a lack "
2092 f"of space in SR {self.uuid}")
2093 msg_id = xapi_session.xenapi.message.create(
2094 'SM_GC_NO_SPACE',
2095 3,
2096 "SR",
2097 self.uuid,
2098 msg_body)
2099 xapi_session.xenapi.SR.remove_from_sm_config(
2100 self.xapi.srRef, VDI.DB_GC_NO_SPACE)
2101 xapi_session.xenapi.SR.add_to_sm_config(
2102 self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id)
2104 for candidate in self.no_space_candidates.values():
2105 candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id)
2106 elif msg_id is not None:
2107 # Everything was coalescable, remove the message
2108 xapi_session.xenapi.message.destroy(msg_id)
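    # Lifecycle sketch of the "no space" reporting above: the first run that
    # finds uncoalesceable VDIs creates a single SM_GC_NO_SPACE message for the
    # SR, records its ref in the SR's sm_config (VDI.DB_GC_NO_SPACE) and on each
    # affected VDI; later runs reuse that message until either the operator
    # clears it (a new one is then created) or nothing is blocked on space any
    # more, at which point the message is destroyed.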
2110 def clear_no_space_msg(self, vdi):
2111 msg_id = None
2112 try:
2113 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE)
2114 except XenAPI.Failure:
2115 pass
2117 self.no_space_candidates.pop(vdi.uuid, None)
2118         if msg_id is not None:  2118 ↛ exit: line 2118 didn't return from function 'clear_no_space_msg', because the condition on line 2118 was never false
2119 vdi.delConfig(VDI.DB_GC_NO_SPACE)
2122 def wait_for_plug(self):
2123 for _ in range(1, 10):
2124 time.sleep(2)
2125 if self.xapi.isPluggedHere():
2126 return True
2127 return False
2129 def gcEnabled(self, refresh=True):
2130 if refresh:
2131 self.xapi.srRecord = \
2132 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef)
2133 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false":
2134 Util.log("GC is disabled for this SR, abort")
2135 return False
2136 return True
2138 def scan(self, force=False) -> None:
2139 """Scan the SR and load VDI info for each VDI. If called repeatedly,
2140 update VDI objects if they already exist"""
2141 pass
2143 def scanLocked(self, force=False):
2144 self.lock()
2145 try:
2146 self.scan(force)
2147 finally:
2148 self.unlock()
2150 def getVDI(self, uuid):
2151 return self.vdis.get(uuid)
2153 def hasWork(self):
2154 if len(self.findGarbage()) > 0:
2155 return True
2156 if self.findCoalesceable():
2157 return True
2158 if self.findLeafCoalesceable():
2159 return True
2160 if self.needUpdateBlockInfo():
2161 return True
2162 return False
2164 def findCoalesceable(self):
2165 """Find a coalesceable VDI. Return a vdi that should be coalesced
2166 (choosing one among all coalesceable candidates according to some
2167 criteria) or None if there is no VDI that could be coalesced"""
2169 candidates = []
2171 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE)
2172 if srSwitch == "false":
2173 Util.log("Coalesce disabled for this SR")
2174 return candidates
2176 # finish any VDI for which a relink journal entry exists first
2177 journals = self.journaler.getAll(VDI.JRN_RELINK)
2178 for uuid in journals:
2179 vdi = self.getVDI(uuid)
2180 if vdi and vdi not in self._failedCoalesceTargets:
2181 return vdi
2183 for vdi in self.vdis.values():
2184 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets:
2185 candidates.append(vdi)
2186 Util.log("%s is coalescable" % vdi.uuid)
2188 self.xapi.update_task_progress("coalescable", len(candidates))
2190 # pick one in the tallest tree
2191 treeHeight = dict()
2192 for c in candidates:
2193 height = c.getTreeRoot().getTreeHeight()
2194 if treeHeight.get(height):
2195 treeHeight[height].append(c)
2196 else:
2197 treeHeight[height] = [c]
2199 freeSpace = self.getFreeSpace()
2200 heights = list(treeHeight.keys())
2201 heights.sort(reverse=True)
2202 for h in heights:
2203 for c in treeHeight[h]:
2204 spaceNeeded = c._calcExtraSpaceForCoalescing()
2205 if spaceNeeded <= freeSpace:
2206 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h))
2207 self.clear_no_space_msg(c)
2208 return c
2209 else:
2210 self.no_space_candidates[c.uuid] = c
2211 Util.log("No space to coalesce %s (free space: %d)" % \
2212 (c, freeSpace))
2213 return None
2215 def getSwitch(self, key):
2216 return self.xapi.srRecord["other_config"].get(key)
2218 def forbiddenBySwitch(self, switch, condition, fail_msg):
2219 srSwitch = self.getSwitch(switch)
2220 ret = False
2221 if srSwitch:
2222 ret = srSwitch == condition
2224 if ret:
2225 Util.log(fail_msg)
2227 return ret
2229 def leafCoalesceForbidden(self):
2230 return (self.forbiddenBySwitch(VDI.DB_COALESCE,
2231 "false",
2232 "Coalesce disabled for this SR") or
2233 self.forbiddenBySwitch(VDI.DB_LEAFCLSC,
2234 VDI.LEAFCLSC_DISABLED,
2235 "Leaf-coalesce disabled for this SR"))
2237 def findLeafCoalesceable(self):
2238 """Find leaf-coalesceable VDIs in each COW tree"""
2240 candidates = []
2241 if self.leafCoalesceForbidden():
2242 return candidates
2244 self.gatherLeafCoalesceable(candidates)
2246 self.xapi.update_task_progress("coalescable", len(candidates))
2248 freeSpace = self.getFreeSpace()
2249 for candidate in candidates:
2250 # check the space constraints to see if leaf-coalesce is actually
2251 # feasible for this candidate
2252 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing()
2253 spaceNeededLive = spaceNeeded
2254 if spaceNeeded > freeSpace:
2255 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing()
2256 if candidate.canLiveCoalesce(self.getStorageSpeed()):
2257 spaceNeeded = spaceNeededLive
2259 if spaceNeeded <= freeSpace:
2260 Util.log("Leaf-coalesce candidate: %s" % candidate)
2261 self.clear_no_space_msg(candidate)
2262 return candidate
2263 else:
2264 Util.log("No space to leaf-coalesce %s (free space: %d)" % \
2265 (candidate, freeSpace))
2266 if spaceNeededLive <= freeSpace:
2267                     Util.log("...but there is enough space if we skip the snapshot-coalesce step")
2268 candidate.setConfig(VDI.DB_LEAFCLSC,
2269 VDI.LEAFCLSC_OFFLINE)
2270 self.no_space_candidates[candidate.uuid] = candidate
2272 return None
2274 def gatherLeafCoalesceable(self, candidates):
2275 for vdi in self.vdis.values():
2276 if not vdi.isLeafCoalesceable():
2277 continue
2278 if vdi in self._failedCoalesceTargets:
2279 continue
2280 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET:
2281 Util.log("Skipping reset-on-boot %s" % vdi)
2282 continue
2283 if vdi.getConfig(vdi.DB_ALLOW_CACHING):
2284 Util.log("Skipping allow_caching=true %s" % vdi)
2285 continue
2286 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED:
2287 Util.log("Leaf-coalesce disabled for %s" % vdi)
2288 continue
2289 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or
2290 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE):
2291 continue
2292 candidates.append(vdi)
2294 def coalesce(self, vdi, dryRun=False):
2295 """Coalesce vdi onto parent"""
2296 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent))
2297         if dryRun:  2297 ↛ 2298: line 2297 didn't jump to line 2298, because the condition on line 2297 was never true
2298 return
2300 try:
2301 self._coalesce(vdi)
2302 except util.SMException as e:
2303             if isinstance(e, AbortException):  2303 ↛ 2304: line 2303 didn't jump to line 2304, because the condition on line 2303 was never true
2304 self.cleanup()
2305 raise
2306 else:
2307 self._failedCoalesceTargets.append(vdi)
2308 Util.logException("coalesce")
2309 Util.log("Coalesce failed, skipping")
2310 self.cleanup()
2312 def coalesceLeaf(self, vdi, dryRun=False):
2313 """Leaf-coalesce vdi onto parent"""
2314 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent))
2315 if dryRun:
2316 return
2318 try:
2319 uuid = vdi.uuid
2320 try:
2321 # "vdi" object will no longer be valid after this call
2322 self._coalesceLeaf(vdi)
2323 finally:
2324 vdi = self.getVDI(uuid)
2325 if vdi:
2326 vdi.delConfig(vdi.DB_LEAFCLSC)
2327 except AbortException:
2328 self.cleanup()
2329 raise
2330 except (util.SMException, XenAPI.Failure) as e:
2331 self._failedCoalesceTargets.append(vdi)
2332 Util.logException("leaf-coalesce")
2333 Util.log("Leaf-coalesce failed on %s, skipping" % vdi)
2334 self.cleanup()
2336 def garbageCollect(self, dryRun=False):
2337 vdiList = self.findGarbage()
2338 Util.log("Found %d VDIs for deletion:" % len(vdiList))
2339 for vdi in vdiList:
2340 Util.log(" %s" % vdi)
2341 if not dryRun:
2342 self.deleteVDIs(vdiList)
2343 self.cleanupJournals(dryRun)
2345 def findGarbage(self):
2346 vdiList = []
2347 for vdi in self.vdiTrees:
2348 vdiList.extend(vdi.getAllPrunable())
2349 return vdiList
2351 def deleteVDIs(self, vdiList) -> None:
2352 for vdi in vdiList:
2353 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT):
2354 raise AbortException("Aborting due to signal")
2355 Util.log("Deleting unlinked VDI %s" % vdi)
2356 self.deleteVDI(vdi)
2358 def deleteVDI(self, vdi) -> None:
2359 assert(len(vdi.children) == 0)
2360 del self.vdis[vdi.uuid]
2361         if vdi.parent:  2361 ↛ 2363: line 2361 didn't jump to line 2363, because the condition on line 2361 was never false
2362 vdi.parent.children.remove(vdi)
2363         if vdi in self.vdiTrees:  2363 ↛ 2364: line 2363 didn't jump to line 2364, because the condition on line 2363 was never true
2364 self.vdiTrees.remove(vdi)
2365 vdi.delete()
2367 def forgetVDI(self, vdiUuid) -> None:
2368 self.xapi.forgetVDI(self.uuid, vdiUuid)
2370 def pauseVDIs(self, vdiList) -> None:
2371 paused = []
2372 failed = False
2373 for vdi in vdiList:
2374 try:
2375 vdi.pause()
2376 paused.append(vdi)
2377 except:
2378 Util.logException("pauseVDIs")
2379 failed = True
2380 break
2382 if failed:
2383 self.unpauseVDIs(paused)
2384 raise util.SMException("Failed to pause VDIs")
2386 def unpauseVDIs(self, vdiList):
2387 failed = False
2388 for vdi in vdiList:
2389 try:
2390 vdi.unpause()
2391 except:
2392 Util.log("ERROR: Failed to unpause VDI %s" % vdi)
2393 failed = True
2394 if failed:
2395 raise util.SMException("Failed to unpause VDIs")
2397 def getFreeSpace(self) -> int:
2398 return 0
2400 def cleanup(self):
2401 Util.log("In cleanup")
2402 return
2404 @override
2405 def __str__(self) -> str:
2406 if self.name:
2407 ret = "%s ('%s')" % (self.uuid[0:4], self.name)
2408 else:
2409 ret = "%s" % self.uuid
2410 return ret
2412 def lock(self):
2413 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort
2414 signal to avoid deadlocking (trying to acquire the SR lock while the
2415 lock is held by a process that is trying to abort us)"""
2416 if not self._srLock:
2417 return
2419 if self._locked == 0:
2420 abortFlag = IPCFlag(self.uuid)
2421 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK):
2422 if self._srLock.acquireNoblock():
2423 self._locked += 1
2424 return
2425 if abortFlag.test(FLAG_TYPE_ABORT):
2426 raise AbortException("Abort requested")
2427 time.sleep(SR.LOCK_RETRY_INTERVAL)
2428 raise util.SMException("Unable to acquire the SR lock")
2430 self._locked += 1
2432 def unlock(self):
2433         if not self._srLock:  2433 ↛ 2435: line 2433 didn't jump to line 2435, because the condition on line 2433 was never false
2434 return
2435 assert(self._locked > 0)
2436 self._locked -= 1
2437 if self._locked == 0:
2438 self._srLock.release()
2440 def needUpdateBlockInfo(self) -> bool:
2441 for vdi in self.vdis.values():
2442 if vdi.scanError or len(vdi.children) == 0:
2443 continue
2444 if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
2445 return True
2446 return False
2448 def updateBlockInfo(self) -> None:
2449 for vdi in self.vdis.values():
2450 if vdi.scanError or len(vdi.children) == 0:
2451 continue
2452 if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
2453 vdi.updateBlockInfo()
2455 def cleanupCoalesceJournals(self):
2456 """Remove stale coalesce VDI indicators"""
2457 entries = self.journaler.getAll(VDI.JRN_COALESCE)
2458 for uuid, jval in entries.items():
2459 self.journaler.remove(VDI.JRN_COALESCE, uuid)
2461 def cleanupJournals(self, dryRun=False):
2462 """delete journal entries for non-existing VDIs"""
2463 for t in [LVMVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]:
2464 entries = self.journaler.getAll(t)
2465 for uuid, jval in entries.items():
2466 if self.getVDI(uuid):
2467 continue
2468 if t == SR.JRN_CLONE:
2469 baseUuid, clonUuid = jval.split("_")
2470 if self.getVDI(baseUuid):
2471 continue
2472 Util.log(" Deleting stale '%s' journal entry for %s "
2473 "(%s)" % (t, uuid, jval))
2474 if not dryRun:
2475 self.journaler.remove(t, uuid)
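    # Illustrative stale entry handled above: a clone journal record has the form
    #   type="clone", uuid=<journalled VDI uuid>, jval="<baseUuid>_<clonUuid>"
    # and is only deleted once neither the journalled VDI nor the base VDI of the
    # clone is present in the current scan.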
2477 def cleanupCache(self, maxAge=-1) -> int:
2478 return 0
2480 def _hasLeavesAttachedOn(self, vdi: VDI):
2481 leaves = vdi.getAllLeaves()
2482 leaves_vdi = [leaf.uuid for leaf in leaves]
2483 return util.get_hosts_attached_on(self.xapi.session, leaves_vdi)
2485 def _gc_running_file(self, vdi: VDI):
2486 run_file = "gc_running_{}".format(vdi.uuid)
2487 return os.path.join(NON_PERSISTENT_DIR, str(self.uuid), run_file)
2489 def _create_running_file(self, vdi: VDI):
2490 with open(self._gc_running_file(vdi), "w") as f:
2491 f.write("1")
2493 def _delete_running_file(self, vdi: VDI):
2494 os.unlink(self._gc_running_file(vdi))
2496 def _coalesce(self, vdi: VDI):
2497         if self.journaler.get(vdi.JRN_RELINK, vdi.uuid):  2497 ↛ 2500: line 2497 didn't jump to line 2500, because the condition on line 2497 was never true
2498 # this means we had done the actual coalescing already and just
2499 # need to finish relinking and/or refreshing the children
2500 Util.log("==> Coalesce apparently already done: skipping")
2501 else:
2502 # JRN_COALESCE is used to check which VDI is being coalesced in
2503 # order to decide whether to abort the coalesce. We remove the
2504 # journal as soon as the COW coalesce step is done, because we
2505 # don't expect the rest of the process to take long
2507             if os.path.exists(self._gc_running_file(vdi)):  2507 ↛ 2508: line 2507 didn't jump to line 2508, because the condition on line 2507 was never true
2508                 util.SMlog("gc_running file already exists for {}. Ignoring...".format(self.uuid))
2510 self._create_running_file(vdi)
2512 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1")
2513 host_refs = self._hasLeavesAttachedOn(vdi)
2514 #TODO: this check of multiple host_refs should be done earlier in `is_coalesceable` to avoid stopping this late every time
2515             if len(host_refs) > 1:  2515 ↛ 2516: line 2515 didn't jump to line 2516, because the condition on line 2515 was never true
2516 util.SMlog("Not coalesceable, chain activated more than once")
2517 raise Exception("Not coalesceable, chain activated more than once") #TODO: Use correct error
2519 try:
2520                 if host_refs and vdi.cowutil.isCoalesceableOnRemote():  2520 ↛ 2522: line 2520 didn't jump to line 2522, because the condition on line 2520 was never true
2521 #Leaf opened on another host, we need to call online coalesce
2522 util.SMlog("Remote coalesce for {}".format(vdi.path))
2523 vdi._doCoalesceOnHost(list(host_refs)[0])
2524 else:
2525 util.SMlog("Offline coalesce for {}".format(vdi.path))
2526 vdi._doCoalesce()
2527 except Exception as e:
2528 util.SMlog("EXCEPTION while coalescing: {}".format(e))
2529 self._delete_running_file(vdi)
2530 raise
2532 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid)
2533 self._delete_running_file(vdi)
2535 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid)
2537 # we now need to relink the children: lock the SR to prevent ops
2538 # like SM.clone from manipulating the VDIs we'll be relinking and
2539 # rescan the SR first in case the children changed since the last
2540 # scan
2541 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1")
2543 self.lock()
2544 try:
2545 vdi.parent._tagChildrenForRelink()
2546 self.scan()
2547 vdi._relinkSkip() #TODO: We could check if the parent is already the right one before doing the relink, or we could do the relink a second time, it doesn't seem to cause issues
2548 finally:
2549 self.unlock()
2550 # Reload the children to leave things consistent
2551 vdi.parent._reloadChildren(vdi)
2552 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid)
2554 self.deleteVDI(vdi)
2556 class CoalesceTracker:
2557 GRACE_ITERATIONS = 2
2558 MAX_ITERATIONS_NO_PROGRESS = 3
2559 MAX_ITERATIONS = 10
2560 MAX_INCREASE_FROM_MINIMUM = 1.2
2561 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \
2562 " --> Final size {finSize}"
2564 def __init__(self, sr):
2565 self.itsNoProgress = 0
2566 self.its = 0
2567 self.minSize = float("inf")
2568 self.history = []
2569 self.reason = ""
2570 self.startSize = None
2571 self.finishSize = None
2572 self.sr = sr
2573 self.grace_remaining = self.GRACE_ITERATIONS
2575 def abortCoalesce(self, prevSize, curSize):
2576 self.its += 1
2577 self.history.append(self.HISTORY_STRING.format(its=self.its,
2578 initSize=prevSize,
2579 finSize=curSize))
2581 self.finishSize = curSize
2583 if self.startSize is None:
2584 self.startSize = prevSize
2586 if curSize < self.minSize:
2587 self.minSize = curSize
2589 if prevSize < self.minSize:
2590 self.minSize = prevSize
2592 if self.its == 1:
2593 # Skip evaluating conditions on first iteration
2594 return False
2596 if prevSize < curSize:
2597 self.itsNoProgress += 1
2598 Util.log("No progress, attempt:"
2599 " {attempt}".format(attempt=self.itsNoProgress))
2600 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid)
2601 else:
2602 # We made progress
2603 return False
2605 if self.its > self.MAX_ITERATIONS:
2606 max = self.MAX_ITERATIONS
2607 self.reason = \
2608 "Max iterations ({max}) exceeded".format(max=max)
2609 return True
2611 if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS:
2612 max = self.MAX_ITERATIONS_NO_PROGRESS
2613 self.reason = \
2614 "No progress made for {max} iterations".format(max=max)
2615 return True
2617 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize
2618 if curSize > maxSizeFromMin:
2619 self.grace_remaining -= 1
2620 if self.grace_remaining == 0:
2621 self.reason = "Unexpected bump in size," \
2622 " compared to minimum achieved"
2624 return True
2626 return False
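        # Worked example of the abort rules above (sizes are illustrative):
        # suppose the smallest size seen so far is minSize = 10 GiB. Whenever an
        # iteration ends larger than it started, the tracker also checks: more
        # than MAX_ITERATIONS (10) iterations in total, more than
        # MAX_ITERATIONS_NO_PROGRESS (3) no-progress iterations, or a final size
        # above MAX_INCREASE_FROM_MINIMUM * minSize = 12 GiB. One bump over
        # 12 GiB is tolerated (GRACE_ITERATIONS = 2 counts down to 1); the
        # second aborts the leaf-coalesce. The first iteration is never
        # evaluated.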
2628 def printSizes(self):
2629 Util.log("Starting size was {size}"
2630 .format(size=self.startSize))
2631 Util.log("Final size was {size}"
2632 .format(size=self.finishSize))
2633 Util.log("Minimum size achieved was {size}"
2634 .format(size=self.minSize))
2636 def printReasoning(self):
2637 Util.log("Aborted coalesce")
2638 for hist in self.history:
2639 Util.log(hist)
2640 Util.log(self.reason)
2641 self.printSizes()
2643 def printSummary(self):
2644 if self.its == 0:
2645 return
2647             if self.reason:  2647 ↛ 2648: line 2647 didn't jump to line 2648, because the condition on line 2647 was never true
2648 Util.log("Aborted coalesce")
2649 Util.log(self.reason)
2650 else:
2651 Util.log("Coalesce summary")
2653 Util.log(f"Performed {self.its} iterations")
2654 self.printSizes()
2657 def _coalesceLeaf(self, vdi):
2658 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot
2659 complete due to external changes, namely vdi_delete and vdi_snapshot
2660         that alter the leaf-coalesceability of vdi"""
2661 tracker = self.CoalesceTracker(self)
2662 while not vdi.canLiveCoalesce(self.getStorageSpeed()):
2663 prevSizePhys = vdi.getSizePhys()
2664             if not self._snapshotCoalesce(vdi):  2664 ↛ 2665: line 2664 didn't jump to line 2665, because the condition on line 2664 was never true
2665 return False
2666 if tracker.abortCoalesce(prevSizePhys, vdi.getSizePhys()):
2667 tracker.printReasoning()
2668 raise util.SMException("VDI {uuid} could not be coalesced"
2669 .format(uuid=vdi.uuid))
2670 tracker.printSummary()
2671 return self._liveLeafCoalesce(vdi)
2673 def calcStorageSpeed(self, startTime, endTime, coalescedSize):
2674 speed = None
2675 total_time = endTime - startTime
2676 if total_time > 0:
2677 speed = float(coalescedSize) / float(total_time)
2678 return speed
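    # Example (illustrative figures): coalescing 2 GiB of data in 100 seconds is
    # recorded as 2 * 1024**3 / 100 ≈ 2.1e7 bytes per second; getStorageSpeed()
    # later averages the last few recorded values to estimate whether a leaf can
    # be live-coalesced in time.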
2680 def writeSpeedToFile(self, speed):
2681 content = []
2682 speedFile = None
2683 path = SPEED_LOG_ROOT.format(uuid=self.uuid)
2684 self.lock()
2685 try:
2686 Util.log("Writing to file: {myfile}".format(myfile=path))
2687 lines = ""
2688 if not os.path.isfile(path):
2689 lines = str(speed) + "\n"
2690 else:
2691 speedFile = open(path, "r+")
2692 content = speedFile.readlines()
2693 content.append(str(speed) + "\n")
2694 if len(content) > N_RUNNING_AVERAGE:
2695 del content[0]
2696 lines = "".join(content)
2698 util.atomicFileWrite(path, VAR_RUN, lines)
2699 finally:
2700 if speedFile is not None:
2701 speedFile.close()
2702 Util.log("Closing file: {myfile}".format(myfile=path))
2703 self.unlock()
2705 def recordStorageSpeed(self, startTime, endTime, coalescedSize):
2706 speed = self.calcStorageSpeed(startTime, endTime, coalescedSize)
2707 if speed is None:
2708 return
2710 self.writeSpeedToFile(speed)
2712 def getStorageSpeed(self):
2713 speedFile = None
2714 path = SPEED_LOG_ROOT.format(uuid=self.uuid)
2715 self.lock()
2716 try:
2717 speed = None
2718 if os.path.isfile(path):
2719 speedFile = open(path)
2720 content = speedFile.readlines()
2721 try:
2722 content = [float(i) for i in content]
2723 except ValueError:
2724                     Util.log("Something bad in the speed log: {log}".
2725                              format(log=content))
2726 return speed
2728 if len(content):
2729 speed = sum(content) / float(len(content))
2730                     if speed <= 0:  2730 ↛ 2732: line 2730 didn't jump to line 2732, because the condition on line 2730 was never true
2731 # Defensive, should be impossible.
2732 Util.log("Bad speed: {speed} calculated for SR: {uuid}".
2733 format(speed=speed, uuid=self.uuid))
2734 speed = None
2735 else:
2736 Util.log("Speed file empty for SR: {uuid}".
2737 format(uuid=self.uuid))
2738 else:
2739 Util.log("Speed log missing for SR: {uuid}".
2740 format(uuid=self.uuid))
2741 return speed
2742 finally:
2743 if not (speedFile is None):
2744 speedFile.close()
2745 self.unlock()
2747 def _snapshotCoalesce(self, vdi):
2748 # Note that because we are not holding any locks here, concurrent SM
2749 # operations may change this tree under our feet. In particular, vdi
2750 # can be deleted, or it can be snapshotted.
2751 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED)
2752 Util.log("Single-snapshotting %s" % vdi)
2753 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid)
2754 try:
2755 ret = self.xapi.singleSnapshotVDI(vdi)
2756 Util.log("Single-snapshot returned: %s" % ret)
2757 except XenAPI.Failure as e:
2758 if util.isInvalidVDI(e):
2759 Util.log("The VDI appears to have been concurrently deleted")
2760 return False
2761 raise
2762 self.scanLocked()
2763 tempSnap = vdi.parent
2764 if not tempSnap.isCoalesceable():
2765 Util.log("The VDI appears to have been concurrently snapshotted")
2766 return False
2767 Util.log("Coalescing parent %s" % tempSnap)
2768 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid)
2769 sizePhys = vdi.getSizePhys()
2770 self._coalesce(tempSnap)
2771 if not vdi.isLeafCoalesceable():
2772 Util.log("The VDI tree appears to have been altered since")
2773 return False
2774 return True
2776 def _liveLeafCoalesce(self, vdi: VDI) -> bool:
2777 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid)
2778 self.lock()
2779 try:
2780 self.scan()
2781 if not self.getVDI(vdi.uuid):
2782 Util.log("The VDI appears to have been deleted meanwhile")
2783 return False
2784 if not vdi.isLeafCoalesceable():
2785 Util.log("The VDI is no longer leaf-coalesceable")
2786 return False
2788 uuid = vdi.uuid
2789 vdi.pause(failfast=True)
2790 try:
2791 try:
2792 # "vdi" object will no longer be valid after this call
2793 self._create_running_file(vdi)
2794 self._doCoalesceLeaf(vdi)
2795 except:
2796 Util.logException("_doCoalesceLeaf")
2797 self._handleInterruptedCoalesceLeaf()
2798 raise
2799 finally:
2800 vdi = self.getVDI(uuid)
2801 if vdi:
2802 vdi.ensureUnpaused()
2803 self._delete_running_file(vdi)
2804 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid)
2805 if vdiOld:
2806 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
2807 self.deleteVDI(vdiOld)
2808 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
2809 finally:
2810 self.cleanup()
2811 self.unlock()
2812 self.logFilter.logState()
2813 return True
2815 def _doCoalesceLeaf(self, vdi: VDI):
2816 """Actual coalescing of a leaf VDI onto parent. Must be called in an
2817 offline/atomic context"""
2818 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid)
2819 self._prepareCoalesceLeaf(vdi)
2820 vdi.parent._setHidden(False)
2821 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False)
2822 vdi.validate(True)
2823 vdi.parent.validate(True)
2824 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid)
2825 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT
2826         if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE:  2826 ↛ 2827: line 2826 didn't jump to line 2827, because the condition on line 2826 was never true
2827 Util.log("Leaf-coalesce forced, will not use timeout")
2828 timeout = 0
2829 vdi._coalesceCowImage(timeout)
2830 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid)
2831 vdi.parent.validate(True)
2832 #vdi._verifyContents(timeout / 2)
2834 # rename
2835 vdiUuid = vdi.uuid
2836 oldName = vdi.fileName
2837 origParentUuid = vdi.parent.uuid
2838 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid)
2839 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid)
2840 vdi.parent.rename(vdiUuid)
2841 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid)
2842 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid)
2844 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is
2845 # garbage
2847 # update the VDI record
2848         if vdi.parent.vdi_type == VdiType.RAW:  2848 ↛ 2849: line 2848 didn't jump to line 2849, because the condition on line 2848 was never true
2849 vdi.parent.setConfig(VDI.DB_VDI_TYPE, VdiType.RAW)
2850 vdi.parent.delConfig(VDI.DB_VDI_BLOCKS)
2851 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid)
2853 self._updateNode(vdi)
2855 # delete the obsolete leaf & inflate the parent (in that order, to
2856 # minimize free space requirements)
2857 parent = vdi.parent
2858 vdi._setHidden(True)
2859 vdi.parent.children = []
2860 vdi.parent = None
2862 if parent.parent is None:
2863 parent.delConfig(VDI.DB_VDI_PARENT)
2865 extraSpace = self._calcExtraSpaceNeeded(vdi, parent)
2866 freeSpace = self.getFreeSpace()
2867         if freeSpace < extraSpace:  2867 ↛ 2870: line 2867 didn't jump to line 2870, because the condition on line 2867 was never true
2868 # don't delete unless we need the space: deletion is time-consuming
2869 # because it requires contacting the slaves, and we're paused here
2870 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
2871 self.deleteVDI(vdi)
2872 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
2874 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid)
2875 self.journaler.remove(VDI.JRN_LEAF, vdiUuid)
2877 self.forgetVDI(origParentUuid)
2878 self._finishCoalesceLeaf(parent)
2879 self._updateSlavesOnResize(parent)
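    # Recap of the rename dance above (uuids illustrative): when leaf X coalesces
    # into parent P, X is first renamed to OLD_<X> and P is then renamed to <X>,
    # so the surviving volume keeps the uuid XAPI already knows about. The OLD_
    # copy is hidden and only deleted straight away if the SR is short on space;
    # finally P's original uuid is forgotten from the VDI database.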
2881 def _calcExtraSpaceNeeded(self, child, parent) -> int:
2882 assert(VdiType.isCowImage(parent.vdi_type))
2883 extra = child.getSizePhys() - parent.getSizePhys()
2884         if extra < 0:  2884 ↛ 2885: line 2884 didn't jump to line 2885, because the condition on line 2884 was never true
2885 extra = 0
2886 return extra
2888 def _prepareCoalesceLeaf(self, vdi) -> None:
2889 pass
2891 def _updateNode(self, vdi) -> None:
2892 pass
2894 def _finishCoalesceLeaf(self, parent) -> None:
2895 pass
2897 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
2898 pass
2900 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None:
2901 pass
2903 def _updateSlavesOnResize(self, vdi) -> None:
2904 pass
2906 def _removeStaleVDIs(self, uuidsPresent) -> None:
2907 for uuid in list(self.vdis.keys()):
2908 if not uuid in uuidsPresent:
2909 Util.log("VDI %s disappeared since last scan" % \
2910 self.vdis[uuid])
2911 del self.vdis[uuid]
2913 def _handleInterruptedCoalesceLeaf(self) -> None:
2914 """An interrupted leaf-coalesce operation may leave the COW tree in an
2915 inconsistent state. If the old-leaf VDI is still present, we revert the
2916 operation (in case the original error is persistent); otherwise we must
2917 finish the operation"""
2918 pass
2920 def _buildTree(self, force):
2921 self.vdiTrees = []
2922 for vdi in self.vdis.values():
2923 if vdi.parentUuid:
2924 parent = self.getVDI(vdi.parentUuid)
2925 if not parent:
2926 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX):
2927 self.vdiTrees.append(vdi)
2928 continue
2929 if force:
2930 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \
2931 (vdi.parentUuid, vdi.uuid))
2932 self.vdiTrees.append(vdi)
2933 continue
2934 else:
2935 raise util.SMException("Parent VDI %s of %s not " \
2936 "found" % (vdi.parentUuid, vdi.uuid))
2937 vdi.parent = parent
2938 parent.children.append(vdi)
2939 else:
2940 self.vdiTrees.append(vdi)
2943class FileSR(SR):
2944 TYPE = SR.TYPE_FILE
2945 CACHE_FILE_EXT = ".vhdcache"
2946 # cache cleanup actions
2947 CACHE_ACTION_KEEP = 0
2948 CACHE_ACTION_REMOVE = 1
2949 CACHE_ACTION_REMOVE_IF_INACTIVE = 2
2951 def __init__(self, uuid, xapi, createLock, force):
2952 SR.__init__(self, uuid, xapi, createLock, force)
2953 self.path = "/var/run/sr-mount/%s" % self.uuid
2954 self.journaler = fjournaler.Journaler(self.path)
2956 @override
2957 def scan(self, force=False) -> None:
2958 if not util.pathexists(self.path):
2959 raise util.SMException("directory %s not found!" % self.uuid)
2961 uuidsPresent: List[str] = []
2963 for vdi_type in VDI_COW_TYPES:
2964 scan_result = self._scan(vdi_type, force)
2965 for uuid, image_info in scan_result.items():
2966 vdi = self.getVDI(uuid)
2967 if not vdi:
2968 self.logFilter.logNewVDI(uuid)
2969 vdi = FileVDI(self, uuid, vdi_type)
2970 self.vdis[uuid] = vdi
2971 vdi.load(image_info)
2972 uuidsPresent.extend(scan_result.keys())
2974 rawList = [x for x in os.listdir(self.path) if x.endswith(VdiTypeExtension.RAW)]
2975 for rawName in rawList:
2976 uuid = FileVDI.extractUuid(rawName)
2977 uuidsPresent.append(uuid)
2978 vdi = self.getVDI(uuid)
2979 if not vdi:
2980 self.logFilter.logNewVDI(uuid)
2981 vdi = FileVDI(self, uuid, VdiType.RAW)
2982 self.vdis[uuid] = vdi
2983 self._removeStaleVDIs(uuidsPresent)
2984 self._buildTree(force)
2985 self.logFilter.logState()
2986 self._handleInterruptedCoalesceLeaf()
2988 @override
2989 def getFreeSpace(self) -> int:
2990 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path)
2992 @override
2993 def deleteVDIs(self, vdiList) -> None:
2994 rootDeleted = False
2995 for vdi in vdiList:
2996 if not vdi.parent:
2997 rootDeleted = True
2998 break
2999 SR.deleteVDIs(self, vdiList)
3000 if self.xapi.srRecord["type"] == "nfs" and rootDeleted:
3001 self.xapi.markCacheSRsDirty()
3003 @override
3004 def cleanupCache(self, maxAge=-1) -> int:
3005 """Clean up IntelliCache cache files. Caches for leaf nodes are
3006 removed when the leaf node no longer exists or its allow-caching
3007 attribute is not set. Caches for parent nodes are removed when the
3008 parent node no longer exists or it hasn't been used in more than
3009 <maxAge> hours.
3010 Return number of caches removed.
3011 """
3012 numRemoved = 0
3013 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)]
3014 Util.log("Found %d cache files" % len(cacheFiles))
3015 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge)
3016 for cacheFile in cacheFiles:
3017 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)]
3018 action = self.CACHE_ACTION_KEEP
3019 rec = self.xapi.getRecordVDI(uuid)
3020 if not rec:
3021 Util.log("Cache %s: VDI doesn't exist" % uuid)
3022 action = self.CACHE_ACTION_REMOVE
3023 elif rec["managed"] and not rec["allow_caching"]:
3024 Util.log("Cache %s: caching disabled" % uuid)
3025 action = self.CACHE_ACTION_REMOVE
3026 elif not rec["managed"] and maxAge >= 0:
3027 lastAccess = datetime.datetime.fromtimestamp( \
3028 os.path.getatime(os.path.join(self.path, cacheFile)))
3029 if lastAccess < cutoff:
3030 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge))
3031 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE
3033 if action == self.CACHE_ACTION_KEEP:
3034 Util.log("Keeping cache %s" % uuid)
3035 continue
3037 lockId = uuid
3038 parentUuid = None
3039 if rec and rec["managed"]:
3040 parentUuid = rec["sm_config"].get("vhd-parent")
3041 if parentUuid:
3042 lockId = parentUuid
3044 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId)
3045 cacheLock.acquire()
3046 try:
3047 if self._cleanupCache(uuid, action):
3048 numRemoved += 1
3049 finally:
3050 cacheLock.release()
3051 return numRemoved
3053 def _cleanupCache(self, uuid, action):
3054 assert(action != self.CACHE_ACTION_KEEP)
3055 rec = self.xapi.getRecordVDI(uuid)
3056 if rec and rec["allow_caching"]:
3057 Util.log("Cache %s appears to have become valid" % uuid)
3058 return False
3060 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT)
3061 tapdisk = blktap2.Tapdisk.find_by_path(fullPath)
3062 if tapdisk:
3063 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE:
3064 Util.log("Cache %s still in use" % uuid)
3065 return False
3066 Util.log("Shutting down tapdisk for %s" % fullPath)
3067 tapdisk.shutdown()
3069 Util.log("Deleting file %s" % fullPath)
3070 os.unlink(fullPath)
3071 return True
3073 def _isCacheFileName(self, name):
3074 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \
3075 name.endswith(self.CACHE_FILE_EXT)
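    # Example of a matching cache file name (uuid is illustrative):
    #   0b12c3d4-5678-90ab-cdef-1234567890ab.vhdcache
    # i.e. exactly one VDI uuid immediately followed by CACHE_FILE_EXT.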
3077 def _scan(self, vdi_type, force):
3078 for i in range(SR.SCAN_RETRY_ATTEMPTS):
3079 error = False
3080 pattern = os.path.join(self.path, "*%s" % VDI_TYPE_TO_EXTENSION[vdi_type])
3081 scan_result = getCowUtil(vdi_type).getAllInfoFromVG(pattern, FileVDI.extractUuid)
3082 for uuid, vdiInfo in scan_result.items():
3083 if vdiInfo.error:
3084 error = True
3085 break
3086 if not error:
3087 return scan_result
3088 Util.log("Scan error on attempt %d" % i)
3089 if force:
3090 return scan_result
3091 raise util.SMException("Scan error")
3093 @override
3094 def deleteVDI(self, vdi) -> None:
3095 self._checkSlaves(vdi)
3096 SR.deleteVDI(self, vdi)
3098 def _checkSlaves(self, vdi):
3099 onlineHosts = self.xapi.getOnlineHosts()
3100 abortFlag = IPCFlag(self.uuid)
3101 for pbdRecord in self.xapi.getAttachedPBDs():
3102 hostRef = pbdRecord["host"]
3103 if hostRef == self.xapi._hostRef:
3104 continue
3105 if abortFlag.test(FLAG_TYPE_ABORT):
3106 raise AbortException("Aborting due to signal")
3107 try:
3108 self._checkSlave(hostRef, vdi)
3109 except util.CommandException:
3110 if hostRef in onlineHosts:
3111 raise
3113 def _checkSlave(self, hostRef, vdi):
3114 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path})
3115 Util.log("Checking with slave: %s" % repr(call))
3116 _host = self.xapi.session.xenapi.host
3117 text = _host.call_plugin( * call)
3119 @override
3120 def _handleInterruptedCoalesceLeaf(self) -> None:
3121 entries = self.journaler.getAll(VDI.JRN_LEAF)
3122 for uuid, parentUuid in entries.items():
3123 fileList = os.listdir(self.path)
3124 childName = uuid + VdiTypeExtension.VHD
3125 tmpChildName = self.TMP_RENAME_PREFIX + uuid + VdiTypeExtension.VHD
3126 parentName1 = parentUuid + VdiTypeExtension.VHD
3127 parentName2 = parentUuid + VdiTypeExtension.RAW
3128 parentPresent = (parentName1 in fileList or parentName2 in fileList)
3129 if parentPresent or tmpChildName in fileList:
3130 self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
3131 else:
3132 self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
3133 self.journaler.remove(VDI.JRN_LEAF, uuid)
3134 vdi = self.getVDI(uuid)
3135 if vdi:
3136 vdi.ensureUnpaused()
3138 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3139 Util.log("*** UNDO LEAF-COALESCE")
3140 parent = self.getVDI(parentUuid)
3141 if not parent:
3142 parent = self.getVDI(childUuid)
3143 if not parent:
3144 raise util.SMException("Neither %s nor %s found" % \
3145 (parentUuid, childUuid))
3146 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
3147 parent.rename(parentUuid)
3148 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)
3150 child = self.getVDI(childUuid)
3151 if not child:
3152 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
3153 if not child:
3154 raise util.SMException("Neither %s nor %s found" % \
3155 (childUuid, self.TMP_RENAME_PREFIX + childUuid))
3156 Util.log("Renaming child back to %s" % childUuid)
3157 child.rename(childUuid)
3158 Util.log("Updating the VDI record")
3159 child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
3160 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
3161 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)
3163 if child.isHidden():
3164 child._setHidden(False)
3165 if not parent.isHidden():
3166 parent._setHidden(True)
3167 self._updateSlavesOnUndoLeafCoalesce(parent, child)
3168 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
3169 Util.log("*** leaf-coalesce undo successful")
3170 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
3171 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)
3173 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3174 Util.log("*** FINISH LEAF-COALESCE")
3175 vdi = self.getVDI(childUuid)
3176 if not vdi:
3177 Util.log(f"_finishInterruptedCoalesceLeaf, vdi {childUuid} not found, aborting")
3178 raise util.SMException("VDI %s not found" % childUuid)
3179 try:
3180 self.forgetVDI(parentUuid)
3181 except XenAPI.Failure:
3182             Util.logException('_finishInterruptedCoalesceLeaf')
3184 self._updateSlavesOnResize(vdi)
3185 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
3186 Util.log("*** finished leaf-coalesce successfully")
3189class LVMSR(SR):
3190 TYPE = SR.TYPE_LVHD
3191 SUBTYPES = ["lvhdoiscsi", "lvhdohba"]
3193 def __init__(self, uuid, xapi, createLock, force):
3194 SR.__init__(self, uuid, xapi, createLock, force)
3195 self.vgName = "%s%s" % (VG_PREFIX, self.uuid)
3196 self.path = os.path.join(VG_LOCATION, self.vgName)
3198 sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid)
3199 other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref)
3200 lvm_conf = other_conf.get('lvm-conf') if other_conf else None
3201 self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf)
3203 self.lvActivator = LVActivator(self.uuid, self.lvmCache)
3204 self.journaler = journaler.Journaler(self.lvmCache)
3206 @override
3207 def deleteVDI(self, vdi) -> None:
3208 if self.lvActivator.get(vdi.uuid, False):
3209 self.lvActivator.deactivate(vdi.uuid, False)
3210 self._checkSlaves(vdi)
3211 SR.deleteVDI(self, vdi)
3213 @override
3214 def forgetVDI(self, vdiUuid) -> None:
3215 SR.forgetVDI(self, vdiUuid)
3216 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME)
3217 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid)
3219 @override
3220 def getFreeSpace(self) -> int:
3221 stats = lvutil._getVGstats(self.vgName)
3222 return stats['physical_size'] - stats['physical_utilisation']
3224 @override
3225 def cleanup(self):
3226 if not self.lvActivator.deactivateAll():
3227 Util.log("ERROR deactivating LVs while cleaning up")
3229 @override
3230 def needUpdateBlockInfo(self) -> bool:
3231 for vdi in self.vdis.values():
3232 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0:
3233 continue
3234 if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
3235 return True
3236 return False
3238 @override
3239 def updateBlockInfo(self) -> None:
3240 numUpdated = 0
3241 for vdi in self.vdis.values():
3242 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0:
3243 continue
3244 if not vdi.getConfig(vdi.DB_VDI_BLOCKS):
3245 vdi.updateBlockInfo()
3246 numUpdated += 1
3247 if numUpdated:
3248 # deactivate the LVs back sooner rather than later. If we don't
3249 # now, by the time this thread gets to deactivations, another one
3250 # might have leaf-coalesced a node and deleted it, making the child
3251 # inherit the refcount value and preventing the correct decrement
3252 self.cleanup()
3254 @override
3255 def scan(self, force=False) -> None:
3256 vdis = self._scan(force)
3257 for uuid, vdiInfo in vdis.items():
3258 vdi = self.getVDI(uuid)
3259 if not vdi:
3260 self.logFilter.logNewVDI(uuid)
3261 vdi = LVMVDI(self, uuid, vdiInfo.vdiType)
3262 self.vdis[uuid] = vdi
3263 vdi.load(vdiInfo)
3264 self._removeStaleVDIs(vdis.keys())
3265 self._buildTree(force)
3266 self.logFilter.logState()
3267 self._handleInterruptedCoalesceLeaf()
3269 def _scan(self, force):
3270 for i in range(SR.SCAN_RETRY_ATTEMPTS):
3271 error = False
3272 self.lvmCache.refresh()
3273 vdis = LvmCowUtil.getVDIInfo(self.lvmCache)
3274 for uuid, vdiInfo in vdis.items():
3275 if vdiInfo.scanError:
3276 error = True
3277 break
3278 if not error:
3279 return vdis
3280 Util.log("Scan error, retrying (%d)" % i)
3281 if force:
3282 return vdis
3283 raise util.SMException("Scan error")
3285 @override
3286 def _removeStaleVDIs(self, uuidsPresent) -> None:
3287 for uuid in list(self.vdis.keys()):
3288 if not uuid in uuidsPresent:
3289 Util.log("VDI %s disappeared since last scan" % \
3290 self.vdis[uuid])
3291 del self.vdis[uuid]
3292 if self.lvActivator.get(uuid, False):
3293 self.lvActivator.remove(uuid, False)
3295 @override
3296 def _liveLeafCoalesce(self, vdi) -> bool:
3297 """If the parent is raw and the child was resized (virt. size), then
3298 we'll need to resize the parent, which can take a while due to zeroing
3299 out of the extended portion of the LV. Do it before pausing the child
3300 to avoid a protracted downtime"""
3301 if not VdiType.isCowImage(vdi.parent.vdi_type) and vdi.sizeVirt > vdi.parent.sizeVirt:
3302 self.lvmCache.setReadonly(vdi.parent.fileName, False)
3303 vdi.parent._increaseSizeVirt(vdi.sizeVirt)
3305 return SR._liveLeafCoalesce(self, vdi)
3307 @override
3308 def _prepareCoalesceLeaf(self, vdi) -> None:
3309 vdi._activateChain()
3310 self.lvmCache.setReadonly(vdi.parent.fileName, False)
3311 vdi.deflate()
3312 vdi.inflateParentForCoalesce()
3314 @override
3315 def _updateNode(self, vdi) -> None:
3316 # fix the refcounts: the remaining node should inherit the binary
3317 # refcount from the leaf (because if it was online, it should remain
3318 # refcounted as such), but the normal refcount from the parent (because
3319 # this node is really the parent node) - minus 1 if it is online (since
3320 # non-leaf nodes increment their normal counts when they are online and
3321 # we are now a leaf, storing that 1 in the binary refcount).
3322 ns = NS_PREFIX_LVM + self.uuid
3323 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns)
3324 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns)
3325 pCnt = pCnt - cBcnt
3326 assert(pCnt >= 0)
3327 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns)
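        # Worked example with hypothetical counts: if the old leaf held RefCounter
        # (count, binary) = (0, 1) because it was online, and the parent held
        # (2, 0), the surviving node is set to (2 - 1, 1) = (1, 1): it keeps the
        # parent's normal references minus the one the parent only had because an
        # online child chain existed, and inherits the leaf's binary (online)
        # refcount.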
3329 @override
3330 def _finishCoalesceLeaf(self, parent) -> None:
3331 if not parent.isSnapshot() or parent.isAttachedRW():
3332 parent.inflateFully()
3333 else:
3334 parent.deflate()
3336 @override
3337 def _calcExtraSpaceNeeded(self, child, parent) -> int:
3338 return parent.lvmcowutil.calcVolumeSize(parent.sizeVirt) - parent.sizeLV
3340 @override
3341 def _handleInterruptedCoalesceLeaf(self) -> None:
3342 entries = self.journaler.getAll(VDI.JRN_LEAF)
3343 for uuid, parentUuid in entries.items():
3344 undo = False
3345 for prefix in LV_PREFIX.values():
3346 parentLV = prefix + parentUuid
3347 undo = self.lvmCache.checkLV(parentLV)
3348 if undo:
3349 break
3351 if not undo:
3352 for prefix in LV_PREFIX.values():
3353 tmpChildLV = prefix + uuid
3354 undo = self.lvmCache.checkLV(tmpChildLV)
3355 if undo:
3356 break
3358 if undo:
3359 self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
3360 else:
3361 self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
3362 self.journaler.remove(VDI.JRN_LEAF, uuid)
3363 vdi = self.getVDI(uuid)
3364 if vdi:
3365 vdi.ensureUnpaused()
3367 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3368 Util.log("*** UNDO LEAF-COALESCE")
3369 parent = self.getVDI(parentUuid)
3370 if not parent:
3371 parent = self.getVDI(childUuid)
3372 if not parent:
3373 raise util.SMException("Neither %s nor %s found" % \
3374 (parentUuid, childUuid))
3375 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
3376 parent.rename(parentUuid)
3377 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)
3379 child = self.getVDI(childUuid)
3380 if not child:
3381 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
3382 if not child:
3383 raise util.SMException("Neither %s nor %s found" % \
3384 (childUuid, self.TMP_RENAME_PREFIX + childUuid))
3385 Util.log("Renaming child back to %s" % childUuid)
3386 child.rename(childUuid)
3387 Util.log("Updating the VDI record")
3388 child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
3389 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
3390 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)
3392 # refcount (best effort - assume that it had succeeded if the
3393 # second rename succeeded; if not, this adjustment will be wrong,
3394 # leading to a non-deactivation of the LV)
3395 ns = NS_PREFIX_LVM + self.uuid
3396 cCnt, cBcnt = RefCounter.check(child.uuid, ns)
3397 pCnt, pBcnt = RefCounter.check(parent.uuid, ns)
3398 pCnt = pCnt + cBcnt
3399 RefCounter.set(parent.uuid, pCnt, 0, ns)
3400 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid)
3402 parent.deflate()
3403 child.inflateFully()
3404 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid)
3405 if child.isHidden():
3406 child._setHidden(False)
3407 if not parent.isHidden():
3408 parent._setHidden(True)
3409 if not parent.lvReadonly:
3410 self.lvmCache.setReadonly(parent.fileName, True)
3411 self._updateSlavesOnUndoLeafCoalesce(parent, child)
3412 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
3413 Util.log("*** leaf-coalesce undo successful")
3414 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
3415 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)
3417 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3418 Util.log("*** FINISH LEAF-COALESCE")
3419 vdi = self.getVDI(childUuid)
3420 if not vdi:
3421 raise util.SMException("VDI %s not found" % childUuid)
3422 vdi.inflateFully()
3423 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid)
3424 try:
3425 self.forgetVDI(parentUuid)
3426 except XenAPI.Failure:
3427 pass
3428 self._updateSlavesOnResize(vdi)
3429 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
3430 Util.log("*** finished leaf-coalesce successfully")
3432 def _checkSlaves(self, vdi):
3433 """Confirm with all slaves in the pool that 'vdi' is not in use. We
3434 try to check all slaves, including those that the Agent believes are
3435 offline, but ignore failures for offline hosts. This is to avoid cases
3436 where the Agent thinks a host is offline but the host is up."""
3437 args = {"vgName": self.vgName,
3438 "action1": "deactivateNoRefcount",
3439 "lvName1": vdi.fileName,
3440 "action2": "cleanupLockAndRefcount",
3441 "uuid2": vdi.uuid,
3442 "ns2": NS_PREFIX_LVM + self.uuid}
3443 onlineHosts = self.xapi.getOnlineHosts()
3444 abortFlag = IPCFlag(self.uuid)
3445 for pbdRecord in self.xapi.getAttachedPBDs():
3446 hostRef = pbdRecord["host"]
3447 if hostRef == self.xapi._hostRef:
3448 continue
3449 if abortFlag.test(FLAG_TYPE_ABORT):
3450 raise AbortException("Aborting due to signal")
3451 Util.log("Checking with slave %s (path %s)" % (
3452 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path))
3453 try:
3454 self.xapi.ensureInactive(hostRef, args)
3455 except XenAPI.Failure:
3456 if hostRef in onlineHosts:
3457 raise
3459 @override
3460 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
3461 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid])
3462 if not slaves:
3463 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \
3464 child)
3465 return
3467 tmpName = LV_PREFIX[child.vdi_type] + self.TMP_RENAME_PREFIX + child.uuid
3468 args = {"vgName": self.vgName,
3469 "action1": "deactivateNoRefcount",
3470 "lvName1": tmpName,
3471 "action2": "deactivateNoRefcount",
3472 "lvName2": child.fileName,
3473 "action3": "refresh",
3474 "lvName3": child.fileName,
3475 "action4": "refresh",
3476 "lvName4": parent.fileName}
3477 for slave in slaves:
3478 Util.log("Updating %s, %s, %s on slave %s" % \
3479 (tmpName, child.fileName, parent.fileName,
3480 self.xapi.getRecordHost(slave)['hostname']))
3481 text = self.xapi.session.xenapi.host.call_plugin( \
3482 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
3483 Util.log("call-plugin returned: '%s'" % text)
3485 @override
3486 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None:
3487 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid])
3488 if not slaves:
3489 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi)
3490 return
3492 args = {"vgName": self.vgName,
3493 "action1": "deactivateNoRefcount",
3494 "lvName1": oldNameLV,
3495 "action2": "refresh",
3496 "lvName2": vdi.fileName,
3497 "action3": "cleanupLockAndRefcount",
3498 "uuid3": origParentUuid,
3499 "ns3": NS_PREFIX_LVM + self.uuid}
3500 for slave in slaves:
3501 Util.log("Updating %s to %s on slave %s" % \
3502 (oldNameLV, vdi.fileName,
3503 self.xapi.getRecordHost(slave)['hostname']))
3504 text = self.xapi.session.xenapi.host.call_plugin( \
3505 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
3506 Util.log("call-plugin returned: '%s'" % text)
3508 @override
3509 def _updateSlavesOnResize(self, vdi) -> None:
3510 uuids = [x.uuid for x in vdi.getAllLeaves()]
3511 slaves = util.get_slaves_attached_on(self.xapi.session, uuids)
3512 if not slaves:
3513 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi)
3514 return
3515 LvmCowUtil.refreshVolumeOnSlaves(self.xapi.session, self.uuid, self.vgName,
3516 vdi.fileName, vdi.uuid, slaves)
3519class LinstorSR(SR):
3520 TYPE = SR.TYPE_LINSTOR
3522 def __init__(self, uuid, xapi, createLock, force):
3523 if not LINSTOR_AVAILABLE:
3524 raise util.SMException(
3525 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing'
3526 )
3528 SR.__init__(self, uuid, xapi, createLock, force)
3529 self.path = LinstorVolumeManager.DEV_ROOT_PATH
3531 class LinstorProxy:
3532 def __init__(self, sr: LinstorSR) -> None:
3533 self.sr = sr
3535 def __getattr__(self, attr: str) -> Any:
3536 assert self.sr, "Cannot use `LinstorProxy` without valid `LinstorVolumeManager` instance"
3537 return getattr(self.sr._linstor, attr)
3539 self._linstor_proxy = LinstorProxy(self)
3540 self._reloadLinstor(journaler_only=True)
3542 @override
3543 def deleteVDI(self, vdi) -> None:
3544 self._checkSlaves(vdi)
3545 SR.deleteVDI(self, vdi)
3547 @override
3548 def getFreeSpace(self) -> int:
3549 return self._linstor.max_volume_size_allowed
3551 @override
3552 def scan(self, force=False) -> None:
3553 all_vdi_info = self._scan(force)
3554 for uuid, vdiInfo in all_vdi_info.items():
3555 # When vdiInfo is None, the VDI is RAW.
3556 vdi = self.getVDI(uuid)
3557 if not vdi:
3558 self.logFilter.logNewVDI(uuid)
3559 vdi = LinstorVDI(self, uuid, vdiInfo.vdiType if vdiInfo else VdiType.RAW)
3560 self.vdis[uuid] = vdi
3561 if vdiInfo:
3562 vdi.load(vdiInfo)
3563 self._removeStaleVDIs(all_vdi_info.keys())
3564 self._buildTree(force)
3565 self.logFilter.logState()
3566 self._handleInterruptedCoalesceLeaf()
3568 @override
3569 def pauseVDIs(self, vdiList) -> None:
3570 self._linstor.ensure_volume_list_is_not_locked(
3571 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
3572 )
3573 return super(LinstorSR, self).pauseVDIs(vdiList)
3575 def _reloadLinstor(self, journaler_only=False):
3576 session = self.xapi.session
3577 host_ref = util.get_this_host_ref(session)
3578 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid)
3580 pbd = util.find_my_pbd(session, host_ref, sr_ref)
3581 if pbd is None:
3582 raise util.SMException('Failed to find PBD')
3584 dconf = session.xenapi.PBD.get_device_config(pbd)
3585 group_name = dconf['group-name']
3587 controller_uri = get_controller_uri()
3588 self.journaler = LinstorJournaler(
3589 controller_uri, group_name, logger=util.SMlog
3590 )
3592 if journaler_only:
3593 return
3595 self._linstor = LinstorVolumeManager(
3596 controller_uri,
3597 group_name,
3598 repair=True,
3599 logger=util.SMlog
3600 )
3602 def _scan(self, force):
3603 for i in range(SR.SCAN_RETRY_ATTEMPTS):
3604 self._reloadLinstor()
3605 error = False
3606 try:
3607 all_vdi_info = self._load_vdi_info()
3608 for uuid, vdiInfo in all_vdi_info.items():
3609 if vdiInfo and vdiInfo.error:
3610 error = True
3611 break
3612 if not error:
3613 return all_vdi_info
3614 Util.log('Scan error, retrying ({})'.format(i))
3615 except Exception as e:
3616 Util.log('Scan exception, retrying ({}): {}'.format(i, e))
3617 Util.log(traceback.format_exc())
3619 if force:
3620 return all_vdi_info
3621 raise util.SMException('Scan error')
3623 def _load_vdi_info(self):
3624 all_vdi_info = {}
3626 # TODO: Ensure metadata contains the right info.
3628 all_volume_info = self._linstor.get_volumes_with_info()
3629 volumes_metadata = self._linstor.get_volumes_with_metadata()
3630 for vdi_uuid, volume_info in all_volume_info.items():
3631 vdi_type = VdiType.RAW
3632 try:
3633 volume_metadata = volumes_metadata[vdi_uuid]
3634 if not volume_info.name and not list(volume_metadata.items()):
3635 continue # Ignore it, probably deleted.
3637 if vdi_uuid.startswith('DELETED_'):
3638 # Assume it's really a RAW volume left over from a failed snapshot,
3639 # without a COW header/footer. We must remove this VDI now, without
3640 # adding it to the VDI list; otherwise `Relinking` calls and other
3641 # actions could be launched on it, which we don't want.
3642 Util.log('Deleting bad VDI {}'.format(vdi_uuid))
3644 self.lock()
3645 try:
3646 self._linstor.destroy_volume(vdi_uuid)
3647 try:
3648 self.forgetVDI(vdi_uuid)
3649 except:
3650 pass
3651 except Exception as e:
3652 Util.log('Cannot delete bad VDI: {}'.format(e))
3653 finally:
3654 self.unlock()
3655 continue
3657 vdi_type = volume_metadata.get(VDI_TYPE_TAG)
3658 volume_name = self._linstor.get_volume_name(vdi_uuid)
3659 if volume_name.startswith(LINSTOR_PERSISTENT_PREFIX):
3660 # Always RAW!
3661 info = None
3662 elif VdiType.isCowImage(vdi_type):
3663 info = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type).get_info(vdi_uuid)
3664 else:
3665 # Ensure it's not a COW image...
3666 linstorcowutil = LinstorCowUtil(self.xapi.session, self._linstor, vdi_type)
3667 try:
3668 info = linstorcowutil.get_info(vdi_uuid)
3669 except:
3670 try:
3671 linstorcowutil.force_repair(
3672 self._linstor.get_device_path(vdi_uuid)
3673 )
3674 info = linstorcowutil.get_info(vdi_uuid)
3675 except:
3676 info = None
3678 except Exception as e:
3679 Util.log(
3680 ' [VDI {}: failed to load VDI info]: {}'
3681 .format(vdi_uuid, e)
3682 )
3683 info = CowImageInfo(vdi_uuid)
3684 info.error = 1
3686 if info:
3687 info.vdiType = vdi_type
3689 all_vdi_info[vdi_uuid] = info
3691 return all_vdi_info
3693 @override
3694 def _prepareCoalesceLeaf(self, vdi) -> None:
3695 vdi._activateChain()
3696 vdi.deflate()
3697 vdi._inflateParentForCoalesce()
3699 @override
3700 def _finishCoalesceLeaf(self, parent) -> None:
3701 if not parent.isSnapshot() or parent.isAttachedRW():
3702 parent.inflateFully()
3703 else:
3704 parent.deflate()
3706 @override
3707 def _calcExtraSpaceNeeded(self, child, parent) -> int:
3708 return LinstorCowUtil(
3709 self.xapi.session, self._linstor, parent.vdi_type
3710 ).compute_volume_size(parent.sizeVirt) - parent.getDrbdSize()
3712 def _hasValidDevicePath(self, uuid):
3713 try:
3714 self._linstor.get_device_path(uuid)
3715 except Exception:
3716 # TODO: Maybe log exception.
3717 return False
3718 return True
3720 @override
3721 def _liveLeafCoalesce(self, vdi) -> bool:
3722 self.lock()
3723 try:
3724 self._linstor.ensure_volume_is_not_locked(
3725 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
3726 )
3727 return super(LinstorSR, self)._liveLeafCoalesce(vdi)
3728 finally:
3729 self.unlock()
3731 @override
3732 def _handleInterruptedCoalesceLeaf(self) -> None:
3733 entries = self.journaler.get_all(VDI.JRN_LEAF)
3734 for uuid, parentUuid in entries.items():
3735 if self._hasValidDevicePath(parentUuid) or \
3736 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid):
3737 self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
3738 else:
3739 self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
3740 self.journaler.remove(VDI.JRN_LEAF, uuid)
3741 vdi = self.getVDI(uuid)
3742 if vdi:
3743 vdi.ensureUnpaused()
3745 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3746 Util.log('*** UNDO LEAF-COALESCE')
3747 parent = self.getVDI(parentUuid)
3748 if not parent:
3749 parent = self.getVDI(childUuid)
3750 if not parent:
3751 raise util.SMException(
3752 'Neither {} nor {} found'.format(parentUuid, childUuid)
3753 )
3754 Util.log(
3755 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid)
3756 )
3757 parent.rename(parentUuid)
3759 child = self.getVDI(childUuid)
3760 if not child:
3761 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
3762 if not child:
3763 raise util.SMException(
3764 'Neither {} nor {} found'.format(
3765 childUuid, self.TMP_RENAME_PREFIX + childUuid
3766 )
3767 )
3768 Util.log('Renaming child back to {}'.format(childUuid))
3769 child.rename(childUuid)
3770 Util.log('Updating the VDI record')
3771 child.setConfig(VDI.DB_VDI_PARENT, parentUuid)
3772 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type)
3774 # TODO: Maybe deflate here.
3776 if child.isHidden():
3777 child._setHidden(False)
3778 if not parent.isHidden():
3779 parent._setHidden(True)
3780 self._updateSlavesOnUndoLeafCoalesce(parent, child)
3781 Util.log('*** leaf-coalesce undo successful')
3783 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3784 Util.log('*** FINISH LEAF-COALESCE')
3785 vdi = self.getVDI(childUuid)
3786 if not vdi:
3787 raise util.SMException('VDI {} not found'.format(childUuid))
3788 # TODO: Maybe inflate.
3789 try:
3790 self.forgetVDI(parentUuid)
3791 except XenAPI.Failure:
3792 pass
3793 self._updateSlavesOnResize(vdi)
3794 Util.log('*** finished leaf-coalesce successfully')
3796 def _checkSlaves(self, vdi):
3797 try:
3798 all_openers = self._linstor.get_volume_openers(vdi.uuid)
3799 for openers in all_openers.values():
3800 for opener in openers.values():
3801 if opener['process-name'] != 'tapdisk':
3802 raise util.SMException(
3803 'VDI {} is in use: {}'.format(vdi.uuid, all_openers)
3804 )
3805 except LinstorVolumeManagerError as e:
3806 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS:
3807 raise
3810################################################################################
3811#
3812# Helpers
3813#
3814def daemonize():
3815 pid = os.fork()
3816 if pid:
3817 os.waitpid(pid, 0)
3818 Util.log("New PID [%d]" % pid)
3819 return False
3820 os.chdir("/")
3821 os.setsid()
3822 pid = os.fork()
3823 if pid:
3824 Util.log("Will finish as PID [%d]" % pid)
3825 os._exit(0)
3826 for fd in [0, 1, 2]:
3827 try:
3828 os.close(fd)
3829 except OSError:
3830 pass
3831 # we need to fill those special fd numbers or pread won't work
3832 sys.stdin = open("/dev/null", 'r')
3833 sys.stderr = open("/dev/null", 'w')
3834 sys.stdout = open("/dev/null", 'w')
3835 # As we're a new process we need to clear the lock objects
3836 lock.Lock.clearAll()
3837 return True
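# Illustrative note (not part of the original module): daemonize() is the
# classic double-fork. The original process waits for the intermediate child
# and returns False so the caller can carry on; the detached grandchild
# (new session, cwd "/", std fds reopened on /dev/null) returns True and is
# the one that should run the GC, roughly as gc() does below:
#
#     if daemonize():
#         _gc(None, srUuid, dryRun)  # we are now the background process
#         os._exit(0)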
3840def normalizeType(type):
3841 if type in LVMSR.SUBTYPES:
3842 type = SR.TYPE_LVHD
3843 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]:
3844 # temporary while LVHD is symlinked as LVM
3845 type = SR.TYPE_LVHD
3846 if type in [
3847 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs",
3848 "moosefs", "xfs", "zfs", "largeblock"
3849 ]:
3850 type = SR.TYPE_FILE
3851 if type in ["linstor"]:
3852 type = SR.TYPE_LINSTOR
3853 if type not in SR.TYPES:
3854 raise util.SMException("Unsupported SR type: %s" % type)
3855 return type
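# Example (illustrative): normalizeType("lvmoiscsi") and normalizeType("lvm")
# both map to SR.TYPE_LVHD, normalizeType("nfs") maps to SR.TYPE_FILE, and an
# unrecognized value such as "foo" raises
# util.SMException("Unsupported SR type: foo").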
3857GCPAUSE_DEFAULT_SLEEP = 5 * 60
3860def _gc_init_file(sr_uuid):
3861 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init')
3864def _create_init_file(sr_uuid):
3865 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid)))
3866 with open(_gc_init_file(sr_uuid), 'w+') as f:
3867 f.write('1')
3870def _gcLoopPause(sr, dryRun=False, immediate=False):
3871 if immediate:
3872 return
3874 # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist
3875 # point will just return. Otherwise, fall back on an abortable sleep.
3877 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT):
3879 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT,
3880 lambda *args: None)
3881 elif os.path.exists(_gc_init_file(sr.uuid)):
3882 def abortTest():
3883 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT)
3885 # If time.sleep hangs we are in deep trouble; however, for
3886 # completeness we set the timeout of the abort thread to
3887 # 110% of GCPAUSE_DEFAULT_SLEEP.
3888 Util.log("GC active, about to go quiet")
3889 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP),
3890 None, sr.uuid, abortTest, VDI.POLL_INTERVAL,
3891 GCPAUSE_DEFAULT_SLEEP * 1.1)
3892 Util.log("GC active, quiet period ended")
3895def _gcLoop(sr, dryRun=False, immediate=False):
3896 if not lockGCActive.acquireNoblock():
3897 Util.log("Another GC instance already active, exiting")
3898 return
3900 # Check we're still attached after acquiring locks
3901 if not sr.xapi.isPluggedHere():
3902 Util.log("SR no longer attached, exiting")
3903 return
3905 # Clean up Intellicache files
3906 sr.cleanupCache()
3908 # Track how many we do
3909 coalesced = 0
3910 task_status = "success"
3911 try:
3912 # Check if any work needs to be done
3913 if not sr.xapi.isPluggedHere():
3914 Util.log("SR no longer attached, exiting")
3915 return
3916 sr.scanLocked()
3917 if not sr.hasWork():
3918 Util.log("No work, exiting")
3919 return
3920 sr.xapi.create_task(
3921 "Garbage Collection",
3922 "Garbage collection for SR %s" % sr.uuid)
3923 _gcLoopPause(sr, dryRun, immediate=immediate)
3924 while True:
3925 if SIGTERM:
3926 Util.log("Term requested")
3927 return
3929 if not sr.xapi.isPluggedHere():
3930 Util.log("SR no longer attached, exiting")
3931 break
3932 sr.scanLocked()
3933 if not sr.hasWork():
3934 Util.log("No work, exiting")
3935 break
3937 if not lockGCRunning.acquireNoblock():
3938 Util.log("Unable to acquire GC running lock.")
3939 return
3940 try:
3941 if not sr.gcEnabled():
3942 break
3944 sr.xapi.update_task_progress("done", coalesced)
3946 sr.cleanupCoalesceJournals()
3947 # Create the init file here in case startup is waiting on it
3948 _create_init_file(sr.uuid)
3949 sr.scanLocked()
3950 sr.updateBlockInfo()
3952 howmany = len(sr.findGarbage())
3953 if howmany > 0:
3954 Util.log("Found %d orphaned vdis" % howmany)
3955 sr.lock()
3956 try:
3957 sr.garbageCollect(dryRun)
3958 finally:
3959 sr.unlock()
3960 sr.xapi.srUpdate()
3962 candidate = sr.findCoalesceable()
3963 if candidate:
3964 util.fistpoint.activate(
3965 "LVHDRT_finding_a_suitable_pair", sr.uuid)
3966 sr.coalesce(candidate, dryRun)
3967 sr.xapi.srUpdate()
3968 coalesced += 1
3969 continue
3971 candidate = sr.findLeafCoalesceable()
3972 if candidate:
3973 sr.coalesceLeaf(candidate, dryRun)
3974 sr.xapi.srUpdate()
3975 coalesced += 1
3976 continue
3978 finally:
3979 lockGCRunning.release()
3980 except:
3981 task_status = "failure"
3982 raise
3983 finally:
3984 sr.xapi.set_task_status(task_status)
3985 Util.log("GC process exiting, no work left")
3986 _create_init_file(sr.uuid)
3987 lockGCActive.release()
3990def _gc(session, srUuid, dryRun=False, immediate=False):
3991 init(srUuid)
3992 sr = SR.getInstance(srUuid, session)
3993 if not sr.gcEnabled(False):
3994 return
3996 try:
3997 _gcLoop(sr, dryRun, immediate=immediate)
3998 finally:
3999 sr.check_no_space_candidates()
4000 sr.cleanup()
4001 sr.logFilter.logState()
4002 del sr.xapi
4005def _abort(srUuid, soft=False):
4006 """Aborts an GC/coalesce.
4008 srUuid: the UUID of the SR whose GC/coalesce must be aborted
4009 soft: If set to True and there is a pending abort signal, the function
4010 doesn't do anything. If set to False, a new abort signal is issued.
4012 returns: True, holding lockGCActive, if the abort succeeded. If soft is
4013 set to True and an abort signal is already pending, we return False
4014 without holding lockGCActive. An exception is raised in case of error."""
4015 Util.log("=== SR %s: abort ===" % (srUuid))
4016 init(srUuid)
4017 if not lockGCActive.acquireNoblock():
4018 gotLock = False
4019 Util.log("Aborting currently-running instance (SR %s)" % srUuid)
4020 abortFlag = IPCFlag(srUuid)
4021 if not abortFlag.set(FLAG_TYPE_ABORT, soft):
4022 return False
4023 for i in range(SR.LOCK_RETRY_ATTEMPTS):
4024 gotLock = lockGCActive.acquireNoblock()
4025 if gotLock:
4026 break
4027 time.sleep(SR.LOCK_RETRY_INTERVAL)
4028 abortFlag.clear(FLAG_TYPE_ABORT)
4029 if not gotLock:
4030 raise util.CommandException(code=errno.ETIMEDOUT,
4031 reason="SR %s: error aborting existing process" % srUuid)
4032 return True
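# Usage sketch (illustrative, not part of the original module):
#
#     if _abort(srUuid):             # hard abort: signal and wait for the lock
#         ...                        # we now hold lockGCActive
#         lockGCActive.release()
#
#     if not _abort(srUuid, soft=True):
#         pass                       # an abort signal was already pending
#
# abort() below wraps exactly this pattern, releasing the lock again once the
# running instance has been stopped.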
4035def init(srUuid):
4036 global lockGCRunning
4037 if not lockGCRunning:
4038 lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid)
4039 global lockGCActive
4040 if not lockGCActive:
4041 lockGCActive = LockActive(srUuid)
4044class LockActive:
4045 """
4046 Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired
4047 if another process holds the SR lock.
4048 """
4049 def __init__(self, srUuid):
4050 self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid)
4051 self._srLock = lock.Lock(lock.LOCK_TYPE_SR, srUuid)
4053 def acquireNoblock(self):
4054 self._srLock.acquire()
4056 try:
4057 return self._lock.acquireNoblock()
4058 finally:
4059 self._srLock.release()
4061 def release(self):
4062 self._lock.release()
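# Illustrative usage (this is how init() above wires it up): the GC "active"
# lock is always taken through this wrapper so that it cannot be grabbed while
# another process holds the SR lock:
#
#     lockGCActive = LockActive(srUuid)
#     if lockGCActive.acquireNoblock():
#         try:
#             ...                    # run GC/coalesce
#         finally:
#             lockGCActive.release()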
4065def usage():
4066 output = """Garbage collect and/or coalesce COW images in a COW-based SR
4068Parameters:
4069 -u --uuid UUID SR UUID
4070 and one of:
4071 -g --gc garbage collect, coalesce, and repeat while there is work
4072 -G --gc_force garbage collect once, aborting any current operations
4073 -c --clean_cache <max_age> clean up IntelliCache cache files older than
4074 max_age hours
4075 -a --abort abort any currently running operation (GC or coalesce)
4076 -q --query query the current state (GC'ing, coalescing or not running)
4077 -x --disable disable GC/coalesce (will be in effect until you exit)
4078 -t --debug see Debug below
4080Options:
4081 -b --background run in background (return immediately) (valid for -g only)
4082 -f --force continue in the presence of COW images with errors (when doing
4083 GC, this might cause removal of any such images) (only valid
4084 for -G) (DANGEROUS)
4086Debug:
4087 The --debug parameter enables manipulation of LVHD VDIs for debugging
4088 purposes. ** NEVER USE IT ON A LIVE VM **
4089 The following parameters are required:
4090 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate",
4091 "deflate".
4092 -v --vdi_uuid VDI UUID
4093 """
4094 #-d --dry-run don't actually perform any SR-modifying operations
4095 print(output)
4096 Util.log("(Invalid usage)")
4097 sys.exit(1)
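# Example invocations (illustrative): "cleanup.py -u <SR_UUID> -g -b" kicks a
# background GC/coalesce run, "cleanup.py -u <SR_UUID> -G" runs a single
# forced collection, and "cleanup.py -u <SR_UUID> -q" queries whether a run is
# currently active.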
4100##############################################################################
4101#
4102# API
4103#
4104def abort(srUuid, soft=False):
4105 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair.
4106 """
4107 if _abort(srUuid, soft):
4108 Util.log("abort: releasing the process lock")
4109 lockGCActive.release()
4110 return True
4111 else:
4112 return False
4115def gc(session, srUuid, inBackground, dryRun=False):
4116 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return
4117 immediately if inBackground=True.
4119 The following algorithm is used:
4120 1. If we are already GC'ing in this SR, return
4121 2. If we are already coalescing a VDI pair:
4122 a. Scan the SR and determine if the VDI pair is GC'able
4123 b. If the pair is not GC'able, return
4124 c. If the pair is GC'able, abort coalesce
4125 3. Scan the SR
4126 4. If there is nothing to collect, nor to coalesce, return
4127 5. If there is something to collect, GC all, then goto 3
4128 6. If there is something to coalesce, coalesce one pair, then goto 3
4129 """
4130 Util.log("=== SR %s: gc ===" % srUuid)
4132 signal.signal(signal.SIGTERM, receiveSignal)
4134 if inBackground:
4135 if daemonize():
4136 # we are now running in the background. Catch & log any errors
4137 # because there is no other way to propagate them back at this
4138 # point
4140 try:
4141 _gc(None, srUuid, dryRun)
4142 except AbortException:
4143 Util.log("Aborted")
4144 except Exception:
4145 Util.logException("gc")
4146 Util.log("* * * * * SR %s: ERROR\n" % srUuid)
4147 os._exit(0)
4148 else:
4149 _gc(session, srUuid, dryRun, immediate=True)
4152def start_gc(session, sr_uuid):
4153 """
4154 This function is used to try to start a backgrounded GC session by forking
4155 the current process. If using the systemd version, call start_gc_service() instead.
4156 """
4157 # don't bother if an instance is already running (this is just an
4158 # optimization to reduce the overhead of forking a new process if we
4159 # don't have to, but the process will check the lock anyway)
4160 lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid)
4161 if not lockRunning.acquireNoblock():
4162 if should_preempt(session, sr_uuid):
4163 util.SMlog("Aborting currently-running coalesce of garbage VDI")
4164 try:
4165 if not abort(sr_uuid, soft=True):
4166 util.SMlog("The GC has already been scheduled to re-start")
4167 except util.CommandException as e:
4168 if e.code != errno.ETIMEDOUT:
4169 raise
4170 util.SMlog('failed to abort the GC')
4171 else:
4172 util.SMlog("A GC instance already running, not kicking")
4173 return
4174 else:
4175 lockRunning.release()
4177 util.SMlog(f"Starting GC file is {__file__}")
4178 subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'],
4179 stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
4181def start_gc_service(sr_uuid, wait=False):
4182 """
4183 This starts the templated systemd service which runs GC on the given SR UUID.
4184 If the service was already started, this is a no-op.
4186 Because the service is a one-shot with RemainAfterExit=no, when called with
4187 wait=True this will run the service synchronously and will not return until the
4188 run has finished. This is used to force a run of the GC instead of just kicking it
4189 in the background.
4190 """
4191 sr_uuid_esc = sr_uuid.replace("-", "\\x2d")
4192 util.SMlog(f"Kicking SMGC@{sr_uuid}...")
4193 cmd = ["/usr/bin/systemctl", "--quiet"]
4194 if not wait:
4195 cmd.append("--no-block")
4196 cmd += ["start", f"SMGC@{sr_uuid_esc}"]
4197 subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
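# Worked example (illustrative, hypothetical UUID): for an SR UUID of
# "4aee1e5d-1a7c-4e5b-9b1d-0123456789ab" the replace() above yields
# "4aee1e5d\x2d1a7c\x2d4e5b\x2d9b1d\x2d0123456789ab" (the same escaping
# systemd-escape applies to "-"), so the unit started is
# "SMGC@4aee1e5d\x2d1a7c\x2d4e5b\x2d9b1d\x2d0123456789ab".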
4200def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False):
4201 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure
4202 the SR lock is held.
4203 The following algorithm is used:
4204 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce
4205 2. Scan the SR
4206 3. GC
4207 4. return
4208 """
4209 Util.log("=== SR %s: gc_force ===" % srUuid)
4210 init(srUuid)
4211 sr = SR.getInstance(srUuid, session, lockSR, True)
4212 if not lockGCActive.acquireNoblock():
4213 abort(srUuid)
4214 else:
4215 Util.log("Nothing was running, clear to proceed")
4217 if force:
4218 Util.log("FORCED: will continue even if there are COW image errors")
4219 sr.scanLocked(force)
4220 sr.cleanupCoalesceJournals()
4222 try:
4223 sr.cleanupCache()
4224 sr.garbageCollect(dryRun)
4225 finally:
4226 sr.cleanup()
4227 sr.logFilter.logState()
4228 lockGCActive.release()
4231def get_state(srUuid):
4232 """Return whether GC/coalesce is currently running or not. This asks systemd for
4233 the state of the templated SMGC service and will return True if it is "activating"
4234 or "running" (for completeness, as in practice it will never achieve the latter state)
4235 """
4236 sr_uuid_esc = srUuid.replace("-", "\\x2d")
4237 cmd = ["/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"]
4238 result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
4239 state = result.stdout.decode('utf-8').rstrip()
4240 if state == "activating" or state == "running":
4241 return True
4242 return False
4245def should_preempt(session, srUuid):
4246 sr = SR.getInstance(srUuid, session)
4247 entries = sr.journaler.getAll(VDI.JRN_COALESCE)
4248 if len(entries) == 0:
4249 return False
4250 elif len(entries) > 1:
4251 raise util.SMException("More than one coalesce entry: " + str(entries))
4252 sr.scanLocked()
4253 coalescedUuid = entries.popitem()[0]
4254 garbage = sr.findGarbage()
4255 for vdi in garbage:
4256 if vdi.uuid == coalescedUuid:
4257 return True
4258 return False
4261def get_coalesceable_leaves(session, srUuid, vdiUuids):
4262 coalesceable = []
4263 sr = SR.getInstance(srUuid, session)
4264 sr.scanLocked()
4265 for uuid in vdiUuids:
4266 vdi = sr.getVDI(uuid)
4267 if not vdi:
4268 raise util.SMException("VDI %s not found" % uuid)
4269 if vdi.isLeafCoalesceable():
4270 coalesceable.append(uuid)
4271 return coalesceable
4274def cache_cleanup(session, srUuid, maxAge):
4275 sr = SR.getInstance(srUuid, session)
4276 return sr.cleanupCache(maxAge)
4279def debug(sr_uuid, cmd, vdi_uuid):
4280 Util.log("Debug command: %s" % cmd)
4281 sr = SR.getInstance(sr_uuid, None)
4282 if not isinstance(sr, LVMSR):
4283 print("Error: not an LVHD SR")
4284 return
4285 sr.scanLocked()
4286 vdi = sr.getVDI(vdi_uuid)
4287 if not vdi:
4288 print("Error: VDI %s not found")
4289 return
4290 print("Running %s on SR %s" % (cmd, sr))
4291 print("VDI before: %s" % vdi)
4292 if cmd == "activate":
4293 vdi._activate()
4294 print("VDI file: %s" % vdi.path)
4295 if cmd == "deactivate":
4296 ns = NS_PREFIX_LVM + sr.uuid
4297 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False)
4298 if cmd == "inflate":
4299 vdi.inflateFully()
4300 sr.cleanup()
4301 if cmd == "deflate":
4302 vdi.deflate()
4303 sr.cleanup()
4304 sr.scanLocked()
4305 print("VDI after: %s" % vdi)
4308def abort_optional_reenable(uuid):
4309 print("Disabling GC/coalesce for %s" % uuid)
4310 ret = _abort(uuid)
4311 input("Press enter to re-enable...")
4312 print("GC/coalesce re-enabled")
4313 lockGCRunning.release()
4314 if ret:
4315 lockGCActive.release()
4318##############################################################################
4319#
4320# CLI
4321#
4322def main():
4323 action = ""
4324 maxAge = 0
4325 uuid = ""
4326 background = False
4327 force = False
4328 dryRun = False
4329 debug_cmd = ""
4330 vdi_uuid = ""
4331 shortArgs = "gGc:aqxu:bfdt:v:"
4332 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable",
4333 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="]
4335 try:
4336 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs)
4337 except getopt.GetoptError:
4338 usage()
4339 for o, a in opts:
4340 if o in ("-g", "--gc"):
4341 action = "gc"
4342 if o in ("-G", "--gc_force"):
4343 action = "gc_force"
4344 if o in ("-c", "--clean_cache"):
4345 action = "clean_cache"
4346 maxAge = int(a)
4347 if o in ("-a", "--abort"):
4348 action = "abort"
4349 if o in ("-q", "--query"):
4350 action = "query"
4351 if o in ("-x", "--disable"):
4352 action = "disable"
4353 if o in ("-u", "--uuid"):
4354 uuid = a
4355 if o in ("-b", "--background"):
4356 background = True
4357 if o in ("-f", "--force"):
4358 force = True
4359 if o in ("-d", "--dry-run"):
4360 Util.log("Dry run mode")
4361 dryRun = True
4362 if o in ("-t", "--debug"):
4363 action = "debug"
4364 debug_cmd = a
4365 if o in ("-v", "--vdi_uuid"):
4366 vdi_uuid = a
4368 if not action or not uuid:
4369 usage()
4370 if action == "debug" and not (debug_cmd and vdi_uuid) or \
4371 action != "debug" and (debug_cmd or vdi_uuid):
4372 usage()
4374 if action != "query" and action != "debug":
4375 print("All output goes to log")
4377 if action == "gc":
4378 gc(None, uuid, background, dryRun)
4379 elif action == "gc_force":
4380 gc_force(None, uuid, force, dryRun, True)
4381 elif action == "clean_cache":
4382 cache_cleanup(None, uuid, maxAge)
4383 elif action == "abort":
4384 abort(uuid)
4385 elif action == "query":
4386 print("Currently running: %s" % get_state(uuid))
4387 elif action == "disable":
4388 abort_optional_reenable(uuid)
4389 elif action == "debug":
4390 debug(uuid, debug_cmd, vdi_uuid)
4393if __name__ == '__main__':
4394 main()